Skip to content

Commit 903963a

Browse files
committed
Update training script
1 parent 7e06ab2 commit 903963a

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

examples/on_policy_distillation_trainer/run_qwen_gsmk8k.sh

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,7 +4,7 @@ conda activate verl
44
export PATH=$CONDA_PREFIX/bin:$PATH
55
export NCCL_P2P_DISABLE=1
66
export CUDA_DEVICE_ORDER=PCI_BUS_ID
7-
export CUDA_VISIBLE_DEVICES=5,6,7,8
7+
export CUDA_VISIBLE_DEVICES=3,4
88
export DATA_PATH=$PWD/../verlData
99
export HF_HOME=$DATA_PATH
1010
export VLLM_CACHE_DIR=$DATA_PATH/vllm_cache
@@ -17,17 +17,17 @@ ROLLOUT_NAME="vllm" # sglang or vllm
1717

1818
FAMILY="Qwen"
1919
STUDENT_MODEL=Qwen2.5-0.5B
20-
TEACHER_MODEL=Qwen2.5-3B-Instruct
20+
TEACHER_MODEL=Qwen2.5-7B-Instruct
2121

2222
USE_POLICY_GRADIENT=False
23-
DISTILLATION_LOSS_MODE="k3"
23+
# DISTILLATION_LOSS_MODE="k3"
2424
DISTILLATION_LOSS_MODE="forward_kl_topk"
2525

26-
DISTILLATION_LOSS_MODE="k1"
27-
USE_POLICY_GRADIENT=True
26+
# USE_POLICY_GRADIENT=True
27+
# DISTILLATION_LOSS_MODE="k1"
2828

2929
DISTILLATION_LOSS_MAX_CLAMP=10.0
30-
DISTILLATION_LOG_PROB_MIN_CLAMP=null
30+
DISTILLATION_LOG_PROB_MIN_CLAMP=-10.0
3131

3232
PROJECT_NAME='verl_on_policy_distillation_example_gsm8k'
3333
EXP_NAME="${FAMILY}/student-${STUDENT_MODEL}/teacher-${TEACHER_MODEL}/loss-${DISTILLATION_LOSS_MODE}-pg-${USE_POLICY_GRADIENT}-maxclamp-${DISTILLATION_LOSS_MAX_CLAMP}-logprobminclamp-${DISTILLATION_LOG_PROB_MIN_CLAMP}"
@@ -41,7 +41,7 @@ USE_DYNAMIC_BSZ=False
4141

4242
STUDENT_WORLD_SIZE=2
4343

44-
TEACHER_RESOURCE_POOL=True
44+
TEACHER_RESOURCE_POOL=False
4545
TEACHER_WORLD_SIZE=2
4646

4747
ENFORCE_EAGER=False # true for faster debugging
@@ -122,7 +122,7 @@ ALGORITHM=(
122122
)
123123

124124
TRAINER=(
125-
trainer.logger='["console"]'
125+
trainer.logger='["console","wandb"]'
126126
trainer.project_name=$PROJECT_NAME
127127
trainer.experiment_name=$EXP_NAME
128128
trainer.n_gpus_per_node=$STUDENT_WORLD_SIZE

0 commit comments

Comments (0)