File tree Expand file tree Collapse file tree 1 file changed +5
-5
lines changed
recipes_custom/Qwen2.5-72B-sft Expand file tree Collapse file tree 1 file changed +5
-5
lines changed Original file line number Diff line number Diff line change @@ -6,7 +6,7 @@ TRAIN_FILES=${TRAIN_FILES:-/mnt/data/liuchonghan/235b_dataset/merged_sft_with_me
6 6 TRAIN_BATCH_SIZE=${TRAIN_BATCH_SIZE:-256}
7 7 backend=${BACKEND:-megatron}
8 8 project_name=verl_sft_qwen2.5_72b
9- RESUME_MODE=disable
9+ RESUME_MODE=disable # auto
10 10 MODEL_ID=${MODEL_ID:-/mnt/data/liuchonghan/Qwen2.5-72B-A064}
11 11 TOTAL_EPOCHS=${TOTAL_EPOCHS:-2}
1212
6161fi
6262
63 63 CKPT_HOME=${CKPT_HOME:-/mnt/data/liuchonghan/ckpt_verl/sft/${project_name}/${exp_name}}
64 - NNODES=${WORLD_SIZE:-16}
65 - NODE_RANK=${RANK:-0}
66 - MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
67 - MASTER_PORT=${MASTER_PORT:-23457}
64 + NNODES=${WORLD_SIZE:-16}
65 + NODE_RANK=${RANK:-0}
66 + MASTER_ADDR=${MASTER_ADDR:-"127.0.0.1"}
67 + MASTER_PORT=${MASTER_PORT:-23457}
6868
69 69 echo " >>> 节点信息: RANK $NODE_RANK / WORLD_SIZE $NNODES "
70 70 echo " >>> 通信信息: MASTER $MASTER_ADDR : $MASTER_PORT "
You can’t perform that action at this time.
0 commit comments