Skip to content

Commit a4df663

Browse files
committed
Add fp8 trtllm rollout test to CI
Signed-off-by: shuyixiong <219646547+shuyixiong@users.noreply.github.com>
1 parent c1a751f commit a4df663

File tree

2 files changed

+50
-3
lines changed

2 files changed

+50
-3
lines changed

.github/workflows/e2e_ppo_grpo_trainer_trtllm.yml

Lines changed: 43 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,11 @@ on:
7272
- ".github/workflows/e2e_ppo_grpo_trainer_trtllm.yml"
7373
- "examples/data_preprocess/gsm8k.py"
7474
- "examples/data_preprocess/geo3k.py"
75+
- "examples/data_preprocess/dapo_multiturn_w_tool.py"
76+
- "examples/data_preprocess/aime2024_multiturn_w_tool.py"
77+
- "examples/grpo_trainer/run_qwen2-7b_math_trtllm.sh"
78+
- "examples/grpo_trainer/run_qwen2-7b_math_megatron_trtllm.sh"
79+
- "examples/grpo_trainer/run_qwen3-30b_dapo_megatron_fp8_trtllm.sh"
7580
# add back when ppo flow is ready
7681
# - "tests/special_e2e/run_ppo_trainer_megatron.sh"
7782
# - "verl/trainer/main_ppo.py"
@@ -203,9 +208,46 @@ jobs:
203208
run: |
204209
rm -rf checkpoints
205210
211+
e2e_dapo_trainer_megatron-qwen3:
212+
needs: setup
213+
runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"]
214+
timeout-minutes: 60
215+
env:
216+
HTTP_PROXY: ${{ secrets.PROXY_HTTP }}
217+
HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }}
218+
NO_PROXY: "localhost,127.0.0.1,hf-mirror.com"
219+
HF_ENDPOINT: "https://hf-mirror.com"
220+
HF_HUB_ENABLE_HF_TRANSFER: "0"
221+
steps:
222+
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
223+
with:
224+
fetch-depth: 0
225+
- name: Install the current repository
226+
run: |
227+
pip3 install -r requirements-test.txt
228+
pip3 install --no-deps -e .
229+
- name: Prepare DAPO-Math-17k and AIME-2024 datasets (data_preprocess)
230+
run: |
231+
python3 examples/data_preprocess/dapo_multiturn_w_tool.py --local_dataset_path ${HOME}/models/hf_data/dapo-math-17k --local_save_dir ${PWD}/data/dapo-math-17k
232+
python3 examples/data_preprocess/aime2024_multiturn_w_tool.py --local_dataset_path ${HOME}/models/hf_data/aime-2024 --local_save_dir ${PWD}/data/aime-2024
233+
- name: Running DAPO E2E with FP8 TRT-LLM rollout (Qwen3-0.6B)
234+
run: |
235+
ray stop --force
236+
export INFER_TP=2 ACTOR_TP=2 ACTOR_PP=2 ACTOR_VPP=2 ACTOR_EP=1 ACTOR_CP=2 REF_TP=2 REF_PP=2 REF_VPP=2 REF_EP=1 REF_CP=2 GEN_MOE_TP=null GEN_MOE_EP=null
237+
export NNODES=1 GPUS_PER_NODE=8 TRTLLM_MOE_BACKEND=CUTLASS
238+
export DATA_DIR=${PWD} DAPO_MATH_TRAIN=${PWD}/data/dapo-math-17k/train.parquet AIME_VAL=${PWD}/data/aime-2024/train.parquet MODEL_PATH=${HOME}/models/Qwen/Qwen3-0.6B
239+
bash examples/grpo_trainer/run_qwen3-30b_dapo_megatron_fp8_trtllm.sh \
240+
actor_rollout_ref.actor.megatron.override_transformer_config.moe_grouped_gemm=False \
241+
actor_rollout_ref.actor.megatron.override_transformer_config.moe_permute_fusion=False \
242+
trainer.total_training_steps=1 \
243+
trainer.logger='["console"]'
244+
- name: clean up
245+
run: |
246+
rm -rf checkpoints
247+
206248
cleanup:
207249
runs-on: ubuntu-latest
208-
needs: [setup, trtllm_unit_tests, e2e_grpo_trainer_fsdp-qwen2, e2e_grpo_trainer_megatron-qwen2]
250+
needs: [setup, trtllm_unit_tests, e2e_grpo_trainer_fsdp-qwen2, e2e_grpo_trainer_megatron-qwen2, e2e_dapo_trainer_megatron-qwen3]
209251
if: always()
210252
steps:
211253
- id: destroy-runner

examples/grpo_trainer/run_qwen3-30b_dapo_megatron_fp8_trtllm.sh

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,14 +10,17 @@ export RAY_DEDUP_LOGS=0
1010
# -----
1111
# Config
1212
# -----
13-
TP=${1:-4}
13+
TP=${INFER_TP:-4}
1414
ACTOR_TP=${ACTOR_TP:-4}
1515
ACTOR_PP=${ACTOR_PP:-2}
1616
ACTOR_VPP=${ACTOR_VPP:-2}
1717
ACTOR_EP=${ACTOR_EP:-2}
18+
ACTOR_CP=${ACTOR_CP:-1}
19+
REF_TP=${REF_TP:-4}
1820
REF_PP=${REF_PP:-2}
1921
REF_VPP=${REF_VPP:-2}
2022
REF_EP=${REF_EP:-2}
23+
REF_CP=${REF_CP:-1}
2124
GEN_MOE_TP=${GEN_MOE_TP:-2}
2225
GEN_MOE_EP=${GEN_MOE_EP:-2}
2326
PROJECT_NAME=${PROJECT_NAME:-"Qwen3-30B-A3B-DAPO-GB200"}
@@ -93,6 +96,7 @@ python3 -m verl.trainer.main_ppo --config-path=config \
9396
actor_rollout_ref.actor.megatron.pipeline_model_parallel_size=${ACTOR_PP} \
9497
actor_rollout_ref.actor.megatron.virtual_pipeline_model_parallel_size=${ACTOR_VPP_OVERRIDE} \
9598
actor_rollout_ref.actor.megatron.expert_model_parallel_size=${ACTOR_EP} \
99+
actor_rollout_ref.actor.megatron.context_parallel_size=${ACTOR_CP} \
96100
actor_rollout_ref.actor.megatron.param_offload=True \
97101
+actor_rollout_ref.actor.megatron.override_transformer_config.recompute_method=uniform \
98102
+actor_rollout_ref.actor.megatron.override_transformer_config.recompute_granularity=full \
@@ -109,10 +113,11 @@ python3 -m verl.trainer.main_ppo --config-path=config \
109113
actor_rollout_ref.ref.log_prob_max_token_len_per_gpu=40960 \
110114
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=4 \
111115
actor_rollout_ref.ref.log_prob_use_dynamic_bsz=True \
112-
actor_rollout_ref.ref.megatron.tensor_model_parallel_size=${ACTOR_TP} \
116+
actor_rollout_ref.ref.megatron.tensor_model_parallel_size=${REF_TP} \
113117
actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=${REF_PP} \
114118
actor_rollout_ref.ref.megatron.virtual_pipeline_model_parallel_size=${REF_VPP_OVERRIDE} \
115119
actor_rollout_ref.ref.megatron.expert_model_parallel_size=${REF_EP} \
120+
actor_rollout_ref.ref.megatron.context_parallel_size=${REF_CP} \
116121
actor_rollout_ref.rollout.name=trtllm \
117122
actor_rollout_ref.rollout.mode=async \
118123
actor_rollout_ref.rollout.calculate_log_probs=True \

0 commit comments

Comments
 (0)