72 | 72 | - ".github/workflows/e2e_ppo_grpo_trainer_trtllm" |
73 | 73 | - "examples/data_preprocess/gsm8k.py" |
74 | 74 | - "examples/data_preprocess/geo3k.py" |
| 75 | + - "examples/data_preprocess/dapo_multiturn_w_tool.py" |
| 76 | + - "examples/data_preprocess/aime2024_multiturn_w_tool.py" |
| 77 | + - "examples/grpo_trainer/run_qwen2-7b_math_trtllm.sh" |
| 78 | + - "examples/grpo_trainer/run_qwen2-7b_math_megatron_trtllm.sh" |
| 79 | + - "examples/grpo_trainer/run_qwen3-30b_dapo_megatron_fp8_trtllm.sh" |
75 | 80 | # add back when ppo flow is ready |
76 | 81 | # - "tests/special_e2e/run_ppo_trainer_megatron.sh" |
77 | 82 | # - "verl/trainer/main_ppo.py" |
@@ -203,9 +208,46 @@ jobs: |
203 | 208 | run: | |
204 | 209 | rm -rf checkpoints |
205 | 210 |
| 211 | + e2e_dapo_trainer_megatron-qwen3: |
| 212 | + needs: setup |
| 213 | + runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"] |
| 214 | + timeout-minutes: 60 |
| 215 | + env: |
| 216 | + HTTP_PROXY: ${{ secrets.PROXY_HTTP }} |
| 217 | + HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} |
| 218 | + NO_PROXY: "localhost,127.0.0.1,hf-mirror.com" |
| 219 | + HF_ENDPOINT: "https://hf-mirror.com" |
| 220 | + HF_HUB_ENABLE_HF_TRANSFER: "0" |
| 221 | + steps: |
| 222 | + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 |
| 223 | + with: |
| 224 | + fetch-depth: 0 |
| 225 | + - name: Install the current repository |
| 226 | + run: | |
| 227 | + pip3 install -r requirements-test.txt |
| 228 | + pip3 install --no-deps -e . |
| 229 | + - name: Prepare DAPO-Math-17k and AIME-2024 datasets (data_preprocess) |
| 230 | + run: | |
| 231 | + python3 examples/data_preprocess/dapo_multiturn_w_tool.py --local_dataset_path ${HOME}/models/hf_data/dapo-math-17k --local_save_dir ${PWD}/data/dapo-math-17k |
| 232 | + python3 examples/data_preprocess/aime2024_multiturn_w_tool.py --local_dataset_path ${HOME}/models/hf_data/aime-2024 --local_save_dir ${PWD}/data/aime-2024 |
| 233 | + - name: Running DAPO E2E with FP8 TRT-LLM rollout (Qwen3-8B) |
| 234 | + run: | |
| 235 | + ray stop --force |
| 236 | + export INFER_TP=2 ACTOR_TP=2 ACTOR_PP=2 ACTOR_VPP=2 ACTOR_EP=1 ACTOR_CP=2 REF_TP=2 REF_PP=2 REF_VPP=2 REF_EP=1 REF_CP=2 GEN_MOE_TP=null GEN_MOE_EP=null |
| 237 | + export NNODES=1 GPUS_PER_NODE=8 TRTLLM_MOE_BACKEND=CUTLASS |
| 238 | + export DATA_DIR=${PWD} DAPO_MATH_TRAIN=${PWD}/data/dapo-math-17k/train.parquet AIME_VAL=${PWD}/data/aime-2024/train.parquet MODEL_PATH=${HOME}/models/Qwen/Qwen3-0.6B |
| 239 | + bash examples/grpo_trainer/run_qwen3-30b_dapo_megatron_fp8_trtllm.sh \ |
| 240 | + actor_rollout_ref.actor.megatron.override_transformer_config.moe_grouped_gemm=False \ |
| 241 | + actor_rollout_ref.actor.megatron.override_transformer_config.moe_permute_fusion=False \ |
| 242 | + trainer.total_training_steps=1 \ |
| 243 | + trainer.logger='["console"]' |
| 244 | + - name: Clean up |
| 245 | + run: | |
| 246 | + rm -rf checkpoints |
| 247 | +
206 | 248 | cleanup: |
207 | 249 | runs-on: ubuntu-latest |
208 | | - needs: [setup, trtllm_unit_tests, e2e_grpo_trainer_fsdp-qwen2, e2e_grpo_trainer_megatron-qwen2] |
| 250 | + needs: [setup, trtllm_unit_tests, e2e_grpo_trainer_fsdp-qwen2, e2e_grpo_trainer_megatron-qwen2, e2e_dapo_trainer_megatron-qwen3] |
209 | 251 | if: always() |
210 | 252 | steps: |
211 | 253 | - id: destroy-runner |
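
A minimal sketch of how the new job's run step could be replayed outside CI, assuming the two preprocessed datasets already sit under ./data and a Qwen3-0.6B checkpoint lives under ${HOME}/models (in CI these are provisioned by the runner image; adjust paths for your setup):

#!/usr/bin/env bash
# Sketch only: consolidates the CI job's run step; local paths are assumptions.
ray stop --force
# Megatron parallelism layout used by the job on a single 8-GPU node:
# TP = tensor, PP = pipeline, VPP = virtual pipeline, EP = expert, CP = context parallel degree.
export INFER_TP=2 ACTOR_TP=2 ACTOR_PP=2 ACTOR_VPP=2 ACTOR_EP=1 ACTOR_CP=2
export REF_TP=2 REF_PP=2 REF_VPP=2 REF_EP=1 REF_CP=2
export GEN_MOE_TP=null GEN_MOE_EP=null
export NNODES=1 GPUS_PER_NODE=8 TRTLLM_MOE_BACKEND=CUTLASS
# Point the script at the preprocessed parquet files and the small CI model.
export DATA_DIR=${PWD}
export DAPO_MATH_TRAIN=${PWD}/data/dapo-math-17k/train.parquet
export AIME_VAL=${PWD}/data/aime-2024/train.parquet
export MODEL_PATH=${HOME}/models/Qwen/Qwen3-0.6B
# Trailing arguments are Hydra-style overrides, as in the workflow:
# MoE fused kernels off, a single training step, console logging only.
bash examples/grpo_trainer/run_qwen3-30b_dapo_megatron_fp8_trtllm.sh \
    actor_rollout_ref.actor.megatron.override_transformer_config.moe_grouped_gemm=False \
    actor_rollout_ref.actor.megatron.override_transformer_config.moe_permute_fusion=False \
    trainer.total_training_steps=1 \
    trainer.logger='["console"]'

Note the job reuses the 30B launch script but swaps in the 0.6B checkpoint via MODEL_PATH, which keeps the CI run within the 60-minute timeout.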