72 | 72 | - ".github/workflows/e2e_ppo_grpo_trainer_trtllm" |
73 | 73 | - "examples/data_preprocess/gsm8k.py" |
74 | 74 | - "examples/data_preprocess/geo3k.py" |
| 75 | + - "examples/data_preprocess/dapo_multiturn_w_tool.py" |
| 76 | + - "examples/data_preprocess/aime2024_multiturn_w_tool.py" |
| 77 | + - "examples/grpo_trainer/run_qwen2-7b_math_trtllm.sh" |
| 78 | + - "examples/grpo_trainer/run_qwen2-7b_math_megatron_trtllm.sh" |
| 79 | + - "examples/grpo_trainer/run_qwen3-30b_dapo_megatron_fp8_trtllm.sh" |
75 | 80 | # add back when ppo flow is ready |
76 | 81 | # - "tests/special_e2e/run_ppo_trainer_megatron.sh" |
77 | 82 | # - "verl/trainer/main_ppo.py" |
@@ -203,9 +208,46 @@ jobs: |
203 | 208 | run: | |
204 | 209 | rm -rf checkpoints |
205 | 210 |
| 211 | + e2e_dapo_trainer_megatron-qwen3: |
| 212 | + needs: setup |
| 213 | + runs-on: ["${{ needs.setup.outputs.runner-label || 'L20x8' }}"] |
| 214 | + timeout-minutes: 60 |
| 215 | + env: |
| 216 | + HTTP_PROXY: ${{ secrets.PROXY_HTTP }} |
| 217 | + HTTPS_PROXY: ${{ secrets.PROXY_HTTPS }} |
| 218 | + NO_PROXY: "localhost,127.0.0.1,hf-mirror.com" |
| 219 | + HF_ENDPOINT: "https://hf-mirror.com" |
| 220 | + HF_HUB_ENABLE_HF_TRANSFER: "0" |
| 221 | + steps: |
| 222 | + - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 |
| 223 | + with: |
| 224 | + fetch-depth: 0 |
| 225 | + - name: Install the current repository |
| 226 | + run: | |
| 227 | + pip3 install -r requirements-test.txt |
| 228 | + pip3 install --no-deps -e . |
| 229 | + - name: Prepare DAPO-Math-17k and AIME-2024 datasets (data_preprocess) |
| 230 | + run: | |
| 231 | + python3 examples/data_preprocess/dapo_multiturn_w_tool.py --local_dataset_path ${HOME}/models/hf_data/dapo-math-17k --local_save_dir ${PWD}/data/dapo-math-17k |
| 232 | + python3 examples/data_preprocess/aime2024_multiturn_w_tool.py --local_dataset_path ${HOME}/models/hf_data/aime-2024 --local_save_dir ${PWD}/data/aime-2024 |
| 233 | + - name: Running DAPO E2E with FP8 TRT-LLM rollout (Qwen3-8B) |
| 234 | + run: | |
| 235 | + ray stop --force |
| 236 | + export INFER_TP=2 ACTOR_TP=2 ACTOR_PP=2 ACTOR_VPP=2 ACTOR_EP=1 ACTOR_CP=2 REF_TP=2 REF_PP=2 REF_VPP=2 REF_EP=1 REF_CP=2 GEN_MOE_TP=null GEN_MOE_EP=null |
| 237 | + export NNODES=1 GPUS_PER_NODE=8 TRTLLM_MOE_BACKEND=CUTLASS |
| 238 | + export DATA_DIR=${PWD} DAPO_MATH_TRAIN=${PWD}/data/dapo-math-17k/train.parquet AIME_VAL=${PWD}/data/aime-2024/train.parquet MODEL_PATH=${HOME}/models/Qwen/Qwen3-0.6B |
| 239 | + bash examples/grpo_trainer/run_qwen3-30b_dapo_megatron_fp8_trtllm.sh \ |
| 240 | + actor_rollout_ref.actor.megatron.override_transformer_config.moe_grouped_gemm=False \ |
| 241 | + actor_rollout_ref.actor.megatron.override_transformer_config.moe_permute_fusion=False \ |
| 242 | + trainer.total_training_steps=1 \ |
| 243 | + trainer.logger='["console"]' |
| 244 | + - name: Clean up |
| 245 | + run: | |
| 246 | + rm -rf checkpoints |
| 247 | +
206 | 248 | cleanup: |
207 | 249 | runs-on: ubuntu-latest |
208 | | - needs: [setup, trtllm_unit_tests, e2e_grpo_trainer_fsdp-qwen2, e2e_grpo_trainer_megatron-qwen2] |
| 250 | + needs: [setup, trtllm_unit_tests, e2e_grpo_trainer_fsdp-qwen2, e2e_grpo_trainer_megatron-qwen2, e2e_dapo_trainer_megatron-qwen3] |
209 | 251 | if: always() |
210 | 252 | steps: |
211 | 253 | - id: destroy-runner |
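
A minimal sketch of how the new job's run step could be replayed outside CI, assuming the two preprocessed datasets already sit under ./data and a Qwen3-0.6B checkpoint lives under ${HOME}/models (in CI these are provisioned by the runner image; adjust paths for your setup):

#!/usr/bin/env bash
# Sketch only: consolidates the CI job's run step; local paths are assumptions.
ray stop --force
# Megatron parallelism layout used by the job on a single 8-GPU node:
# TP = tensor, PP = pipeline, VPP = virtual pipeline, EP = expert, CP = context parallel degree.
export INFER_TP=2 ACTOR_TP=2 ACTOR_PP=2 ACTOR_VPP=2 ACTOR_EP=1 ACTOR_CP=2
export REF_TP=2 REF_PP=2 REF_VPP=2 REF_EP=1 REF_CP=2
export GEN_MOE_TP=null GEN_MOE_EP=null
export NNODES=1 GPUS_PER_NODE=8 TRTLLM_MOE_BACKEND=CUTLASS
# Point the script at the preprocessed parquet files and the small CI model.
export DATA_DIR=${PWD}
export DAPO_MATH_TRAIN=${PWD}/data/dapo-math-17k/train.parquet
export AIME_VAL=${PWD}/data/aime-2024/train.parquet
export MODEL_PATH=${HOME}/models/Qwen/Qwen3-0.6B
# Trailing arguments are Hydra-style overrides, as in the workflow:
# MoE fused kernels off, a single training step, console logging only.
bash examples/grpo_trainer/run_qwen3-30b_dapo_megatron_fp8_trtllm.sh \
    actor_rollout_ref.actor.megatron.override_transformer_config.moe_grouped_gemm=False \
    actor_rollout_ref.actor.megatron.override_transformer_config.moe_permute_fusion=False \
    trainer.total_training_steps=1 \
    trainer.logger='["console"]'

Note the job reuses the 30B launch script but swaps in the 0.6B checkpoint via MODEL_PATH, which keeps the CI run within the 60-minute timeout.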