verl-project
diff --git a/‎.github/CODEOWNERS‎
Lines changed: 1 addition & 0 deletions b/‎.github/CODEOWNERS‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.github/PULL_REQUEST_TEMPLATE.md‎
Lines changed: 1 addition & 1 deletion b/‎.github/PULL_REQUEST_TEMPLATE.md‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/e2e_ascend.yml‎
Lines changed: 4 additions & 0 deletions b/‎.github/workflows/e2e_ascend.yml‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎.github/workflows/e2e_one_step_off_policy_ascend.yml‎
Lines changed: 3 additions & 3 deletions b/‎.github/workflows/e2e_one_step_off_policy_ascend.yml‎
Lines changed: 3 additions & 3 deletions
diff --git a/‎.github/workflows/e2e_sft_llm.yml‎
Lines changed: 1 addition & 9 deletions b/‎.github/workflows/e2e_sft_llm.yml‎
Lines changed: 1 addition & 9 deletions
diff --git a/‎.github/workflows/e2e_sft_llm_ascend.yml‎
Lines changed: 1 addition & 10 deletions b/‎.github/workflows/e2e_sft_llm_ascend.yml‎
Lines changed: 1 addition & 10 deletions
diff --git a/‎.github/workflows/gpu_unit_tests.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/gpu_unit_tests.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/sgl.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/sgl.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎.github/workflows/vllm.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/vllm.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 0 deletions b/‎.gitignore‎
Lines changed: 2 additions & 0 deletions
@@ -20,6 +20,7 @@
 /verl/workers/actor/megatron_actor.py @ISEEKYAN @vermouth1992
 /verl/workers/critic/megatron_critic.py @ISEEKYAN @vermouth1992
 /verl/workers/megatron_workers.py @ISEEKYAN @vermouth1992
+/verl/experimental @wuxibin89 @ArronHZG
 
 /tests/single_controller @zw0610 @wuxibin89
 /tests/trainer @eric-haibin-lin @vermouth1992 @tongyx361 @PeterSH6
 
@@ -6,7 +6,7 @@
 
 - [ ] Search for similar PRs. Paste at least one query link here: ...
 - [ ] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI)
-  - `{modules}` include `fsdp`, `megatron`, `veomni`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`, `cfg`, `reward`
+  - `{modules}` include `fsdp`, `megatron`, `veomni`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`, `cfg`, `reward`, `fully_async`, `one_step_off`
   - If this PR involves multiple modules, separate them with `,` like `[megatron, fsdp, doc]`
   - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test`
   - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title.
 
@@ -126,6 +126,10 @@ jobs:
           ray stop --force
           export PYTHONPATH=$PYTHONPATH:/Megatron-LM
           USE_DIST_CKPT=True USE_DUMMY_MODEL=True DUMMY_MODEL_CONFIG_PATH=tests/special_e2e/ppo_trainer/expert_parallel/qwen3moe_minimal.json DUMMY_MODEL_PATH=$HOME/dist_ckpt/qwen3_30b_grpo_mindspeed bash tests/special_npu/run_qwen3_30b_grpo_mindspeed.sh
+      - name: Running the E2E test with fully_async_policy algorithm (FSDP2)
+        run: |
+          ray stop --force
+          bash tests/special_npu/run_fully_async_policy.sh
 
   vlm_rl_job:
     if: github.repository_owner == 'verl-project'
 
@@ -68,7 +68,7 @@ on:
       # Entrypoints
       - ".github/workflows/e2e_one_step_off_policy_ascend.yml"
       - "examples/data_preprocess/gsm8k.py"
-      - "tests/special_e2e/run_one_step_off_policy.sh"
+      - "tests/special_npu/run_one_step_off_policy.sh"
 
 # Cancel jobs on the same ref if a new one is triggered
 concurrency:
@@ -122,7 +122,7 @@ jobs:
       - name: Running the E2E test with one_step_off_policy algorithm (FSDP2)
         run: |
           ray stop --force
-          bash tests/special_e2e/run_one_step_off_policy.sh
+          bash tests/special_npu/run_one_step_off_policy.sh
 
   # Test Megatron strategy
   e2e_one_step_off_policy_megatron_ascend:
@@ -167,4 +167,4 @@ jobs:
         run: |
           ray stop --force
           export PYTHONPATH=$PYTHONPATH:/Megatron-LM
-          bash tests/special_e2e/run_one_step_off_policy.sh
+          bash tests/special_npu/run_one_step_off_policy.sh
@@ -110,7 +110,7 @@ jobs:
       - name: Prepare gsm8k dataset
         run: |
           ray stop --force
-          python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
+          python3 examples/data_preprocess/gsm8k_multiturn_sft.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
       - name: Running GSM8K E2E training tests on 8 L20 GPUs with rmpad using function rm
         run: |
           ray stop --force
@@ -123,10 +123,6 @@ jobs:
         run: |
           ray stop --force
           SP_SIZE=2 bash tests/special_e2e/sft/run_sft.sh
-      - name: Check loss difference between sequence parallel vs. default implementation
-        run: |
-          ray stop --force
-          ENTRYPOINT="tests/special_e2e/sft/test_sp_loss_match.py" SP_SIZE=2 bash tests/special_e2e/sft/run_sft.sh
       - name: Running GSM8K E2E training tests on 8 L20 GPUs with sequence parallism and liger
         run: |
           ray stop --force
@@ -140,10 +136,6 @@ jobs:
           ray stop --force
           LORA_RANK=32 RESUME_MODE=auto TOTAL_TRAIN_STEP=2 bash tests/special_e2e/sft/run_sft.sh
       # TODO: multiturn
-      - name: Prepare gsm8k dataset
-        run: |
-          ray stop --force
-          python3 examples/data_preprocess/gsm8k_multiturn_sft.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
       - name: Running GSM8K E2E training tests with multiturn and various configs and compare results
         run: |
           bash tests/special_e2e/sft/test_sft_engine_all.sh
 
@@ -109,7 +109,7 @@ jobs:
           ln -s /root/.cache/models ~/models
       - name: Prepare gsm8k dataset
         run: |
-          python examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
+          python3 examples/data_preprocess/gsm8k_multiturn_sft.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
       - name: Running GSM8K E2E training tests on 8 NPUs with rmpad using function rm
         run: |
           ray stop --force
@@ -122,10 +122,6 @@ jobs:
         run: |
           ray stop --force
           SP_SIZE=2 bash tests/special_e2e/sft/run_sft.sh
-      - name: Check loss difference between sequence parallel vs. default implementation
-        run: |
-          ray stop --force
-          ENTRYPOINT="tests/special_e2e/sft/test_sp_loss_match.py" SP_SIZE=2 bash tests/special_e2e/sft/run_sft.sh
       - name: Running GSM8K E2E training tests with LoRA
         run: |
           ray stop --force
@@ -134,11 +130,6 @@ jobs:
         run: |
           ray stop --force
           LORA_RANK=32 RESUME_MODE=auto TOTAL_TRAIN_STEP=2 bash tests/special_e2e/sft/run_sft.sh
-      # TODO: multiturn
-      - name: Prepare gsm8k dataset
-        run: |
-          ray stop --force
-          python3 examples/data_preprocess/gsm8k_multiturn_sft.py --local_dataset_path ${HOME}/.cache/datasets/openai/gsm8k
       - name: Running GSM8K E2E training tests with multiturn and various configs and compare results
         run: |
           export PYTHONPATH=$PYTHONPATH:/Megatron-LM
 
@@ -108,7 +108,7 @@ jobs:
           pip3 install hf_transfer
           pip3 install -r requirements-test.txt
           pip3 install --no-deps -e .
-          pip3 install cupy-cuda12x pytest-asyncio
+          pip3 install cupy-cuda12x==13.6.0 pytest-asyncio
           pip3 install --ignore-installed blinker
           pip3 install --ignore-installed mlflow "numpy<2.0"
       - name: Run all GPU unit tests
 
@@ -113,7 +113,7 @@ jobs:
           fetch-depth: 0
       - name: Install the current repository
         run: |
-          pip3 install cupy-cuda12x pytest-asyncio
+          pip3 install cupy-cuda12x==13.6.0 pytest-asyncio
           pip3 install hf_transfer fastmcp pytest-asyncio
           pip3 install -r requirements-test.txt
           pip3 install --no-deps -e .
@@ -144,7 +144,7 @@ jobs:
           fetch-depth: 0
       - name: Install the current repository
         run: |
-          pip3 install cupy-cuda12x pytest-asyncio
+          pip3 install cupy-cuda12x==13.6.0 pytest-asyncio
           pip3 install hf_transfer fastmcp pytest-asyncio
           pip3 install -r requirements-test.txt
           pip3 install --no-deps -e .
 
@@ -144,7 +144,7 @@ jobs:
           fetch-depth: 0
       - name: Install the current repository
         run: |
-          pip3 install cupy-cuda12x pytest-asyncio
+          pip3 install pytest-asyncio
           pip3 install -r requirements-test.txt
           pip3 install --no-deps -e .
           pip3 install --upgrade "transformers<5.0"
 
@@ -8,6 +8,8 @@
 **/playground
 **/wandb
 
+/pyrightconfig.json
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]