verl-project
diff --git a/.github/workflows/cpu_unit_tests.yml b/.github/workflows/cpu_unit_tests.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/gpu_unit_tests.yml b/.github/workflows/gpu_unit_tests.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/model.yml b/.github/workflows/model.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/npu_unit_tests.yml b/.github/workflows/npu_unit_tests.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/reward_model_sglang.yml b/.github/workflows/reward_model_sglang.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/reward_model_vllm.yml b/.github/workflows/reward_model_vllm.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/reward_model_vllm_ascend.yml b/.github/workflows/reward_model_vllm_ascend.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/sanity.yml b/.github/workflows/sanity.yml
Lines changed: 1 addition & 1 deletion
diff --git a/.github/workflows/vllm.yml b/.github/workflows/vllm.yml
Lines changed: 6 additions & 4 deletions
diff --git a/README.md b/README.md
Lines changed: 2 additions & 0 deletions
@@ -95,7 +95,7 @@ jobs:
         run: |
           pip3 install -r requirements-test.txt
           pip3 install --no-deps -e .
-          pip3 install --upgrade transformers
+          pip3 install --upgrade "transformers<5.0.0"
       - name: Download datasets
         run: |
           python3 examples/data_preprocess/gsm8k.py --local_dataset_path ${HOME}/models/hf_data/gsm8k
 
@@ -113,7 +113,7 @@ jobs:
           pip3 install --ignore-installed mlflow "numpy<2.0"
       - name: Run all GPU unit tests
         run: |
-          pytest -s -x --ignore-glob="*on_npu.py" --ignore-glob="*test_special_*.py" --ignore-glob='*on_cpu.py' --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob='tests/special*' --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" tests/
+          pytest -s -x --ignore-glob="*on_npu.py" --ignore-glob="*test_special_*.py" --ignore-glob='*on_cpu.py' --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob='tests/special*' --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_shared_memory*" tests/
       - name: Testing LinearCrossEntropyTP Correctness, Computation Time and Memory Consumption
         run: |
           LOW_MEMORY=True torchrun --standalone --nnodes=1 --nproc-per-node=8 tests/utils/test_special_linear_cross_entropy_tp.py
 
@@ -99,7 +99,7 @@ jobs:
         run: |
           pip3 install -r requirements-test.txt
           pip3 install --no-deps -e .
-          pip3 install --upgrade transformers
+          pip3 install --upgrade "transformers<5.0.0"
       - name: Running rmpad model tests on 8 L20 GPUs + flash_attn 2.5.8
         run: |
           pytest -s tests/models/test_transformer.py
 
@@ -109,7 +109,7 @@ jobs:
       - name: Run all NPU unit tests
         run: |
           export PYTHONPATH=$PYTHONPATH:/Megatron-LM
-          pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" tests/
+          pytest -s -x --ignore-glob="*test_special_*.py" --ignore-glob="*on_cpu.py" --ignore-glob="*test_vllm*" --ignore-glob="*_sglang*" --ignore-glob="*_hf_rollout*" --ignore-glob="tests/models/" --ignore-glob="tests/special*" --ignore-glob="tests/experimental" --ignore-glob="tests/workers/reward_model" --ignore-glob="*test_rvdz*" --ignore-glob="*test_ray_collectives*" --ignore-glob="*test_nvtx_profile*" --ignore-glob="tests/checkpoint_engine" --ignore-glob="*test_shared_memory*" tests/
       - name: Testing FSDP2 actor functionality
         run: |
           torchrun --standalone --nnodes=1 --nproc-per-node=2 tests/workers/actor/test_special_dp_actor.py
 
@@ -115,7 +115,7 @@ jobs:
       - name: Running sglang agent loop with reward manager tests on 8 L20 GPUs
         run: |
           unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
-          ROLLOUT_NAME=sglang pytest -s -x tests/experimental/reward_loop/test_agent_loop_reward_manager.py
+          ROLLOUT_NAME=sglang pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_standalone.py
       - name: Running sglang agent loop with reward model colocate tests on 8 L20 GPUs
         run: |
           unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
 
@@ -115,7 +115,7 @@ jobs:
       - name: Running vllm agent loop with reward manager tests on 8 L20 GPUs
         run: |
           unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
-          ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_loop_reward_manager.py
+          ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_standalone.py
       - name: Running vllm agent loop with reward model colocate tests on 8 L20 GPUs
         run: |
           unset http_proxy https_proxy HTTP_PROXY HTTPS_PROXY
 
@@ -105,7 +105,7 @@ jobs:
           ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_reward_model_disrm.py
       - name: Running vllm agent loop with reward manager tests on 8 NPUs
         run: |
-          ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_loop_reward_manager.py
+          ROLLOUT_NAME=vllm pytest -s -x tests/experimental/reward_loop/test_agent_reward_loop_standalone.py
       - name: Running vllm agent loop with reward model colocate tests on 8 NPUs
         run: |
           export HCCL_HOST_SOCKET_PORT_RANGE=auto
 
@@ -90,7 +90,7 @@ jobs:
           fi
       - name: Assert SGLang naming convention
         run: |
-          if grep -rIn --exclude-dir=.git --exclude-dir=.github --exclude-dir=venv --exclude-dir=__pycache__ -E 'Sglang|sgLang|sglAng|sglaNg|sglanG' .; then
+          if grep -rIn --exclude-dir=.git --exclude-dir=.github --exclude-dir=venv --exclude-dir=__pycache__ --exclude=ascend_sglang_best_practices.rst -E 'Sglang|sgLang|sglAng|sglaNg|sglanG' .; then
             echo "Please use SGLang or sglang as the formal name of SGLang rollout engine"
             exit 1
           fi
 
@@ -109,12 +109,13 @@ jobs:
         run: |
           pip3 install -r requirements-test.txt
           pip3 install --no-deps -e .
+          pip3 install --upgrade "transformers<5.0"
       #      - name: Download Model to Use
       #        run: |
-      #          huggingface-cli download Qwen/Qwen2.5-0.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B-Instruct
-      #          huggingface-cli download Qwen/Qwen2.5-1.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-1.5B-Instruct
-      #          huggingface-cli download Qwen/Qwen2.5-VL-3B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct
-      #          huggingface-cli download OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN --local-dir ${HOME}/models/OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN
+      #          hf download Qwen/Qwen2.5-0.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-0.5B-Instruct
+      #          hf download Qwen/Qwen2.5-1.5B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-1.5B-Instruct
+      #          hf download Qwen/Qwen2.5-VL-3B-Instruct --local-dir ${HOME}/models/Qwen/Qwen2.5-VL-3B-Instruct
+      #          hf download OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN --local-dir ${HOME}/models/OldKingMeister/Qwen2.5-1.5B-Instruct-YaRN
       #          export HF_HUB_OFFLINE=1
       - name: Prepare gsm8k dataset
         run: |
@@ -146,6 +147,7 @@ jobs:
           pip3 install cupy-cuda12x pytest-asyncio
           pip3 install -r requirements-test.txt
           pip3 install --no-deps -e .
+          pip3 install --upgrade "transformers<5.0"
       - name: Test vLLM ServerAdapter with Checkpoint Engine (NCCL)
         run: |
           ROLLOUT_NAME=vllm pytest -svvv tests/checkpoint_engine/test_special_server_adapter.py
 
@@ -282,6 +282,8 @@ Welcome to register your awesome project built with `verl` for other developers'
 - [deepscaler](https://github.com/agentica-project/rllm/tree/deepscaler): iterative context scaling with GRPO ![GitHub Repo stars](https://img.shields.io/github/stars/agentica-project/deepscaler)
 - [DAPO](https://dapo-sia.github.io/): the fully open source SOTA RL algorithm that beats DeepSeek-R1-zero-32B ![GitHub Repo stars](https://img.shields.io/github/stars/volcengine/verl)
 - [NoisyRollout](https://github.com/NUS-TRAIL/NoisyRollout): Reinforcing Visual Reasoning with Data Augmentation ![GitHub Repo stars](https://img.shields.io/github/stars/NUS-TRAIL/NoisyRollout)
+- [SPEAR](https://github.com/TencentYoutuResearch/SPEAR): **Self-imitation** with **Progressive Exploration** for Agentic Reinforcement Learning (ICLR 2026) ![GitHub Repo stars](https://img.shields.io/github/stars/TencentYoutuResearch/SPEAR)
+- [RuleReasoner](https://github.com/bigai-nlco/RuleReasoner): Reinforced Rule-based Reasoning via **Domain-aware Dynamic Sampling** (ICLR 2026) ![GitHub Repo stars](https://img.shields.io/github/stars/bigai-nlco/RuleReasoner)
 
 ## Contribution Guide