
Commit 8e8deed

chore: tune vLLM rollout memory for single-node
- raise vLLM gpu_memory_utilization to 0.30 for KV cache
- lower rollout.n and cap max batched tokens for stability
- apply settings to both Megatron and FSDP single-node scripts
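A back-of-the-envelope sketch of why the bump from 0.25 to 0.30 matters for the KV cache: vLLM caps its total allocation at roughly `gpu_memory * gpu_memory_utilization`, and what remains after model weights and activations becomes KV-cache space. The 80 GiB GPU and 16 GiB model footprint below are hypothetical illustration numbers, and the formula is a simplification of vLLM's actual memory profiling.

```python
def kv_cache_budget_gib(gpu_mem_gib: float,
                        gpu_memory_utilization: float,
                        model_footprint_gib: float) -> float:
    """Simplified KV-cache budget under vLLM's memory cap:
    total allowed allocation minus the (assumed fixed) model footprint."""
    return gpu_mem_gib * gpu_memory_utilization - model_footprint_gib

# Hypothetical 80 GiB GPU with a 16 GiB weights+activations footprint.
old_budget = kv_cache_budget_gib(80.0, 0.25, 16.0)
new_budget = kv_cache_budget_gib(80.0, 0.30, 16.0)
print(f"KV cache at 0.25: {old_budget:.1f} GiB, at 0.30: {new_budget:.1f} GiB")
```

Because the footprint is fixed, a small increase in the utilization cap can enlarge the KV-cache budget disproportionately; lowering rollout.n to 2 and capping max_num_batched_tokens at 4096 then bounds how much of that cache a single step can demand.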
1 parent 56ba579 commit 8e8deed

File tree

2 files changed: +6, -4 lines


recipes_custom/RLVR_ABCDE_dense/run_grpo_fsdp_single_node.sh

Lines changed: 3 additions & 2 deletions
```diff
@@ -58,8 +58,9 @@ python3 $ENTRYPOINT --config-path=/llm-align/liuchonghan/verl_lao/verl/trainer/c
     actor_rollout_ref.rollout.tensor_model_parallel_size=1 \
     actor_rollout_ref.rollout.name=vllm \
     actor_rollout_ref.rollout.mode=$rollout_mode \
-    actor_rollout_ref.rollout.gpu_memory_utilization=0.25 \
-    actor_rollout_ref.rollout.n=4 \
+    actor_rollout_ref.rollout.gpu_memory_utilization=0.30 \
+    actor_rollout_ref.rollout.n=2 \
+    actor_rollout_ref.rollout.max_num_batched_tokens=4096 \
     actor_rollout_ref.ref.fsdp_config.fsdp_size=$FSDP_SIZE \
     actor_rollout_ref.ref.fsdp_config.param_offload=$REF_OFFLOAD \
     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
```

recipes_custom/RLVR_ABCDE_dense/run_grpo_megatron_single_node.sh

Lines changed: 3 additions & 2 deletions
```diff
@@ -53,8 +53,9 @@ python3 $ENTRYPOINT --config-path=/llm-align/liuchonghan/verl_lao/verl/trainer/c
     actor_rollout_ref.rollout.tensor_model_parallel_size=$TP_SIZE \
     actor_rollout_ref.rollout.name=vllm \
     actor_rollout_ref.rollout.mode=$rollout_mode \
-    actor_rollout_ref.rollout.gpu_memory_utilization=0.25 \
-    actor_rollout_ref.rollout.n=4 \
+    actor_rollout_ref.rollout.gpu_memory_utilization=0.30 \
+    actor_rollout_ref.rollout.n=2 \
+    actor_rollout_ref.rollout.max_num_batched_tokens=4096 \
     actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=1 \
     actor_rollout_ref.ref.megatron.pipeline_model_parallel_size=$PP_SIZE \
     actor_rollout_ref.ref.megatron.tensor_model_parallel_size=$TP_SIZE \
```
