Skip to content

Commit 41b786c

Browse files
committed
Update scripts for engine workers.
1 parent c2711e2 commit 41b786c

18 files changed

+18
-18
lines changed

verl/experimental/fully_async_policy/shell/dapo_30b_a3b_base_math_fsdp.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -103,7 +103,7 @@ ray job submit --no-wait --runtime-env="${RUNTIME_ENV}" \
103103
data.val_files="${TEST_FILE}" \
104104
data.prompt_key=prompt \
105105
data.truncation='left' \
106 -	actor_rollout_ref.actor.strategy=fsdp \
106 +	actor_rollout_ref.actor.fsdp_config.strategy=fsdp \
107107
critic.strategy=fsdp \
108108
data.max_prompt_length=${max_prompt_length} \
109109
data.max_response_length=${max_response_length} \

verl/experimental/fully_async_policy/shell/dapo_7b_async_retool.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -98,7 +98,7 @@ python3 -m verl.experimental.fully_async_policy.fully_async_main \
9898
actor_rollout_ref.actor.use_dynamic_bsz=True \
9999
actor_rollout_ref.actor.ppo_mini_batch_size=$ppo_mini_batch_size \
100100
actor_rollout_ref.actor.ppo_max_token_len_per_gpu=$actor_max_token_len_per_gpu \
101 -	actor_rollout_ref.actor.strategy=fsdp2 \
101 +	actor_rollout_ref.actor.fsdp_config.strategy=fsdp2 \
102102
critic.strategy=fsdp2 \
103103
actor_rollout_ref.actor.fsdp_config.fsdp_size=${fsdp_size} \
104104
actor_rollout_ref.actor.ulysses_sequence_parallel_size=$train_sp \

verl/experimental/fully_async_policy/shell/dapo_7b_math_fsdp2_16_16.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ python -m verl.experimental.fully_async_policy.fully_async_main \
9090
algorithm.adv_estimator=${adv_estimator} \
9191
algorithm.use_kl_in_reward=${use_kl_in_reward} \
9292
algorithm.kl_ctrl.kl_coef=${kl_coef} \
93 -	actor_rollout_ref.actor.strategy=fsdp2 \
93 +	actor_rollout_ref.actor.fsdp_config.strategy=fsdp2 \
9494
critic.strategy=fsdp2 \
9595
actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \
9696
actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \

verl/experimental/fully_async_policy/shell/dapo_7b_math_fsdp2_32_32.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ python -m verl.experimental.fully_async_policy.fully_async_main \
9090
algorithm.adv_estimator=${adv_estimator} \
9191
algorithm.use_kl_in_reward=${use_kl_in_reward} \
9292
algorithm.kl_ctrl.kl_coef=${kl_coef} \
93 -	actor_rollout_ref.actor.strategy=fsdp2 \
93 +	actor_rollout_ref.actor.fsdp_config.strategy=fsdp2 \
9494
critic.strategy=fsdp2 \
9595
actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \
9696
actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \

verl/experimental/fully_async_policy/shell/dapo_7b_math_fsdp2_4_12.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ python -m verl.experimental.fully_async_policy.fully_async_main \
9292
algorithm.adv_estimator=${adv_estimator} \
9393
algorithm.use_kl_in_reward=${use_kl_in_reward} \
9494
algorithm.kl_ctrl.kl_coef=${kl_coef} \
95 -	actor_rollout_ref.actor.strategy=fsdp2 \
95 +	actor_rollout_ref.actor.fsdp_config.strategy=fsdp2 \
9696
critic.strategy=fsdp2 \
9797
actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \
9898
actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \

verl/experimental/fully_async_policy/shell/dapo_7b_math_fsdp2_4_4.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -92,7 +92,7 @@ python -m verl.experimental.fully_async_policy.fully_async_main \
9292
algorithm.adv_estimator=${adv_estimator} \
9393
algorithm.use_kl_in_reward=${use_kl_in_reward} \
9494
algorithm.kl_ctrl.kl_coef=${kl_coef} \
95 -	actor_rollout_ref.actor.strategy=fsdp2 \
95 +	actor_rollout_ref.actor.fsdp_config.strategy=fsdp2 \
9696
critic.strategy=fsdp2 \
9797
actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \
9898
actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \

verl/experimental/fully_async_policy/shell/dapo_7b_math_fsdp2_64_64.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ python -m verl.experimental.fully_async_policy.fully_async_main \
9090
algorithm.adv_estimator=${adv_estimator} \
9191
algorithm.use_kl_in_reward=${use_kl_in_reward} \
9292
algorithm.kl_ctrl.kl_coef=${kl_coef} \
93 -	actor_rollout_ref.actor.strategy=fsdp2 \
93 +	actor_rollout_ref.actor.fsdp_config.strategy=fsdp2 \
9494
critic.strategy=fsdp2 \
9595
actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \
9696
actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \

verl/experimental/fully_async_policy/shell/dapo_7b_math_fsdp2_64_64_mis.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -96,7 +96,7 @@ python -m verl.experimental.fully_async_policy.fully_async_main \
9696
algorithm.adv_estimator=${adv_estimator} \
9797
algorithm.use_kl_in_reward=${use_kl_in_reward} \
9898
algorithm.kl_ctrl.kl_coef=${kl_coef} \
99 -	actor_rollout_ref.actor.strategy=fsdp2 \
99 +	actor_rollout_ref.actor.fsdp_config.strategy=fsdp2 \
100100
critic.strategy=fsdp2 \
101101
actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \
102102
actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \

verl/experimental/fully_async_policy/shell/dapo_7b_math_fsdp2_8_8.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -90,7 +90,7 @@ python -m verl.experimental.fully_async_policy.fully_async_main \
9090
algorithm.adv_estimator=${adv_estimator} \
9191
algorithm.use_kl_in_reward=${use_kl_in_reward} \
9292
algorithm.kl_ctrl.kl_coef=${kl_coef} \
93 -	actor_rollout_ref.actor.strategy=fsdp2 \
93 +	actor_rollout_ref.actor.fsdp_config.strategy=fsdp2 \
9494
critic.strategy=fsdp2 \
9595
actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \
9696
actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \

verl/experimental/one_step_off_policy/shell/dapo_7b_math_fsdp2_4_12.sh

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,7 @@ python3 -m verl.experimental.one_step_off_policy.main_ppo \
7373
algorithm.adv_estimator=${adv_estimator} \
7474
algorithm.use_kl_in_reward=${use_kl_in_reward} \
7575
algorithm.kl_ctrl.kl_coef=${kl_coef} \
76 -	actor_rollout_ref.actor.strategy=fsdp2 \
76 +	actor_rollout_ref.actor.fsdp_config.strategy=fsdp2 \
7777
critic.strategy=fsdp2 \
7878
actor_rollout_ref.actor.use_kl_loss=${use_kl_loss} \
7979
actor_rollout_ref.actor.kl_loss_coef=${kl_loss_coef} \

0 commit comments

Comments (0)