Skip to content
Open
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion tests/experimental/agent_loop/agent_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ def init_agent_loop_manager(config: DictConfig) -> AgentLoopManager | RayWorkerG
config=config,
rm_resource_pool=rm_resource_pool,
)
agent_loop_manager = AgentLoopManager(
agent_loop_manager = AgentLoopManager.create(
config=config,
worker_group=actor_rollout_wg,
reward_loop_worker_handles=reward_loop_manager.reward_loop_workers,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,10 @@ async def test_agent_loop_extra_fields_schema_stable_for_training_concat_on_cpu(
# Minimal config surface used by the agent loops.
config = OmegaConf.create(
{
"actor_rollout_ref": {"rollout": {"prompt_length": 16, "response_length": 16}},
"actor_rollout_ref": {
"rollout": {"prompt_length": 16, "response_length": 16, "multi_turn": {"tool_config_path": None}},
"model": {},
},
"data": {
"tool_config_path": None,
"apply_chat_template_kwargs": {},
Expand All @@ -160,23 +163,23 @@ async def test_agent_loop_extra_fields_schema_stable_for_training_concat_on_cpu(
processor = None

trainer_config = DictConfigWrap(config)
dataset_config = DictConfigWrap(config.data)
data_config = DictConfigWrap(config.data)

single_turn = SingleTurnAgentLoop(
trainer_config=trainer_config,
server_manager=server_manager,
tokenizer=tokenizer,
processor=processor,
dataset_cls=RLHFDataset,
dataset_config=dataset_config,
data_config=data_config,
)
partial_single_turn = PartialSingleTurnAgentLoop(
trainer_config=trainer_config,
server_manager=server_manager,
tokenizer=tokenizer,
processor=processor,
dataset_cls=RLHFDataset,
dataset_config=dataset_config,
data_config=data_config,
)

raw_prompt = [{"role": "user", "content": "hi"}]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,10 @@ def test_agent_reward_loop_standalone():
)
actor_rollout_wg.init_model()

agent_loop_manager = AgentLoopManager(config, worker_group=actor_rollout_wg)
agent_loop_manager = AgentLoopManager.create(
config=config,
worker_group=actor_rollout_wg,
)
# sleep rollout replicas
checkpoint_manager = CheckpointEngineManager(
config=omega_conf_to_dataclass(config.actor_rollout_ref.rollout.checkpoint_engine),
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,8 +76,9 @@ def test_agent_reward_loop_standalone():

# 1. init reward model manager
reward_loop_manager = RewardLoopManager(config)
agent_loop_manager = AgentLoopManager(
config=config, reward_loop_worker_handles=reward_loop_manager.reward_loop_workers
agent_loop_manager = AgentLoopManager.create(
config=config,
reward_loop_worker_handles=reward_loop_manager.reward_loop_workers,
)

# 2. init test data
Expand Down
4 changes: 2 additions & 2 deletions tests/special_npu/run_fully_async_policy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -124,8 +124,8 @@ common_params=(
trainer.nnodes=1
trainer.n_gpus_per_node=${n_gpus_training}
trainer.log_val_generations=10
rollout.nnodes=1
rollout.n_gpus_per_node=${n_gpus_rollout}
actor_rollout_ref.rollout.nnodes=1
actor_rollout_ref.rollout.n_gpus_per_node=${n_gpus_rollout}
rollout.total_rollout_steps=${total_rollout_steps}
rollout.total_epochs=2
rollout.test_freq=${test_freq}
Expand Down
4 changes: 2 additions & 2 deletions tests/special_npu/run_one_step_off_policy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -108,8 +108,8 @@ common_params=(
trainer.resume_mode=disable
trainer.nnodes=1
trainer.n_gpus_per_node=${n_npus_training}
rollout.nnodes=1
rollout.n_gpus_per_node=${n_npus_rollout}
actor_rollout_ref.rollout.nnodes=1
actor_rollout_ref.rollout.n_gpus_per_node=${n_npus_rollout}

)

Expand Down
Loading
Loading