[misc] feat: delete unnecessary base classes in agent loop worker and vLLMHttpServer (#4838)

PeterSH6 · web-flow · commit ba76c5edfc90 · 2026-01-08T18:41:27.000+08:00
diff --git a/verl/experimental/agent_loop/agent_loop.py b/verl/experimental/agent_loop/agent_loop.py
@@ -341,7 +341,7 @@ def decorator(subclass: type[AgentLoopBase]) -> type[AgentLoopBase]:
     return decorator
 
 
-class AgentLoopWorkerBase:
+class AgentLoopWorker:
     """Agent loop worker takes a batch of messages and run each message in an agent loop."""
 
     def __init__(
@@ -351,10 +351,10 @@ def __init__(
         reward_router_address: str = None,
     ):
         """Initialize agent loop manager.
-
         Args:
             config (DictConfig): YAML config.
             server_handles (List[ray.actor.ActorHandle]): OpenAI compatible LLM server actor handles.
+            reward_router_address (str): reward router address.
         """
         self.config = config
 
@@ -804,22 +804,6 @@ def create_transferqueue_client(
         )
 
 
-@ray.remote
-class AgentLoopWorker(AgentLoopWorkerBase):
-    """Agent loop worker takes a batch of messages and run each message in an agent loop."""
-
-    def __init__(
-        self, config: DictConfig, server_handles: list[ray.actor.ActorHandle], reward_router_address: str = None
-    ):
-        """Initialize agent loop manager.
-        Args:
-            config (DictConfig): YAML config.
-            server_handles (List[ray.actor.ActorHandle]): OpenAI compatible LLM server actor handles.
-            reward_router_address (str): reward router address.
-        """
-        super().__init__(config, server_handles, reward_router_address)
-
-
 async def get_trajectory_info(step, index, validate):
     """Get trajectory info.
 
@@ -869,7 +853,7 @@ def __init__(
         if not hasattr(self, "rollout_replica_class"):
             self.rollout_replica_class = get_rollout_replica_class(self.config.actor_rollout_ref.rollout.name)
         if not hasattr(self, "agent_loop_workers_class"):
-            self.agent_loop_workers_class = AgentLoopWorker
+            self.agent_loop_workers_class = ray.remote(AgentLoopWorker)
 
         self._initialize_llm_servers()
         self._init_agent_loop_workers()
diff --git a/verl/experimental/fully_async_policy/agent_loop/agent_loop.py b/verl/experimental/fully_async_policy/agent_loop/agent_loop.py
@@ -24,7 +24,7 @@
 from verl.experimental.agent_loop.agent_loop import (
     AgentLoopManager,
     AgentLoopOutput,
-    AgentLoopWorkerBase,
+    AgentLoopWorker,
     AsyncLLMServerManager,
     DictConfigWrap,
     _agent_loop_registry,
@@ -77,7 +77,7 @@ async def generate_for_partial(
 
 
 @ray.remote
-class FullyAsyncAgentLoopWorker(AgentLoopWorkerBase):
+class FullyAsyncAgentLoopWorker(AgentLoopWorker):
     def __init__(
         self, config: DictConfig, server_handles: list[ray.actor.ActorHandle], reward_router_address: str = None
     ):
diff --git a/verl/experimental/fully_async_policy/vllm_rollout/vllm_async_server.py b/verl/experimental/fully_async_policy/vllm_rollout/vllm_async_server.py
@@ -25,7 +25,7 @@
 from verl.workers.rollout.replica import RolloutMode
 from verl.workers.rollout.vllm_rollout.vllm_async_server import (
     _qwen2_5_vl_dedup_image_tokens,
-    vLLMHttpServerBase,
+    vLLMHttpServer,
     vLLMReplica,
 )
 
@@ -34,7 +34,7 @@
 
 
 @ray.remote(num_cpus=1)
-class vLLMHttpServerForPartial(vLLMHttpServerBase):
+class vLLMHttpServerForPartial(vLLMHttpServer):
     def __init__(
         self,
         config: RolloutConfig,
diff --git a/verl/workers/rollout/vllm_rollout/vllm_async_server.py b/verl/workers/rollout/vllm_rollout/vllm_async_server.py
@@ -168,7 +168,7 @@ def check_health(self):
         return
 
 
-class vLLMHttpServerBase:
+class vLLMHttpServer:
     """vLLM http server in single node, this is equivalent to launch server with command line:
     ```
     vllm serve --tensor-parallel-size=8 ...
@@ -663,28 +663,6 @@ async def abort_request(self, request_id: str, reset_prefix_cache: bool = True)
             return {"aborted": False, "request_id": request_id, "error": str(e)}
 
 
-@ray.remote(num_cpus=1)
-class vLLMHttpServer(vLLMHttpServerBase):
-    """vLLM http server in single node, this is equivalent to launch server with command line:
-    ```
-    vllm serve --tensor-parallel-size=8 ...
-    ```
-    """
-
-    def __init__(
-        self,
-        config: RolloutConfig,
-        model_config: HFModelConfig,
-        rollout_mode: RolloutMode,
-        workers: list[ActorHandle],
-        replica_rank: int,
-        node_rank: int,
-        gpus_per_node: int,
-        nnodes: int,
-    ):
-        super().__init__(config, model_config, rollout_mode, workers, replica_rank, node_rank, gpus_per_node, nnodes)
-
-
 _rollout_worker_actor_cls = ray.remote(vLLMAsyncRollout)
 
 
@@ -698,7 +676,7 @@ def __init__(
         is_reward_model: bool = False,
     ):
         super().__init__(replica_rank, config, model_config, gpus_per_node, is_reward_model)
-        self.server_class = vLLMHttpServer
+        self.server_class = ray.remote(vLLMHttpServer)
 
     def get_ray_class_with_init_args(self) -> RayClassWithInitArgs:
         """Get rollout worker actor class for colocated and standalone mode."""