diff --git a/verl/experimental/agent_loop/tool_agent_loop.py b/verl/experimental/agent_loop/tool_agent_loop.py
index ee6176775e0..d8cf336bdb1 100644
--- a/verl/experimental/agent_loop/tool_agent_loop.py
+++ b/verl/experimental/agent_loop/tool_agent_loop.py
@@ -353,10 +353,14 @@ async def _handle_processing_tools_state(self, agent_data: AgentData) -> AgentSt
                 None, lambda: self.tokenizer.encode(tool_response_text, add_special_tokens=False)
             )
         else:
+            # NOTE: pass None (rather than an empty value) for images / videos when this turn has no new
+            # images / videos, to stay compatible with the downstream image processing logic.
+            images = new_images_this_turn if new_images_this_turn else None
+            videos = None
             response_ids = await self.apply_chat_template(
                 add_messages,
-                images=new_images_this_turn,  # Using local variable
-                videos=None,
+                images=images,
+                videos=videos,
                 remove_system_prompt=True,
             )