Skip to content

Commit e63cb1b

Browse files
sai-meyyappan (Sai Meyyappan) and p0deje
authored
feat: switch to next/previous tabs (#246)
--------- Co-authored-by: Sai Meyyappan <sai.meyyappan@airbnb.com> Co-authored-by: Alex Rodionov <p0deje@gmail.com>
1 parent 752655c commit e63cb1b

26 files changed

+775
-33
lines changed
Lines changed: 37 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,37 @@
1-
def test_autoswitch_to_tab(al, navigate):
2-
navigate("https://the-internet.herokuapp.com/windows")
3-
al.do("click on 'Click Here' link")
4-
assert al.get("header text") == "New Window"
1+
from os import getenv
2+
3+
from pytest import mark
4+
5+
from alumnium.alumni import Alumni
6+
from alumnium.tools import SwitchToNextTabTool, SwitchToPreviousTabTool
7+
8+
9+
@mark.xfail(
    "appium" in getenv("ALUMNIUM_DRIVER", "selenium"),
    reason="Appium doesn't support tab manipulation yet",
)
def test_switching_tabs(al, driver, navigate):
    """Open a second tab and cycle between the two tabs in both directions.

    A dedicated ``Alumni`` instance is built on top of ``driver`` with the
    tab-switching tools enabled. Each step performs one action and then
    checks which page is active: the new tab is identified by its
    ``about:blank`` URL, the original tab by its header text.
    """
    tab_tools = [
        SwitchToNextTabTool,
        SwitchToPreviousTabTool,
    ]
    tab_al = Alumni(driver, extra_tools=tab_tools)

    navigate("multi_tab_page.html")

    on_blank_tab = ("current page URL", "about:blank")
    on_test_page = ("header text", "Multi-Tab Test Page")

    # (action to perform, (question to ask afterwards, expected answer))
    steps = [
        ("click on 'Open New Tab' button", on_blank_tab),
        ("switch to previous browser tab", on_test_page),
        ("switch to next browser tab", on_blank_tab),
        # With two tabs open, "next" from the last tab wraps to the first...
        ("switch to next browser tab", on_test_page),
        # ...and "previous" from the first tab wraps to the last.
        ("switch to previous browser tab", on_blank_tab),
    ]

    for action, (question, expected) in steps:
        tab_al.do(action)
        assert tab_al.get(question) == expected
Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,42 @@
1+
<!DOCTYPE html>
<html>
<head>
  <title>Multi-Tab Test Page</title>
</head>
<body>
  <h1>Multi-Tab Test Page</h1>
  <p>This page is used to test tab management functionality.</p>

  <!-- Opens an empty page in a new tab. -->
  <button id="open-tab-btn" onclick="window.open('about:blank', '_blank')">
    Open New Tab
  </button>

  <!-- Opens a small popup window that closes itself after 2 seconds. -->
  <button id="open-popup-btn" onclick="openPopup()">
    Open Popup (auto-closes)
  </button>

  <!-- Hidden until showDelayed() reveals it one second after the click. -->
  <div id="delayed-element" style="display: none;">
    Delayed Element Appeared!
  </div>

  <button id="show-delayed-btn" onclick="showDelayed()">
    Show Delayed Element
  </button>

  <script>
    function openPopup() {
      var popup = window.open('', 'popup', 'width=400,height=300');
      if (!popup) {
        // window.open() returns null when the browser blocks the popup;
        // without this guard the writes below would throw a TypeError.
        return;
      }
      popup.document.write('<html><head><title>Popup Window</title></head>');
      popup.document.write('<body><h1>Popup Content</h1>');
      popup.document.write('<p>This popup will close in 2 seconds...</p></body></html>');
      // Finish the document stream opened by document.write() so the popup
      // is not left in a perpetual "loading" state.
      popup.document.close();
      setTimeout(function() { popup.close(); }, 2000);
    }

    function showDelayed() {
      setTimeout(function() {
        document.getElementById('delayed-element').style.display = 'block';
      }, 1000);
    }
  </script>
</body>
</html>

packages/python/src/alumnium/drivers/appium_driver.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -242,3 +242,9 @@ def _scroll_into_view_android(self, element: WebElement, max_scrolls: int = 10,
242242
f"Element not visible after {max_scrolls} scrolls. "
243243
f"Try adjusting the scroll direction or increase max_scrolls."
244244
)
245+
246+
def switch_to_next_tab(self):
    """Reject the request: this driver does not implement tab switching.

    Raises:
        NotImplementedError: Always.
    """
    message = "Tab switching not supported for this driver"
    raise NotImplementedError(message)
248+
249+
def switch_to_previous_tab(self):
    """Reject the request: this driver does not implement tab switching.

    Raises:
        NotImplementedError: Always.
    """
    message = "Tab switching not supported for this driver"
    raise NotImplementedError(message)

packages/python/src/alumnium/drivers/base_driver.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,11 @@ def find_element(self, id: int) -> Element:
6565
@abstractmethod
6666
def execute_script(self, script: str):
6767
pass
68+
69+
@abstractmethod
def switch_to_next_tab(self):
    """Abstract hook for switching to the next tab.

    Declared abstract and carries no behavior of its own.
    """
72+
73+
@abstractmethod
def switch_to_previous_tab(self):
    """Abstract hook for switching to the previous tab.

    Declared abstract and carries no behavior of its own.
    """

packages/python/src/alumnium/drivers/playwright_async_driver.py

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ def __init__(self, page: Page, loop: AbstractEventLoop):
2424
self.client = None
2525
self.page = page
2626
self.loop = loop
27+
self.autoswitch_to_new_tab = True # Can be disabled via alumnium:options
2728
self.supported_tools = {
2829
ClickTool,
2930
DragAndDropTool,
@@ -33,6 +34,7 @@ def __init__(self, page: Page, loop: AbstractEventLoop):
3334
UploadTool,
3435
}
3536
self._run_async(self._enable_target_auto_attach())
37+
self._run_async(self._setup_page_tracking(page))
3638

3739
@property
3840
def platform(self) -> str:
@@ -327,6 +329,11 @@ async def _wait_for_page_to_load(self):
327329

328330
@asynccontextmanager
329331
async def _autoswitch_to_new_tab(self):
332+
# If auto-switch is disabled, just yield without waiting for new pages
333+
if not self.autoswitch_to_new_tab:
334+
yield
335+
return
336+
330337
try:
331338
async with self.page.context.expect_page(timeout=PlaywrightDriver.NEW_TAB_TIMEOUT) as new_page_info:
332339
yield
@@ -373,6 +380,29 @@ async def _enable_target_auto_attach(self):
373380
except Exception as e:
374381
logger.debug(f"Could not enable Target.setAutoAttach: {e}")
375382

383+
async def _setup_page_tracking(self, initial_page: Page):
    """Start tracking pages, beginning with ``initial_page``.

    Creates ``self._pages`` holding only ``initial_page`` and subscribes to
    that page's events. The coroutine itself awaits nothing.
    """
    tracked: list[Page] = [initial_page]
    self._pages = tracked
    self._attach_page_listeners(initial_page)
387+
388+
def _attach_page_listeners(self, page: Page):
    """Subscribe this driver to the ``popup`` and ``close`` events of ``page``."""
    # Both handlers are plain (non-async) callables. Per the original author's
    # note, an async handler would block via _run_async and could deadlock.
    subscriptions = (
        ("popup", self._on_popup_sync),
        ("close", self._on_page_close),
    )
    for event_name, handler in subscriptions:
        page.on(event_name, handler)
393+
394+
def _on_popup_sync(self, popup: Page):
    """Record a page opened from a tracked page (sync to avoid deadlock).

    The new page is appended to ``self._pages`` and gets the same listeners,
    so pages it opens in turn are tracked as well.
    """
    opened_url = popup.url
    logger.debug(f"New popup opened: {opened_url}")
    self._pages.append(popup)
    # Chain: the new page also listens for popups and for its own close.
    self._attach_page_listeners(popup)
399+
400+
def _on_page_close(self, popup: Page):
    """Stop tracking a page once it has been closed.

    Pages that are not in ``self._pages`` are ignored.
    """
    if popup not in self._pages:
        return
    logger.debug(f"Page closed: {popup.url}")
    self._pages.remove(popup)
405+
376406
def _get_all_frame_ids(self, frame_info: dict) -> list[str]:
377407
"""Recursively collect all frame IDs from CDP frame tree."""
378408
frame_ids = [frame_info["frame"]["id"]]
@@ -572,6 +602,40 @@ def search_frame(frame_info: dict) -> str | None:
572602

573603
return search_frame(cdp_frame_tree["frameTree"])
574604

605+
def switch_to_next_tab(self):
    """Synchronous entry point: run ``_switch_to_next_tab`` via ``_run_async``."""
    pending = self._switch_to_next_tab()
    self._run_async(pending)
607+
608+
async def _switch_to_next_tab(self):
    """Make the next tracked page the active one, wrapping to the first.

    Does nothing when no pages are tracked or when the active page is the
    only tracked page. If the active page is no longer tracked (for example
    because it was closed and removed by the close handler), the first
    tracked page becomes active instead of raising ``ValueError``.
    """
    # Brief wait to allow popup handlers to complete. A closed page cannot
    # wait, so the pause is skipped in that case.
    if not self.page.is_closed():
        await self.page.wait_for_timeout(100)
    if not self._pages:
        return  # No tracked tabs, nothing to switch to

    try:
        current_index = self._pages.index(self.page)
    except ValueError:
        # Active page is untracked: position it "before" the first tab.
        current_index = -1

    target = self._pages[(current_index + 1) % len(self._pages)]  # Wrap to first
    if target is self.page:
        return  # Only one tab, nothing to switch

    self.page = target
    self.client = None  # Reset CDP client for new page
    await self.page.wait_for_load_state()
    logger.debug(f"Switched to next tab: {self.page.url}")
621+
622+
def switch_to_previous_tab(self):
    """Synchronous entry point: run ``_switch_to_previous_tab`` via ``_run_async``."""
    pending = self._switch_to_previous_tab()
    self._run_async(pending)
624+
625+
async def _switch_to_previous_tab(self):
    """Make the previous tracked page the active one, wrapping to the last.

    Does nothing when no pages are tracked or when the active page is the
    only tracked page. If the active page is no longer tracked (for example
    because it was closed and removed by the close handler), the last
    tracked page becomes active instead of raising ``ValueError``.
    """
    # Brief wait to allow popup handlers to complete. A closed page cannot
    # wait, so the pause is skipped in that case.
    if not self.page.is_closed():
        await self.page.wait_for_timeout(100)
    if not self._pages:
        return  # No tracked tabs, nothing to switch to

    try:
        current_index = self._pages.index(self.page)
    except ValueError:
        # Active page is untracked: position it so that "previous" is the last tab.
        current_index = 0

    target = self._pages[(current_index - 1) % len(self._pages)]  # Wrap to last
    if target is self.page:
        return  # Only one tab, nothing to switch

    self.page = target
    self.client = None  # Reset CDP client for new page
    await self.page.wait_for_load_state()
    logger.debug(f"Switched to previous tab: {self.page.url}")
638+
575639
def _run_async(self, coro):
576640
future = run_coroutine_threadsafe(coro, self.loop)
577641
return future.result()

packages/python/src/alumnium/drivers/playwright_driver.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def __init__(self, page: Page):
4444
UploadTool,
4545
}
4646
self._enable_target_auto_attach()
47+
self._setup_page_tracking(page)
4748

4849
@property
4950
def platform(self) -> str:
@@ -325,6 +326,28 @@ def _enable_target_auto_attach(self):
325326
except Exception as e:
326327
logger.debug(f"Could not enable Target.setAutoAttach: {e}")
327328

329+
def _setup_page_tracking(self, initial_page: Page):
    """Start tracking pages, beginning with ``initial_page``.

    Creates ``self._pages`` holding only ``initial_page`` and subscribes to
    that page's events.
    """
    tracked: list[Page] = [initial_page]
    self._pages = tracked
    self._attach_page_listeners(initial_page)
333+
334+
def _attach_page_listeners(self, page: Page):
    """Subscribe this driver to the ``popup`` and ``close`` events of ``page``."""
    subscriptions = (
        ("popup", self._on_popup),
        ("close", self._on_page_close),
    )
    for event_name, handler in subscriptions:
        page.on(event_name, handler)
338+
339+
def _on_popup(self, popup: Page):
    """Record a page opened from a tracked page.

    The new page is appended to ``self._pages`` and gets the same listeners,
    so pages it opens in turn are tracked as well.
    """
    opened_url = popup.url
    logger.debug(f"New popup opened: {opened_url}")
    self._pages.append(popup)
    # Chain: the new page also listens for popups and for its own close.
    self._attach_page_listeners(popup)
344+
345+
def _on_page_close(self, popup: Page):
    """Stop tracking a page once it has been closed.

    Pages that are not in ``self._pages`` are ignored.
    """
    if popup not in self._pages:
        return
    logger.debug(f"Page closed: {popup.url}")
    self._pages.remove(popup)
350+
328351
def _get_all_frame_ids(self, frame_info: dict) -> list[str]:
329352
"""Recursively collect all frame IDs from CDP frame tree."""
330353
frame_ids = [frame_info["frame"]["id"]]
@@ -523,3 +546,29 @@ def search_frame(frame_info: dict) -> str | None:
523546
return None
524547

525548
return search_frame(cdp_frame_tree["frameTree"])
549+
550+
def switch_to_next_tab(self):
    """Make the next tracked page the active one, wrapping to the first.

    Does nothing when no pages are tracked or when the active page is the
    only tracked page. If the active page is no longer tracked (for example
    because it was closed and removed by the close handler), the first
    tracked page becomes active instead of raising ``ValueError``. After a
    switch, a new CDP session is opened for the newly active page.
    """
    # Brief wait to allow popup handlers to complete. A closed page cannot
    # wait, so the pause is skipped in that case.
    if not self.page.is_closed():
        self.page.wait_for_timeout(100)
    if not self._pages:
        return  # No tracked tabs, nothing to switch to

    try:
        current_index = self._pages.index(self.page)
    except ValueError:
        # Active page is untracked: position it "before" the first tab.
        current_index = -1

    target = self._pages[(current_index + 1) % len(self._pages)]  # Wrap to first
    if target is self.page:
        return  # Only one tab, nothing to switch

    self.page = target
    self.client = self.page.context.new_cdp_session(self.page)
    self.page.wait_for_load_state()
562+
563+
def switch_to_previous_tab(self):
    """Make the previous tracked page the active one, wrapping to the last.

    Does nothing when no pages are tracked or when the active page is the
    only tracked page. If the active page is no longer tracked (for example
    because it was closed and removed by the close handler), the last
    tracked page becomes active instead of raising ``ValueError``. After a
    switch, a new CDP session is opened for the newly active page.
    """
    # Brief wait to allow popup handlers to complete. A closed page cannot
    # wait, so the pause is skipped in that case.
    if not self.page.is_closed():
        self.page.wait_for_timeout(100)
    if not self._pages:
        return  # No tracked tabs, nothing to switch to

    try:
        current_index = self._pages.index(self.page)
    except ValueError:
        # Active page is untracked: position it so that "previous" is the last tab.
        current_index = 0

    target = self._pages[(current_index - 1) % len(self._pages)]  # Wrap to last
    if target is self.page:
        return  # Only one tab, nothing to switch

    self.page = target
    self.client = self.page.context.new_cdp_session(self.page)
    self.page.wait_for_load_state()

packages/python/src/alumnium/drivers/selenium_driver.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -555,3 +555,21 @@ def _wait_for_page_to_load(self):
555555
logger.debug(f" <- Failed to wait for page to load: {error}")
556556
else:
557557
logger.debug(" <- Page finished loading")
558+
559+
def switch_to_next_tab(self):
    """Switch the WebDriver to the window handle after the current one.

    Handles are taken in the order reported by ``window_handles``; from the
    last handle the switch wraps around to the first. Nothing happens when
    there is at most one handle.
    """
    tabs = self.driver.window_handles
    tab_count = len(tabs)
    if tab_count <= 1:
        return
    position = tabs.index(self.driver.current_window_handle)
    target_handle = tabs[(position + 1) % tab_count]
    self.driver.switch_to.window(target_handle)
    logger.debug(f"Switched to next tab: {self.driver.title} ({self.driver.current_url})")
567+
568+
def switch_to_previous_tab(self):
    """Switch the WebDriver to the window handle before the current one.

    Handles are taken in the order reported by ``window_handles``; from the
    first handle the switch wraps around to the last. Nothing happens when
    there is at most one handle.
    """
    tabs = self.driver.window_handles
    tab_count = len(tabs)
    if tab_count <= 1:
        return
    position = tabs.index(self.driver.current_window_handle)
    target_handle = tabs[(position - 1) % tab_count]
    self.driver.switch_to.window(target_handle)
    logger.debug(f"Switched to previous tab: {self.driver.title} ({self.driver.current_url})")

packages/python/src/alumnium/mcp/drivers.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,11 @@ async def _create_resources():
5757
cookie["path"] = "/"
5858
await context.add_cookies(cookies)
5959

60+
permissions = capabilities.get("permissions", [])
61+
if permissions:
62+
logger.debug(f"Granting permissions: {permissions}")
63+
await context.grant_permissions(permissions)
64+
6065
page = await context.new_page()
6166

6267
return page

packages/python/src/alumnium/mcp/handlers.py

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,14 @@
1010
from .. import Alumni
1111
from ..clients.native_client import NativeClient
1212
from ..server.logutils import get_logger
13-
from ..tools import ExecuteJavascriptTool, NavigateBackTool, NavigateToUrlTool, ScrollTool
13+
from ..tools import (
14+
ExecuteJavascriptTool,
15+
NavigateBackTool,
16+
NavigateToUrlTool,
17+
ScrollTool,
18+
SwitchToNextTabTool,
19+
SwitchToPreviousTabTool,
20+
)
1421
from . import drivers, screenshots, state
1522

1623
logger = get_logger(__name__)
@@ -75,6 +82,8 @@ async def handle_start_driver(args: dict[str, Any]) -> list[dict]:
7582
NavigateBackTool,
7683
NavigateToUrlTool,
7784
ScrollTool,
85+
SwitchToNextTabTool,
86+
SwitchToPreviousTabTool,
7887
],
7988
)
8089

@@ -245,3 +254,49 @@ async def handle_stop_driver(args: dict[str, Any]) -> list[dict]:
245254
)
246255

247256
return [{"type": "text", "text": message}]
257+
258+
259+
async def handle_wait(args: dict[str, Any]) -> list[dict]:
    """Wait for a number of seconds or until a natural language condition holds.

    Args:
        args: Tool arguments.
            ``for`` (required): a number of seconds, or any other value,
            which is converted to ``str`` and used as the condition to poll.
            ``driver_id``: required when waiting for a condition.
            ``timeout``: seconds to keep polling the condition (default 10).

    Returns:
        A single-item list with a text message describing the outcome:
        the time waited, the met condition with its explanation, a timeout
        report with the last failure, or a missing ``driver_id`` notice.

    Raises:
        KeyError: If ``args`` has no ``for`` entry.
    """
    import asyncio
    import time

    wait_for = args["for"]

    # If it's a number, wait that many whole seconds, clamped to 1..30.
    if isinstance(wait_for, (int, float)):
        seconds = max(1, min(30, int(wait_for)))
        logger.info(f"Waiting for {seconds} seconds")
        await asyncio.sleep(seconds)
        return [{"type": "text", "text": f"Waited {seconds} seconds"}]

    # Otherwise, treat as natural language condition
    condition = str(wait_for)
    driver_id = args.get("driver_id")
    if not driver_id:
        return [{"type": "text", "text": "driver_id is required when waiting for a condition"}]

    timeout = args.get("timeout", 10)
    poll_interval = 1.0

    logger.info(f"Driver {driver_id}: Waiting for '{condition}' (timeout={timeout}s)")

    al, _ = state.get_driver(driver_id)

    # Monotonic clock: the deadline is unaffected by wall-clock adjustments.
    deadline = time.monotonic() + timeout
    last_error = None
    attempts = 0

    # The condition is always checked at least once, even with timeout <= 0,
    # so a timeout report never carries an empty "Last check".
    while True:
        attempts += 1
        try:
            explanation = al.check(condition)
        except AssertionError as e:
            # A failed check raises AssertionError; remember why and retry.
            last_error = str(e)
            logger.debug(f"Driver {driver_id}: Condition not met (attempt {attempts})")
        else:
            logger.info(f"Driver {driver_id}: Condition met after {attempts} attempt(s)")
            return [{"type": "text", "text": f"Condition met: {condition}\n{explanation}"}]

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Never sleep past the deadline.
        await asyncio.sleep(min(poll_interval, remaining))

    logger.warning(f"Driver {driver_id}: Timeout waiting for '{condition}'")
    return [{"type": "text", "text": f"Timeout after {timeout}s waiting for: {condition}\nLast check: {last_error}"}]

packages/python/src/alumnium/mcp/server.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,8 @@ async def call_tool(name: str, arguments: dict[str, Any]) -> list[Any]:
4444
return await handlers.handle_get(arguments)
4545
elif name == "fetch_accessibility_tree":
4646
return await handlers.handle_fetch_accessibility_tree(arguments)
47+
elif name == "wait":
48+
return await handlers.handle_wait(arguments)
4749
elif name == "stop_driver":
4850
return await handlers.handle_stop_driver(arguments)
4951
else:

0 commit comments

Comments
 (0)