Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions skyvern/core/script_generations/script_skyvern_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,10 +546,11 @@ async def _wait_for_page_ready_before_action(self) -> None:
3. DOM stability (no significant mutations for 300ms)
"""
try:
if not self._page:
# Note: SkyvernPage uses self.page, not self._page
if not self.page:
return

skyvern_frame = await SkyvernFrame.create_instance(frame=self._page)
skyvern_frame = await SkyvernFrame.create_instance(frame=self.page)
await skyvern_frame.wait_for_page_ready(
network_idle_timeout_ms=settings.PAGE_READY_NETWORK_IDLE_TIMEOUT_MS,
loading_indicator_timeout_ms=settings.PAGE_READY_LOADING_INDICATOR_TIMEOUT_MS,
Expand Down
17 changes: 16 additions & 1 deletion skyvern/forge/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -2456,8 +2456,23 @@ async def build_and_record_step_prompt(
break
except (FailedToTakeScreenshot, ScrapingFailed) as e:
if idx < len(SCRAPE_TYPE_ORDER) - 1:
LOG.warning(
"Scrape attempt failed, will retry with next strategy",
attempt=idx + 1,
scrape_type=scrape_type.value if hasattr(scrape_type, "value") else str(scrape_type),
error_type=e.__class__.__name__,
url=task.url,
)
continue
LOG.exception(f"{e.__class__.__name__} happened in two normal attempts and reload-page retry")
LOG.error(
"All scrape attempts failed",
total_attempts=len(SCRAPE_TYPE_ORDER),
error_type=e.__class__.__name__,
url=task.url,
step_order=step.order,
step_retry=step.retry_index,
exc_info=True,
)
Comment on lines 2458 to +2475
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Redact URLs in scrape failure logs.
These logs include full URLs, which may carry credentials or tokens in query strings. Please sanitize before logging.

🔒 Suggested change
-                        url=task.url,
+                        url=_redact_url(task.url),
...
-                        url=task.url,
+                        url=_redact_url(task.url),

Example helper (place near the top of the module):

from urllib.parse import urlsplit, urlunsplit

def _redact_url(raw_url: str) -> str:
    parts = urlsplit(raw_url)
    return urlunsplit((parts.scheme, parts.netloc, parts.path, "", ""))

As per coding guidelines "Never expose sensitive information in error messages".

🤖 Prompt for AI Agents
In `@skyvern/forge/agent.py` around lines 2458 - 2475, The failure logs currently
include full URLs (task.url); create a small helper like _redact_url(raw_url:
str) that uses urllib.parse.urlsplit/urlunsplit to strip query and fragment,
import those functions at the top of the module, and replace usages of task.url
in the LOG.warning and LOG.error calls inside the scrape retry block (the block
where SCRAPE_TYPE_ORDER is iterated and where step.order/step.retry_index are
logged) with _redact_url(task.url) so logs never emit sensitive query strings or
tokens.

raise e

if scraped_page is None:
Expand Down
28 changes: 26 additions & 2 deletions skyvern/webeye/utils/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,15 @@ async def _current_viewpoint_screenshot_helper(
) -> bytes:
if page.is_closed():
raise FailedToTakeScreenshot(error_message="Page is closed")

# Capture page context for debugging screenshot issues
url = page.url
try:
viewport = page.viewport_size
viewport_info = f"{viewport['width']}x{viewport['height']}" if viewport else "unknown"
except Exception:
viewport_info = "unknown"

try:
if mode == ScreenshotMode.DETAILED:
await page.wait_for_load_state(timeout=SettingsManager.get_settings().BROWSER_LOADING_TIMEOUT_MS)
Expand All @@ -94,10 +103,25 @@ async def _current_viewpoint_screenshot_helper(
)
return screenshot
except TimeoutError as e:
LOG.exception(f"Timeout error while taking screenshot: {str(e)}")
LOG.error(
"Screenshot timeout",
timeout_ms=timeout,
url=url,
viewport=viewport_info,
full_page=full_page,
mode=mode.value if hasattr(mode, "value") else str(mode),
error=str(e),
)
raise FailedToTakeScreenshot(error_message=str(e)) from e
except Exception as e:
LOG.exception(f"Unknown error while taking screenshot: {str(e)}")
LOG.error(
"Screenshot failed",
url=url,
viewport=viewport_info,
full_page=full_page,
error=str(e),
exc_info=True,
)
raise FailedToTakeScreenshot(error_message=str(e)) from e


Expand Down
Loading