From c13c36f99e9ebdcc87fd43fd054d640f9b2d5184 Mon Sep 17 00:00:00 2001
From: Jonathan Dobson
Date: Thu, 17 Jul 2025 16:19:16 -0400
Subject: [PATCH] distinctify failed scrapes due to no url (#2977)

---
 skyvern/exceptions.py             | 8 +++++++-
 skyvern/forge/agent.py            | 6 ++++--
 skyvern/webeye/scraper/scraper.py | 6 +++++-
 3 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py
index eac63a2e..076153e3 100644
--- a/skyvern/exceptions.py
+++ b/skyvern/exceptions.py
@@ -264,10 +264,16 @@ class EmptyScrapePage(SkyvernException):
 
 
 class ScrapingFailed(SkyvernException):
-    def __init__(self) -> None:
+    def __init__(self, *, reason: str | None = None) -> None:
+        self.reason = reason
         super().__init__("Scraping failed.")
 
 
+class ScrapingFailedNoUrl(ScrapingFailed):
+    def __init__(self) -> None:
+        super().__init__(reason="A URL is missing. Please ensure there is a URL for Skyvern to work with.")
+
+
 class WorkflowRunContextNotInitialized(SkyvernException):
     def __init__(self, workflow_run_id: str) -> None:
         super().__init__(f"WorkflowRunContext not initialized for workflow run {workflow_run_id}")
diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
index 125ecede..3b0fb8af 100644
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -749,17 +749,19 @@ class ForgeAgent:
                 close_browser_on_completion=close_browser_on_completion and browser_session_id is None,
             )
             return step, detailed_output, None
-        except ScrapingFailed:
+        except ScrapingFailed as sfe:
             LOG.warning(
                 "Scraping failed, marking the task as failed",
                 task_id=task.task_id,
                 step_id=step.step_id,
                 exc_info=True,
             )
+
             await self.fail_task(
                 task,
                 step,
-                "Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
+                sfe.reason
+                or "Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
             )
             await self.clean_up_task(
                 task=task,
diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py
index 98d33832..5f45e460 100644
--- a/skyvern/webeye/scraper/scraper.py
+++ b/skyvern/webeye/scraper/scraper.py
@@ -13,7 +13,7 @@ from pydantic import BaseModel, PrivateAttr
 
 from skyvern.config import settings
 from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR
-from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat
+from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, ScrapingFailedNoUrl, UnknownElementTreeFormat
 from skyvern.forge.sdk.api.crypto import calculate_sha256
 from skyvern.forge.sdk.core import skyvern_context
 from skyvern.forge.sdk.trace import TraceManager
@@ -426,6 +426,10 @@
 
     :raises Exception: When scraping fails after maximum retries.
     """
+
+    if not url.strip():
+        raise ScrapingFailedNoUrl()
+
     try:
         num_retry += 1
         return await scrape_web_unsafe(