From 4f9af0d797c1f951fc0127d8ac6ed576d360bed2 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Fri, 21 Mar 2025 15:47:36 -0700 Subject: [PATCH] fail task right away when scraping failed - no need to retry scraping failure at step level (#1997) --- skyvern/config.py | 2 +- skyvern/exceptions.py | 5 +++++ skyvern/forge/agent.py | 36 +++++++++++++++++++++++++------ skyvern/webeye/scraper/scraper.py | 4 ++-- skyvern/webeye/utils/page.py | 1 + 5 files changed, 39 insertions(+), 9 deletions(-) diff --git a/skyvern/config.py b/skyvern/config.py index 0d47463d..f3cdf408 100644 --- a/skyvern/config.py +++ b/skyvern/config.py @@ -17,7 +17,7 @@ class Settings(BaseSettings): TEMP_PATH: str = "./temp" BROWSER_ACTION_TIMEOUT_MS: int = 5000 BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000 - BROWSER_LOADING_TIMEOUT_MS: int = 120000 + BROWSER_LOADING_TIMEOUT_MS: int = 90000 OPTION_LOADING_TIMEOUT_MS: int = 600000 MAX_STEPS_PER_RUN: int = 10 MAX_STEPS_PER_TASK_V2: int = 25 diff --git a/skyvern/exceptions.py b/skyvern/exceptions.py index 90967b1e..42bb772f 100644 --- a/skyvern/exceptions.py +++ b/skyvern/exceptions.py @@ -250,6 +250,11 @@ class EmptyScrapePage(SkyvernException): super().__init__("Failed to scrape the page, returned an NONE result") +class ScrapingFailed(SkyvernException): + def __init__(self) -> None: + super().__init__("Scraping failed.") + + class WorkflowRunContextNotInitialized(SkyvernException): def __init__(self, workflow_run_id: str) -> None: super().__init__(f"WorkflowRunContext not initialized for workflow run {workflow_run_id}") diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 0e350719..d4de8a08 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -36,6 +36,7 @@ from skyvern.exceptions import ( MissingBrowserState, MissingBrowserStatePage, NoTOTPVerificationCodeFound, + ScrapingFailed, SkyvernException, StepTerminationError, StepUnableToExecuteError, @@ -678,7 +679,25 @@ class ForgeAgent: close_browser_on_completion=close_browser_on_completion and browser_session_id is None, ) return step, detailed_output, None - + except ScrapingFailed: + LOG.warning( + "Scraping failed, marking the task as failed", + task_id=task.task_id, + step_id=step.step_id, + ) + await self.fail_task( + task, + step, + "Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.", + ) + await self.clean_up_task( + task=task, + last_step=step, + api_key=api_key, + close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + browser_session_id=browser_session_id, + ) + return step, detailed_output, None except Exception as e: LOG.exception( "Got an unexpected exception in step, marking task as failed", @@ -1151,7 +1170,12 @@ class ForgeAgent: output=detailed_agent_step_output.to_agent_step_output(), ) return failed_step, detailed_agent_step_output.get_clean_detailed_output() - except (UnsupportedActionType, UnsupportedTaskType, FailedToParseActionInstruction): + except ( + UnsupportedActionType, + UnsupportedTaskType, + FailedToParseActionInstruction, + ScrapingFailed, + ): raise except Exception as e: @@ -1382,18 +1406,18 @@ class ForgeAgent: scrape_type=scrape_type, ) break - except FailedToTakeScreenshot as e: + except (FailedToTakeScreenshot, ScrapingFailed) as e: if idx < len(SCRAPE_TYPE_ORDER) - 1: continue LOG.error( - "Failed to take screenshot after two normal attempts and reload-page retry", + f"{e.__class__.__name__} happened in two normal attempts and reload-page retry", task_id=task.task_id, step_id=step.step_id, ) - raise e + raise ScrapingFailed() if scraped_page is None: - raise EmptyScrapePage + raise EmptyScrapePage() await app.ARTIFACT_MANAGER.create_artifact( step=step, diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py index cddd2c52..dc082ffb 100644 --- a/skyvern/webeye/scraper/scraper.py +++ b/skyvern/webeye/scraper/scraper.py @@ -11,7 +11,7 @@ from pydantic import BaseModel, PrivateAttr from skyvern.config import settings from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, SKYVERN_DIR, SKYVERN_ID_ATTR -from skyvern.exceptions import FailedToTakeScreenshot, UnknownElementTreeFormat +from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat from skyvern.forge.sdk.api.crypto import calculate_sha256 from skyvern.forge.sdk.core import skyvern_context from skyvern.webeye.browser_factory import BrowserState @@ -356,7 +356,7 @@ async def scrape_website( if isinstance(e, FailedToTakeScreenshot): raise e else: - raise Exception("Scraping failed.") + raise ScrapingFailed() from e LOG.info("Scraping failed, will retry", num_retry=num_retry, url=url) return await scrape_website( browser_state, diff --git a/skyvern/webeye/utils/page.py b/skyvern/webeye/utils/page.py index 4b275ad7..ee5ea11a 100644 --- a/skyvern/webeye/utils/page.py +++ b/skyvern/webeye/utils/page.py @@ -69,6 +69,7 @@ class SkyvernFrame: path=file_path, full_page=full_page, timeout=timeout, + animations="disabled", ) else: screenshot = await page.screenshot(