fail task right away when scraping fails - no need to retry scraping failure at step level (#1997)

This commit is contained in:
Shuchang Zheng
2025-03-21 15:47:36 -07:00
committed by GitHub
parent ad2a915dc1
commit 4f9af0d797
5 changed files with 39 additions and 9 deletions

View File

@@ -17,7 +17,7 @@ class Settings(BaseSettings):
TEMP_PATH: str = "./temp" TEMP_PATH: str = "./temp"
BROWSER_ACTION_TIMEOUT_MS: int = 5000 BROWSER_ACTION_TIMEOUT_MS: int = 5000
BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000 BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000
BROWSER_LOADING_TIMEOUT_MS: int = 120000 BROWSER_LOADING_TIMEOUT_MS: int = 90000
OPTION_LOADING_TIMEOUT_MS: int = 600000 OPTION_LOADING_TIMEOUT_MS: int = 600000
MAX_STEPS_PER_RUN: int = 10 MAX_STEPS_PER_RUN: int = 10
MAX_STEPS_PER_TASK_V2: int = 25 MAX_STEPS_PER_TASK_V2: int = 25

View File

@@ -250,6 +250,11 @@ class EmptyScrapePage(SkyvernException):
super().__init__("Failed to scrape the page, returned an NONE result") super().__init__("Failed to scrape the page, returned an NONE result")
class ScrapingFailed(SkyvernException):
    """Raised when scraping the page fails; callers treat this as terminal and fail the task without step-level retries."""

    def __init__(self) -> None:
        super().__init__("Scraping failed.")
class WorkflowRunContextNotInitialized(SkyvernException): class WorkflowRunContextNotInitialized(SkyvernException):
def __init__(self, workflow_run_id: str) -> None: def __init__(self, workflow_run_id: str) -> None:
super().__init__(f"WorkflowRunContext not initialized for workflow run {workflow_run_id}") super().__init__(f"WorkflowRunContext not initialized for workflow run {workflow_run_id}")

View File

@@ -36,6 +36,7 @@ from skyvern.exceptions import (
MissingBrowserState, MissingBrowserState,
MissingBrowserStatePage, MissingBrowserStatePage,
NoTOTPVerificationCodeFound, NoTOTPVerificationCodeFound,
ScrapingFailed,
SkyvernException, SkyvernException,
StepTerminationError, StepTerminationError,
StepUnableToExecuteError, StepUnableToExecuteError,
@@ -678,7 +679,25 @@ class ForgeAgent:
close_browser_on_completion=close_browser_on_completion and browser_session_id is None, close_browser_on_completion=close_browser_on_completion and browser_session_id is None,
) )
return step, detailed_output, None return step, detailed_output, None
except ScrapingFailed:
LOG.warning(
"Scraping failed, marking the task as failed",
task_id=task.task_id,
step_id=step.step_id,
)
await self.fail_task(
task,
step,
"Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
)
await self.clean_up_task(
task=task,
last_step=step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion and browser_session_id is None,
browser_session_id=browser_session_id,
)
return step, detailed_output, None
except Exception as e: except Exception as e:
LOG.exception( LOG.exception(
"Got an unexpected exception in step, marking task as failed", "Got an unexpected exception in step, marking task as failed",
@@ -1151,7 +1170,12 @@ class ForgeAgent:
output=detailed_agent_step_output.to_agent_step_output(), output=detailed_agent_step_output.to_agent_step_output(),
) )
return failed_step, detailed_agent_step_output.get_clean_detailed_output() return failed_step, detailed_agent_step_output.get_clean_detailed_output()
except (UnsupportedActionType, UnsupportedTaskType, FailedToParseActionInstruction): except (
UnsupportedActionType,
UnsupportedTaskType,
FailedToParseActionInstruction,
ScrapingFailed,
):
raise raise
except Exception as e: except Exception as e:
@@ -1382,18 +1406,18 @@ class ForgeAgent:
scrape_type=scrape_type, scrape_type=scrape_type,
) )
break break
except FailedToTakeScreenshot as e: except (FailedToTakeScreenshot, ScrapingFailed) as e:
if idx < len(SCRAPE_TYPE_ORDER) - 1: if idx < len(SCRAPE_TYPE_ORDER) - 1:
continue continue
LOG.error( LOG.error(
"Failed to take screenshot after two normal attempts and reload-page retry", f"{e.__class__.__name__} happened in two normal attempts and reload-page retry",
task_id=task.task_id, task_id=task.task_id,
step_id=step.step_id, step_id=step.step_id,
) )
raise e raise ScrapingFailed()
if scraped_page is None: if scraped_page is None:
raise EmptyScrapePage raise EmptyScrapePage()
await app.ARTIFACT_MANAGER.create_artifact( await app.ARTIFACT_MANAGER.create_artifact(
step=step, step=step,

View File

@@ -11,7 +11,7 @@ from pydantic import BaseModel, PrivateAttr
from skyvern.config import settings from skyvern.config import settings
from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, SKYVERN_DIR, SKYVERN_ID_ATTR from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, SKYVERN_DIR, SKYVERN_ID_ATTR
from skyvern.exceptions import FailedToTakeScreenshot, UnknownElementTreeFormat from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat
from skyvern.forge.sdk.api.crypto import calculate_sha256 from skyvern.forge.sdk.api.crypto import calculate_sha256
from skyvern.forge.sdk.core import skyvern_context from skyvern.forge.sdk.core import skyvern_context
from skyvern.webeye.browser_factory import BrowserState from skyvern.webeye.browser_factory import BrowserState
@@ -356,7 +356,7 @@ async def scrape_website(
if isinstance(e, FailedToTakeScreenshot): if isinstance(e, FailedToTakeScreenshot):
raise e raise e
else: else:
raise Exception("Scraping failed.") raise ScrapingFailed() from e
LOG.info("Scraping failed, will retry", num_retry=num_retry, url=url) LOG.info("Scraping failed, will retry", num_retry=num_retry, url=url)
return await scrape_website( return await scrape_website(
browser_state, browser_state,

View File

@@ -69,6 +69,7 @@ class SkyvernFrame:
path=file_path, path=file_path,
full_page=full_page, full_page=full_page,
timeout=timeout, timeout=timeout,
animations="disabled",
) )
else: else:
screenshot = await page.screenshot( screenshot = await page.screenshot(