Fail the task right away when scraping fails - no need to retry scraping failures at the step level (#1997)

This commit is contained in:
Shuchang Zheng
2025-03-21 15:47:36 -07:00
committed by GitHub
parent ad2a915dc1
commit 4f9af0d797
5 changed files with 39 additions and 9 deletions

View File

@@ -17,7 +17,7 @@ class Settings(BaseSettings):
TEMP_PATH: str = "./temp"
BROWSER_ACTION_TIMEOUT_MS: int = 5000
BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000
BROWSER_LOADING_TIMEOUT_MS: int = 120000
BROWSER_LOADING_TIMEOUT_MS: int = 90000
OPTION_LOADING_TIMEOUT_MS: int = 600000
MAX_STEPS_PER_RUN: int = 10
MAX_STEPS_PER_TASK_V2: int = 25

View File

@@ -250,6 +250,11 @@ class EmptyScrapePage(SkyvernException):
super().__init__("Failed to scrape the page, returned an NONE result")
class ScrapingFailed(SkyvernException):
    """Raised when scraping a page fails.

    Takes no arguments; the exception message is always "Scraping failed.".
    """

    def __init__(self) -> None:
        super().__init__("Scraping failed.")
class WorkflowRunContextNotInitialized(SkyvernException):
def __init__(self, workflow_run_id: str) -> None:
super().__init__(f"WorkflowRunContext not initialized for workflow run {workflow_run_id}")

View File

@@ -36,6 +36,7 @@ from skyvern.exceptions import (
MissingBrowserState,
MissingBrowserStatePage,
NoTOTPVerificationCodeFound,
ScrapingFailed,
SkyvernException,
StepTerminationError,
StepUnableToExecuteError,
@@ -678,7 +679,25 @@ class ForgeAgent:
close_browser_on_completion=close_browser_on_completion and browser_session_id is None,
)
return step, detailed_output, None
except ScrapingFailed:
LOG.warning(
"Scraping failed, marking the task as failed",
task_id=task.task_id,
step_id=step.step_id,
)
await self.fail_task(
task,
step,
"Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
)
await self.clean_up_task(
task=task,
last_step=step,
api_key=api_key,
close_browser_on_completion=close_browser_on_completion and browser_session_id is None,
browser_session_id=browser_session_id,
)
return step, detailed_output, None
except Exception as e:
LOG.exception(
"Got an unexpected exception in step, marking task as failed",
@@ -1151,7 +1170,12 @@ class ForgeAgent:
output=detailed_agent_step_output.to_agent_step_output(),
)
return failed_step, detailed_agent_step_output.get_clean_detailed_output()
except (UnsupportedActionType, UnsupportedTaskType, FailedToParseActionInstruction):
except (
UnsupportedActionType,
UnsupportedTaskType,
FailedToParseActionInstruction,
ScrapingFailed,
):
raise
except Exception as e:
@@ -1382,18 +1406,18 @@ class ForgeAgent:
scrape_type=scrape_type,
)
break
except FailedToTakeScreenshot as e:
except (FailedToTakeScreenshot, ScrapingFailed) as e:
if idx < len(SCRAPE_TYPE_ORDER) - 1:
continue
LOG.error(
"Failed to take screenshot after two normal attempts and reload-page retry",
f"{e.__class__.__name__} happened in two normal attempts and reload-page retry",
task_id=task.task_id,
step_id=step.step_id,
)
raise e
raise ScrapingFailed()
if scraped_page is None:
raise EmptyScrapePage
raise EmptyScrapePage()
await app.ARTIFACT_MANAGER.create_artifact(
step=step,

View File

@@ -11,7 +11,7 @@ from pydantic import BaseModel, PrivateAttr
from skyvern.config import settings
from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, SKYVERN_DIR, SKYVERN_ID_ATTR
from skyvern.exceptions import FailedToTakeScreenshot, UnknownElementTreeFormat
from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat
from skyvern.forge.sdk.api.crypto import calculate_sha256
from skyvern.forge.sdk.core import skyvern_context
from skyvern.webeye.browser_factory import BrowserState
@@ -356,7 +356,7 @@ async def scrape_website(
if isinstance(e, FailedToTakeScreenshot):
raise e
else:
raise Exception("Scraping failed.")
raise ScrapingFailed() from e
LOG.info("Scraping failed, will retry", num_retry=num_retry, url=url)
return await scrape_website(
browser_state,

View File

@@ -69,6 +69,7 @@ class SkyvernFrame:
path=file_path,
full_page=full_page,
timeout=timeout,
animations="disabled",
)
else:
screenshot = await page.screenshot(