distinctify failed scrapes due to no url (#2977)

This commit is contained in:
Jonathan Dobson
2025-07-17 16:19:16 -04:00
committed by GitHub
parent 05a24026e2
commit c13c36f99e
3 changed files with 16 additions and 4 deletions

View File

@@ -264,10 +264,16 @@ class EmptyScrapePage(SkyvernException):
 class ScrapingFailed(SkyvernException):
-    def __init__(self) -> None:
+    def __init__(self, *, reason: str | None = None) -> None:
+        self.reason = reason
         super().__init__("Scraping failed.")
 
 
+class ScrapingFailedNoUrl(ScrapingFailed):
+    def __init__(self) -> None:
+        super().__init__(reason="A URL is missing. Please ensure there is a URL for Skyvern to work with.")
+
+
 class WorkflowRunContextNotInitialized(SkyvernException):
     def __init__(self, workflow_run_id: str) -> None:
         super().__init__(f"WorkflowRunContext not initialized for workflow run {workflow_run_id}")

View File

@@ -749,17 +749,19 @@ class ForgeAgent:
                 close_browser_on_completion=close_browser_on_completion and browser_session_id is None,
             )
             return step, detailed_output, None
-        except ScrapingFailed:
+        except ScrapingFailed as sfe:
             LOG.warning(
                 "Scraping failed, marking the task as failed",
                 task_id=task.task_id,
                 step_id=step.step_id,
                 exc_info=True,
             )
             await self.fail_task(
                 task,
                 step,
-                "Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
+                sfe.reason
+                or "Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
             )
             await self.clean_up_task(
                 task=task,

View File

@@ -13,7 +13,7 @@ from pydantic import BaseModel, PrivateAttr
 from skyvern.config import settings
 from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR
-from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat
+from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, ScrapingFailedNoUrl, UnknownElementTreeFormat
 from skyvern.forge.sdk.api.crypto import calculate_sha256
 from skyvern.forge.sdk.core import skyvern_context
 from skyvern.forge.sdk.trace import TraceManager
@@ -426,6 +426,10 @@ async def scrape_website(
     :raises Exception: When scraping fails after maximum retries.
     """
+    if not url.strip():
+        raise ScrapingFailedNoUrl()
+
     try:
         num_retry += 1
         return await scrape_web_unsafe(