Distinguish failed scrapes caused by a missing URL (#2977)

This commit is contained in:
Jonathan Dobson
2025-07-17 16:19:16 -04:00
committed by GitHub
parent 05a24026e2
commit c13c36f99e
3 changed files with 16 additions and 4 deletions

View File

@@ -264,10 +264,16 @@ class EmptyScrapePage(SkyvernException):
class ScrapingFailed(SkyvernException):
    """Raised when website scraping fails.

    Carries an optional human-readable ``reason`` so callers (e.g. task
    failure handling) can surface a more specific message to the user.
    """

    # NOTE: the diff residue contained both the old zero-arg signature and the
    # new keyword-only one; only the new signature is kept. `reason` is
    # keyword-only and defaults to None, so existing `ScrapingFailed()` call
    # sites remain valid.
    def __init__(self, *, reason: str | None = None) -> None:
        self.reason = reason
        super().__init__("Scraping failed.")
class ScrapingFailedNoUrl(ScrapingFailed):
    """Scraping failure caused specifically by a missing URL."""

    def __init__(self) -> None:
        # Fixed user-facing reason explaining that no URL was provided.
        no_url_reason = "A URL is missing. Please ensure there is a URL for Skyvern to work with."
        super().__init__(reason=no_url_reason)
class WorkflowRunContextNotInitialized(SkyvernException):
    """Raised when a WorkflowRunContext is used before it was initialized."""

    def __init__(self, workflow_run_id: str) -> None:
        # Include the workflow run id so logs identify the offending run.
        message = f"WorkflowRunContext not initialized for workflow run {workflow_run_id}"
        super().__init__(message)

View File

@@ -749,17 +749,19 @@ class ForgeAgent:
close_browser_on_completion=close_browser_on_completion and browser_session_id is None,
)
return step, detailed_output, None
except ScrapingFailed:
except ScrapingFailed as sfe:
LOG.warning(
"Scraping failed, marking the task as failed",
task_id=task.task_id,
step_id=step.step_id,
exc_info=True,
)
await self.fail_task(
task,
step,
"Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
sfe.reason
or "Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
)
await self.clean_up_task(
task=task,

View File

@@ -13,7 +13,7 @@ from pydantic import BaseModel, PrivateAttr
from skyvern.config import settings
from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR
from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat
from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, ScrapingFailedNoUrl, UnknownElementTreeFormat
from skyvern.forge.sdk.api.crypto import calculate_sha256
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.trace import TraceManager
@@ -426,6 +426,10 @@ async def scrape_website(
:raises Exception: When scraping fails after maximum retries.
"""
if not url.strip():
raise ScrapingFailedNoUrl()
try:
num_retry += 1
return await scrape_web_unsafe(