Distinguish scrape failures caused by a missing URL (#2977)
This commit is contained in:
This commit is contained in:
@@ -264,10 +264,16 @@ class EmptyScrapePage(SkyvernException):
|
||||
|
||||
|
||||
class ScrapingFailed(SkyvernException):
    """Raised when scraping a page fails.

    The optional keyword-only ``reason`` lets subclasses (and callers)
    attach a more specific, user-facing explanation; handlers can read
    ``exc.reason`` and fall back to a generic message when it is ``None``.
    """

    def __init__(self, *, reason: str | None = None) -> None:
        # Store the specific reason so except-handlers can surface it
        # instead of the generic message passed to the base class.
        self.reason = reason
        super().__init__("Scraping failed.")
|
||||
|
||||
|
||||
class ScrapingFailedNoUrl(ScrapingFailed):
    """Scraping failure caused specifically by a missing URL."""

    def __init__(self) -> None:
        # Delegate to the parent with a fixed, user-facing explanation.
        message = "A URL is missing. Please ensure there is a URL for Skyvern to work with."
        super().__init__(reason=message)
|
||||
|
||||
|
||||
class WorkflowRunContextNotInitialized(SkyvernException):
    """Raised when a workflow run's context has not been set up yet."""

    def __init__(self, workflow_run_id: str) -> None:
        # Include the run id so the failing workflow run is identifiable in logs.
        message = f"WorkflowRunContext not initialized for workflow run {workflow_run_id}"
        super().__init__(message)
|
||||
|
||||
@@ -749,17 +749,19 @@ class ForgeAgent:
|
||||
close_browser_on_completion=close_browser_on_completion and browser_session_id is None,
|
||||
)
|
||||
return step, detailed_output, None
|
||||
except ScrapingFailed:
|
||||
except ScrapingFailed as sfe:
|
||||
LOG.warning(
|
||||
"Scraping failed, marking the task as failed",
|
||||
task_id=task.task_id,
|
||||
step_id=step.step_id,
|
||||
exc_info=True,
|
||||
)
|
||||
|
||||
await self.fail_task(
|
||||
task,
|
||||
step,
|
||||
"Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
|
||||
sfe.reason
|
||||
or "Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
|
||||
)
|
||||
await self.clean_up_task(
|
||||
task=task,
|
||||
|
||||
@@ -13,7 +13,7 @@ from pydantic import BaseModel, PrivateAttr
|
||||
|
||||
from skyvern.config import settings
|
||||
from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR
|
||||
from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat
|
||||
from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, ScrapingFailedNoUrl, UnknownElementTreeFormat
|
||||
from skyvern.forge.sdk.api.crypto import calculate_sha256
|
||||
from skyvern.forge.sdk.core import skyvern_context
|
||||
from skyvern.forge.sdk.trace import TraceManager
|
||||
@@ -426,6 +426,10 @@ async def scrape_website(
|
||||
|
||||
:raises Exception: When scraping fails after maximum retries.
|
||||
"""
|
||||
|
||||
if not url.strip():
|
||||
raise ScrapingFailedNoUrl()
|
||||
|
||||
try:
|
||||
num_retry += 1
|
||||
return await scrape_web_unsafe(
|
||||
|
||||
Reference in New Issue
Block a user