distinctify failed scrapes due to no url (#2977)
This commit is contained in:
@@ -264,10 +264,16 @@ class EmptyScrapePage(SkyvernException):
|
|||||||
|
|
||||||
|
|
||||||
class ScrapingFailed(SkyvernException):
    """Raised when Skyvern fails to scrape a page.

    An optional human-readable ``reason`` may be attached so that callers
    (e.g. the agent's failure handler) can surface a more specific message
    to the user than the generic one.
    """

    def __init__(self, *, reason: str | None = None) -> None:
        # Keep the caller-supplied explanation available to exception
        # handlers; ``None`` means "no specific reason known".
        self.reason = reason
        super().__init__("Scraping failed.")
||||||
|
|
||||||
|
|
||||||
|
class ScrapingFailedNoUrl(ScrapingFailed):
    """Specialization of :class:`ScrapingFailed` for the missing-URL case.

    Distinguishes "scrape failed because there was no URL to visit" from
    other scraping failures, with a user-facing reason baked in.
    """

    def __init__(self) -> None:
        super().__init__(reason="A URL is missing. Please ensure there is a URL for Skyvern to work with.")
|
|
||||||
|
|
||||||
class WorkflowRunContextNotInitialized(SkyvernException):
    """Raised when a workflow run's context is accessed before initialization.

    Args:
        workflow_run_id: Identifier of the workflow run whose context was
            expected to exist; embedded in the error message for debugging.
    """

    def __init__(self, workflow_run_id: str) -> None:
        super().__init__(f"WorkflowRunContext not initialized for workflow run {workflow_run_id}")
|
||||||
|
|||||||
@@ -749,17 +749,19 @@ class ForgeAgent:
|
|||||||
close_browser_on_completion=close_browser_on_completion and browser_session_id is None,
|
close_browser_on_completion=close_browser_on_completion and browser_session_id is None,
|
||||||
)
|
)
|
||||||
return step, detailed_output, None
|
return step, detailed_output, None
|
||||||
except ScrapingFailed:
|
except ScrapingFailed as sfe:
|
||||||
LOG.warning(
|
LOG.warning(
|
||||||
"Scraping failed, marking the task as failed",
|
"Scraping failed, marking the task as failed",
|
||||||
task_id=task.task_id,
|
task_id=task.task_id,
|
||||||
step_id=step.step_id,
|
step_id=step.step_id,
|
||||||
exc_info=True,
|
exc_info=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
await self.fail_task(
|
await self.fail_task(
|
||||||
task,
|
task,
|
||||||
step,
|
step,
|
||||||
"Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
|
sfe.reason
|
||||||
|
or "Skyvern failed to load the website. This usually happens when the website is not properly designed, and crashes the browser as a result.",
|
||||||
)
|
)
|
||||||
await self.clean_up_task(
|
await self.clean_up_task(
|
||||||
task=task,
|
task=task,
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from pydantic import BaseModel, PrivateAttr
|
|||||||
|
|
||||||
from skyvern.config import settings
|
from skyvern.config import settings
|
||||||
from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR
|
from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR
|
||||||
from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat
|
from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, ScrapingFailedNoUrl, UnknownElementTreeFormat
|
||||||
from skyvern.forge.sdk.api.crypto import calculate_sha256
|
from skyvern.forge.sdk.api.crypto import calculate_sha256
|
||||||
from skyvern.forge.sdk.core import skyvern_context
|
from skyvern.forge.sdk.core import skyvern_context
|
||||||
from skyvern.forge.sdk.trace import TraceManager
|
from skyvern.forge.sdk.trace import TraceManager
|
||||||
@@ -426,6 +426,10 @@ async def scrape_website(
|
|||||||
|
|
||||||
:raises Exception: When scraping fails after maximum retries.
|
:raises Exception: When scraping fails after maximum retries.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if not url.strip():
|
||||||
|
raise ScrapingFailedNoUrl()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
num_retry += 1
|
num_retry += 1
|
||||||
return await scrape_web_unsafe(
|
return await scrape_web_unsafe(
|
||||||
|
|||||||
Reference in New Issue
Block a user