try to fix screenshot timeout (#502)
This commit is contained in:
@@ -1,3 +1,4 @@
|
|||||||
|
from enum import StrEnum
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
# This is the attribute name used to tag interactable elements
|
# This is the attribute name used to tag interactable elements
|
||||||
@@ -6,3 +7,12 @@ SKYVERN_DIR = Path(__file__).parent
|
|||||||
REPO_ROOT_DIR = SKYVERN_DIR.parent
|
REPO_ROOT_DIR = SKYVERN_DIR.parent
|
||||||
|
|
||||||
INPUT_TEXT_TIMEOUT = 120000 # 2 minutes
|
INPUT_TEXT_TIMEOUT = 120000 # 2 minutes
|
||||||
|
|
||||||
|
|
||||||
|
class ScrapeType(StrEnum):
|
||||||
|
NORMAL = "normal"
|
||||||
|
STOPLOADING = "stoploading"
|
||||||
|
RELOAD = "reload"
|
||||||
|
|
||||||
|
|
||||||
|
# Order in which the scrape strategies are attempted; the next entry is only
# tried when the previous attempt failed to take a screenshot.
SCRAPE_TYPE_ORDER = [
    ScrapeType.NORMAL,
    ScrapeType.STOPLOADING,
    ScrapeType.RELOAD,
]
|
||||||
|
|||||||
@@ -165,6 +165,20 @@ class FailedToNavigateToUrl(SkyvernException):
|
|||||||
super().__init__(f"Failed to navigate to url {url}. Error message: {error_message}")
|
super().__init__(f"Failed to navigate to url {url}. Error message: {error_message}")
|
||||||
|
|
||||||
|
|
||||||
|
class FailedToReloadPage(SkyvernException):
    """Raised when reloading the current browser page fails."""

    def __init__(self, url: str, error_message: str) -> None:
        # Keep the raw details on the instance so handlers can inspect them
        # without parsing the formatted message.
        self.url = url
        self.error_message = error_message
        message = f"Failed to reload page url {url}. Error message: {error_message}"
        super().__init__(message)
|
||||||
|
|
||||||
|
|
||||||
|
class FailedToStopLoadingPage(SkyvernException):
    """Raised when the request to stop loading the current browser page fails."""

    def __init__(self, url: str, error_message: str) -> None:
        # Keep the raw details on the instance so handlers can inspect them
        # without parsing the formatted message.
        self.url = url
        self.error_message = error_message
        message = f"Failed to stop loading page url {url}. Error message: {error_message}"
        super().__init__(message)
|
||||||
|
|
||||||
|
|
||||||
class UnexpectedTaskStatus(SkyvernException):
|
class UnexpectedTaskStatus(SkyvernException):
|
||||||
def __init__(self, task_id: str, status: str) -> None:
|
def __init__(self, task_id: str, status: str) -> None:
|
||||||
super().__init__(f"Unexpected task status {status} for task {task_id}")
|
super().__init__(f"Unexpected task status {status} for task {task_id}")
|
||||||
@@ -218,6 +232,11 @@ class FailedToTakeScreenshot(SkyvernException):
|
|||||||
super().__init__(f"Failed to take screenshot. Error message: {error_message}")
|
super().__init__(f"Failed to take screenshot. Error message: {error_message}")
|
||||||
|
|
||||||
|
|
||||||
|
class EmptyScrapePage(SkyvernException):
    """Raised when scraping finishes without an error but yields no page (None)."""

    def __init__(self) -> None:
        # Message grammar fixed: previously read "returned an NONE result".
        super().__init__("Failed to scrape the page, returned a None result")
|
||||||
|
|
||||||
|
|
||||||
class WorkflowRunContextNotInitialized(SkyvernException):
|
class WorkflowRunContextNotInitialized(SkyvernException):
|
||||||
def __init__(self, workflow_run_id: str) -> None:
|
def __init__(self, workflow_run_id: str) -> None:
|
||||||
super().__init__(f"WorkflowRunContext not initialized for workflow run {workflow_run_id}")
|
super().__init__(f"WorkflowRunContext not initialized for workflow run {workflow_run_id}")
|
||||||
|
|||||||
@@ -11,10 +11,13 @@ from playwright._impl._errors import TargetClosedError
|
|||||||
from playwright.async_api import Page
|
from playwright.async_api import Page
|
||||||
|
|
||||||
from skyvern import analytics
|
from skyvern import analytics
|
||||||
|
from skyvern.constants import SCRAPE_TYPE_ORDER, ScrapeType
|
||||||
from skyvern.exceptions import (
|
from skyvern.exceptions import (
|
||||||
BrowserStateMissingPage,
|
BrowserStateMissingPage,
|
||||||
|
EmptyScrapePage,
|
||||||
FailedToNavigateToUrl,
|
FailedToNavigateToUrl,
|
||||||
FailedToSendWebhook,
|
FailedToSendWebhook,
|
||||||
|
FailedToTakeScreenshot,
|
||||||
InvalidWorkflowTaskURLState,
|
InvalidWorkflowTaskURLState,
|
||||||
MissingBrowserStatePage,
|
MissingBrowserStatePage,
|
||||||
SkyvernException,
|
SkyvernException,
|
||||||
@@ -778,6 +781,36 @@ class ForgeAgent:
|
|||||||
)
|
)
|
||||||
return step, browser_state, detailed_output
|
return step, browser_state, detailed_output
|
||||||
|
|
||||||
|
async def _scrape_with_type(
|
||||||
|
self,
|
||||||
|
task: Task,
|
||||||
|
step: Step,
|
||||||
|
browser_state: BrowserState,
|
||||||
|
scrape_type: ScrapeType,
|
||||||
|
) -> ScrapedPage | None:
|
||||||
|
if scrape_type == ScrapeType.NORMAL:
|
||||||
|
pass
|
||||||
|
|
||||||
|
elif scrape_type == ScrapeType.STOPLOADING:
|
||||||
|
LOG.info(
|
||||||
|
"Try to stop loading the page before scraping",
|
||||||
|
task_id=task.task_id,
|
||||||
|
step_id=step.step_id,
|
||||||
|
)
|
||||||
|
await browser_state.stop_page_loading()
|
||||||
|
elif scrape_type == ScrapeType.RELOAD:
|
||||||
|
LOG.info(
|
||||||
|
"Try to reload the page before scraping",
|
||||||
|
task_id=task.task_id,
|
||||||
|
step_id=step.step_id,
|
||||||
|
)
|
||||||
|
await browser_state.reload_page()
|
||||||
|
|
||||||
|
return await scrape_website(
|
||||||
|
browser_state,
|
||||||
|
task.url,
|
||||||
|
)
|
||||||
|
|
||||||
async def _build_and_record_step_prompt(
|
async def _build_and_record_step_prompt(
|
||||||
self,
|
self,
|
||||||
task: Task,
|
task: Task,
|
||||||
@@ -788,10 +821,30 @@ class ForgeAgent:
|
|||||||
self.async_operation_pool.run_operation(task.task_id, AgentPhase.scrape)
|
self.async_operation_pool.run_operation(task.task_id, AgentPhase.scrape)
|
||||||
|
|
||||||
# Scrape the web page and get the screenshot and the elements
|
# Scrape the web page and get the screenshot and the elements
|
||||||
scraped_page = await scrape_website(
|
# HACK: try scrape_website three times to handle screenshot timeout
|
||||||
browser_state,
|
# first time: normal scrape to take screenshot
|
||||||
task.url,
|
# second time: stop window loading before scraping
|
||||||
)
|
# third time: reload the page before scraping
|
||||||
|
scraped_page: ScrapedPage | None = None
|
||||||
|
for scrape_type in SCRAPE_TYPE_ORDER:
|
||||||
|
try:
|
||||||
|
scraped_page = await self._scrape_with_type(
|
||||||
|
task=task, step=step, browser_state=browser_state, scrape_type=scrape_type
|
||||||
|
)
|
||||||
|
break
|
||||||
|
except FailedToTakeScreenshot as e:
|
||||||
|
if scrape_type == ScrapeType.RELOAD:
|
||||||
|
LOG.error(
|
||||||
|
"Failed to take screenshot after stop-loading and reload-page retry",
|
||||||
|
task_id=task.task_id,
|
||||||
|
step_id=step.step_id,
|
||||||
|
)
|
||||||
|
raise e
|
||||||
|
continue
|
||||||
|
|
||||||
|
if scraped_page is None:
|
||||||
|
raise EmptyScrapePage
|
||||||
|
|
||||||
await app.ARTIFACT_MANAGER.create_artifact(
|
await app.ARTIFACT_MANAGER.create_artifact(
|
||||||
step=step,
|
step=step,
|
||||||
artifact_type=ArtifactType.HTML_SCRAPE,
|
artifact_type=ArtifactType.HTML_SCRAPE,
|
||||||
|
|||||||
@@ -15,6 +15,8 @@ from pydantic import BaseModel
|
|||||||
from skyvern.config import settings
|
from skyvern.config import settings
|
||||||
from skyvern.exceptions import (
|
from skyvern.exceptions import (
|
||||||
FailedToNavigateToUrl,
|
FailedToNavigateToUrl,
|
||||||
|
FailedToReloadPage,
|
||||||
|
FailedToStopLoadingPage,
|
||||||
FailedToTakeScreenshot,
|
FailedToTakeScreenshot,
|
||||||
MissingBrowserStatePage,
|
MissingBrowserStatePage,
|
||||||
UnknownBrowserType,
|
UnknownBrowserType,
|
||||||
@@ -159,6 +161,12 @@ class BrowserState:
|
|||||||
self.page = page
|
self.page = page
|
||||||
self.browser_artifacts = browser_artifacts
|
self.browser_artifacts = browser_artifacts
|
||||||
|
|
||||||
|
def __assert_page(self) -> Page:
    """Return the current page, or log and raise if no page is set.

    Raises:
        MissingBrowserStatePage: when ``self.page`` is ``None``.
    """
    current_page = self.page
    if current_page is None:
        LOG.error("BrowserState has no page")
        raise MissingBrowserStatePage()
    return current_page
|
||||||
|
|
||||||
async def _close_all_other_pages(self) -> None:
|
async def _close_all_other_pages(self) -> None:
|
||||||
if not self.browser_context or not self.page:
|
if not self.browser_context or not self.page:
|
||||||
return
|
return
|
||||||
@@ -261,6 +269,31 @@ class BrowserState:
|
|||||||
|
|
||||||
return self.page
|
return self.page
|
||||||
|
|
||||||
|
async def stop_page_loading(self) -> None:
    """Ask the current page to stop loading by evaluating ``window.stop()``.

    Raises:
        MissingBrowserStatePage: when there is no current page.
        FailedToStopLoadingPage: when evaluating the script fails; the
            original error is attached as the cause.
    """
    page = self.__assert_page()
    try:
        await page.evaluate("window.stop()")
    except Exception as e:
        LOG.exception(f"Error while stop loading the page: {repr(e)}")
        # Chain explicitly so the root cause stays attached to the domain
        # error, matching how FailedToTakeScreenshot is raised in this class.
        raise FailedToStopLoadingPage(url=page.url, error_message=repr(e)) from e
|
||||||
|
|
||||||
|
async def reload_page(self) -> None:
    """Reload the current page, log how long it took, then pause for 5 seconds.

    The reload uses ``settings.BROWSER_LOADING_TIMEOUT_MS`` as its timeout.

    Raises:
        MissingBrowserStatePage: when there is no current page.
        FailedToReloadPage: when the reload (or the pause that follows it)
            fails; the original error is attached as the cause.
    """
    page = self.__assert_page()

    LOG.info(f"Reload page {page.url} and waiting for 5 seconds")
    try:
        # Use a monotonic clock for the duration: wall-clock time can jump
        # (NTP/DST adjustments) and produce a wrong or negative loading time.
        start_time = time.monotonic()
        await page.reload(timeout=settings.BROWSER_LOADING_TIMEOUT_MS)
        end_time = time.monotonic()
        LOG.info(
            "Page loading time",
            loading_time=end_time - start_time,
        )
        await asyncio.sleep(5)
    except Exception as e:
        LOG.exception(f"Error while reload url: {repr(e)}")
        # Chain explicitly so the root cause stays attached to the domain
        # error, matching how FailedToTakeScreenshot is raised in this class.
        raise FailedToReloadPage(url=page.url, error_message=repr(e)) from e
|
||||||
|
|
||||||
async def close(self, close_browser_on_completion: bool = True) -> None:
|
async def close(self, close_browser_on_completion: bool = True) -> None:
|
||||||
LOG.info("Closing browser state")
|
LOG.info("Closing browser state")
|
||||||
if self.browser_context and close_browser_on_completion:
|
if self.browser_context and close_browser_on_completion:
|
||||||
@@ -307,8 +340,5 @@ class BrowserState:
|
|||||||
raise FailedToTakeScreenshot(error_message=str(e)) from e
|
raise FailedToTakeScreenshot(error_message=str(e)) from e
|
||||||
|
|
||||||
async def take_screenshot(self, full_page: bool = False, file_path: str | None = None) -> bytes:
|
async def take_screenshot(self, full_page: bool = False, file_path: str | None = None) -> bytes:
|
||||||
if not self.page:
|
page = self.__assert_page()
|
||||||
LOG.error("BrowserState has no page")
|
return await self.take_screenshot_from_page(page, full_page, file_path)
|
||||||
raise MissingBrowserStatePage()
|
|
||||||
|
|
||||||
return await self.take_screenshot_from_page(self.page, full_page, file_path)
|
|
||||||
|
|||||||
@@ -10,7 +10,7 @@ from playwright.async_api import Frame, Page
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from skyvern.constants import SKYVERN_DIR, SKYVERN_ID_ATTR
|
from skyvern.constants import SKYVERN_DIR, SKYVERN_ID_ATTR
|
||||||
from skyvern.exceptions import UnknownElementTreeFormat
|
from skyvern.exceptions import FailedToTakeScreenshot, UnknownElementTreeFormat
|
||||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||||
from skyvern.webeye.browser_factory import BrowserState
|
from skyvern.webeye.browser_factory import BrowserState
|
||||||
|
|
||||||
@@ -169,7 +169,7 @@ async def scrape_website(
|
|||||||
try:
|
try:
|
||||||
num_retry += 1
|
num_retry += 1
|
||||||
return await scrape_web_unsafe(browser_state, url)
|
return await scrape_web_unsafe(browser_state, url)
|
||||||
except Exception:
|
except Exception as e:
|
||||||
# NOTE: MAX_SCRAPING_RETRIES is set to 0 in both staging and production
|
# NOTE: MAX_SCRAPING_RETRIES is set to 0 in both staging and production
|
||||||
if num_retry > SettingsManager.get_settings().MAX_SCRAPING_RETRIES:
|
if num_retry > SettingsManager.get_settings().MAX_SCRAPING_RETRIES:
|
||||||
LOG.error(
|
LOG.error(
|
||||||
@@ -178,7 +178,10 @@ async def scrape_website(
|
|||||||
url=url,
|
url=url,
|
||||||
exc_info=True,
|
exc_info=True,
|
||||||
)
|
)
|
||||||
raise Exception("Scraping failed.")
|
if isinstance(e, FailedToTakeScreenshot):
|
||||||
|
raise e
|
||||||
|
else:
|
||||||
|
raise Exception("Scraping failed.")
|
||||||
LOG.info("Scraping failed, will retry", num_retry=num_retry, url=url)
|
LOG.info("Scraping failed, will retry", num_retry=num_retry, url=url)
|
||||||
return await scrape_website(
|
return await scrape_website(
|
||||||
browser_state,
|
browser_state,
|
||||||
|
|||||||
Reference in New Issue
Block a user