From fcd22017b79960383ba74393699e34db9a5e1ed0 Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Sat, 19 Jul 2025 13:18:12 +0800 Subject: [PATCH] make scraping timeout configurable (#2991) --- skyvern/config.py | 1 + skyvern/constants.py | 1 - skyvern/webeye/scraper/scraper.py | 7 +++++-- skyvern/webeye/utils/page.py | 23 ++++++++++++++++------- 4 files changed, 22 insertions(+), 10 deletions(-) diff --git a/skyvern/config.py b/skyvern/config.py index 0fa94e43..f4bebfa9 100644 --- a/skyvern/config.py +++ b/skyvern/config.py @@ -23,6 +23,7 @@ class Settings(BaseSettings): BROWSER_ACTION_TIMEOUT_MS: int = 5000 BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000 BROWSER_LOADING_TIMEOUT_MS: int = 90000 + BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS: int = 60 * 1000 # 1 minute OPTION_LOADING_TIMEOUT_MS: int = 600000 MAX_STEPS_PER_RUN: int = 10 MAX_STEPS_PER_TASK_V2: int = 25 diff --git a/skyvern/constants.py b/skyvern/constants.py index 54f63131..283e2179 100644 --- a/skyvern/constants.py +++ b/skyvern/constants.py @@ -8,7 +8,6 @@ REPO_ROOT_DIR = SKYVERN_DIR.parent INPUT_TEXT_TIMEOUT = 120000 # 2 minutes PAGE_CONTENT_TIMEOUT = 300 # 5 mins -BUILDING_ELEMENT_TREE_TIMEOUT_MS = 60 * 1000 # 1 minute BROWSER_CLOSE_TIMEOUT = 180 # 3 minute BROWSER_DOWNLOAD_MAX_WAIT_TIME = 1200 # 20 minute BROWSER_DOWNLOAD_TIMEOUT = 600 # 10 minute diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py index f6cba310..7b77eaf2 100644 --- a/skyvern/webeye/scraper/scraper.py +++ b/skyvern/webeye/scraper/scraper.py @@ -12,10 +12,11 @@ from playwright.async_api import ElementHandle, Frame, Locator, Page from pydantic import BaseModel, PrivateAttr from skyvern.config import settings -from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR +from skyvern.constants import DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat from skyvern.forge.sdk.api.crypto import calculate_sha256 from skyvern.forge.sdk.core import skyvern_context +from skyvern.forge.sdk.settings_manager import SettingsManager from skyvern.forge.sdk.trace import TraceManager from skyvern.utils.image_resizer import Resolution from skyvern.utils.token_counter import count_tokens @@ -765,7 +766,9 @@ class IncrementalScrapePage(ElementTreeBuilder): return js_script = "async () => await stopGlobalIncrementalObserver()" await SkyvernFrame.evaluate( - frame=self.skyvern_frame.get_frame(), expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS + frame=self.skyvern_frame.get_frame(), + expression=js_script, + timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS, ) async def get_incremental_elements_num(self) -> int: diff --git a/skyvern/webeye/utils/page.py b/skyvern/webeye/utils/page.py index f9e885c3..31963209 100644 --- a/skyvern/webeye/utils/page.py +++ b/skyvern/webeye/utils/page.py @@ -11,7 +11,7 @@ from PIL import Image from playwright._impl._errors import TimeoutError from playwright.async_api import ElementHandle, Frame, Page -from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, PAGE_CONTENT_TIMEOUT, SKYVERN_DIR +from skyvern.constants import PAGE_CONTENT_TIMEOUT, SKYVERN_DIR from skyvern.exceptions import FailedToTakeScreenshot from skyvern.forge.sdk.settings_manager import SettingsManager from skyvern.forge.sdk.trace import TraceManager @@ -376,7 +376,7 @@ class SkyvernFrame: scroll_y_px = await self.evaluate( frame=self.frame, expression=js_script, - timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS, + timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS, arg=[draw_boxes, frame, frame_index], ) return scroll_y_px @@ -394,7 +394,7 @@ class SkyvernFrame: scroll_y_px = await self.evaluate( frame=self.frame, expression=js_script, - timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS, + timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS, arg=[draw_boxes, frame, frame_index, need_overlap], ) return scroll_y_px @@ -405,14 +405,18 @@ class SkyvernFrame: :param page: Page instance to remove the bounding boxes from. """ js_script = "() => removeBoundingBoxes()" - await self.evaluate(frame=self.frame, expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS) + await self.evaluate( + frame=self.frame, + expression=js_script, + timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS, + ) async def build_elements_and_draw_bounding_boxes(self, frame: str, frame_index: int) -> None: js_script = "async ([frame, frame_index]) => await buildElementsAndDrawBoundingBoxes(frame, frame_index)" await self.evaluate( frame=self.frame, expression=js_script, - timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS, + timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS, arg=[frame, frame_index], ) @@ -446,7 +450,10 @@ class SkyvernFrame: @TraceManager.traced_async() async def build_tree_from_body( - self, frame_name: str | None, frame_index: int, timeout_ms: float = BUILDING_ELEMENT_TREE_TIMEOUT_MS + self, + frame_name: str | None, + frame_index: int, + timeout_ms: float = SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS, ) -> tuple[list[dict], list[dict]]: js_script = "async ([frame_name, frame_index]) => await buildTreeFromBody(frame_name, frame_index)" return await self.evaluate( @@ -455,7 +462,9 @@ class SkyvernFrame: @TraceManager.traced_async() async def get_incremental_element_tree( - self, wait_until_finished: bool = True, timeout_ms: float = BUILDING_ELEMENT_TREE_TIMEOUT_MS + self, + wait_until_finished: bool = True, + timeout_ms: float = SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS, ) -> tuple[list[dict], list[dict]]: js_script = "async ([wait_until_finished]) => await getIncrementElements(wait_until_finished)" return await self.evaluate(