make scraping timeout configurable (#2991)

This commit is contained in:
LawyZheng
2025-07-19 13:18:12 +08:00
committed by GitHub
parent 0efd86fb89
commit fcd22017b7
4 changed files with 22 additions and 10 deletions

View File

@@ -23,6 +23,7 @@ class Settings(BaseSettings):
BROWSER_ACTION_TIMEOUT_MS: int = 5000
BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000
BROWSER_LOADING_TIMEOUT_MS: int = 90000
BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS: int = 60 * 1000 # 1 minute
OPTION_LOADING_TIMEOUT_MS: int = 600000
MAX_STEPS_PER_RUN: int = 10
MAX_STEPS_PER_TASK_V2: int = 25

View File

@@ -8,7 +8,6 @@ REPO_ROOT_DIR = SKYVERN_DIR.parent
INPUT_TEXT_TIMEOUT = 120000 # 2 minutes
PAGE_CONTENT_TIMEOUT = 300 # 5 mins
BUILDING_ELEMENT_TREE_TIMEOUT_MS = 60 * 1000 # 1 minute
BROWSER_CLOSE_TIMEOUT = 180 # 3 minutes
BROWSER_DOWNLOAD_MAX_WAIT_TIME = 1200 # 20 minutes
BROWSER_DOWNLOAD_TIMEOUT = 600 # 10 minutes

View File

@@ -12,10 +12,11 @@ from playwright.async_api import ElementHandle, Frame, Locator, Page
from pydantic import BaseModel, PrivateAttr
from skyvern.config import settings
from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR
from skyvern.constants import DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR
from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat
from skyvern.forge.sdk.api.crypto import calculate_sha256
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.settings_manager import SettingsManager
from skyvern.forge.sdk.trace import TraceManager
from skyvern.utils.image_resizer import Resolution
from skyvern.utils.token_counter import count_tokens
@@ -765,7 +766,9 @@ class IncrementalScrapePage(ElementTreeBuilder):
return
js_script = "async () => await stopGlobalIncrementalObserver()"
await SkyvernFrame.evaluate(
frame=self.skyvern_frame.get_frame(), expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS
frame=self.skyvern_frame.get_frame(),
expression=js_script,
timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
)
async def get_incremental_elements_num(self) -> int:

View File

@@ -11,7 +11,7 @@ from PIL import Image
from playwright._impl._errors import TimeoutError
from playwright.async_api import ElementHandle, Frame, Page
from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, PAGE_CONTENT_TIMEOUT, SKYVERN_DIR
from skyvern.constants import PAGE_CONTENT_TIMEOUT, SKYVERN_DIR
from skyvern.exceptions import FailedToTakeScreenshot
from skyvern.forge.sdk.settings_manager import SettingsManager
from skyvern.forge.sdk.trace import TraceManager
@@ -376,7 +376,7 @@ class SkyvernFrame:
scroll_y_px = await self.evaluate(
frame=self.frame,
expression=js_script,
timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS,
timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
arg=[draw_boxes, frame, frame_index],
)
return scroll_y_px
@@ -394,7 +394,7 @@ class SkyvernFrame:
scroll_y_px = await self.evaluate(
frame=self.frame,
expression=js_script,
timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS,
timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
arg=[draw_boxes, frame, frame_index, need_overlap],
)
return scroll_y_px
@@ -405,14 +405,18 @@ class SkyvernFrame:
:param page: Page instance to remove the bounding boxes from.
"""
js_script = "() => removeBoundingBoxes()"
await self.evaluate(frame=self.frame, expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS)
await self.evaluate(
frame=self.frame,
expression=js_script,
timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
)
async def build_elements_and_draw_bounding_boxes(self, frame: str, frame_index: int) -> None:
js_script = "async ([frame, frame_index]) => await buildElementsAndDrawBoundingBoxes(frame, frame_index)"
await self.evaluate(
frame=self.frame,
expression=js_script,
timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS,
timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
arg=[frame, frame_index],
)
@@ -446,7 +450,10 @@ class SkyvernFrame:
@TraceManager.traced_async()
async def build_tree_from_body(
self, frame_name: str | None, frame_index: int, timeout_ms: float = BUILDING_ELEMENT_TREE_TIMEOUT_MS
self,
frame_name: str | None,
frame_index: int,
timeout_ms: float = SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
) -> tuple[list[dict], list[dict]]:
js_script = "async ([frame_name, frame_index]) => await buildTreeFromBody(frame_name, frame_index)"
return await self.evaluate(
@@ -455,7 +462,9 @@ class SkyvernFrame:
@TraceManager.traced_async()
async def get_incremental_element_tree(
self, wait_until_finished: bool = True, timeout_ms: float = BUILDING_ELEMENT_TREE_TIMEOUT_MS
self,
wait_until_finished: bool = True,
timeout_ms: float = SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
) -> tuple[list[dict], list[dict]]:
js_script = "async ([wait_until_finished]) => await getIncrementElements(wait_until_finished)"
return await self.evaluate(