make scraping timeout configurable (#2991)
This commit is contained in:
@@ -23,6 +23,7 @@ class Settings(BaseSettings):
|
|||||||
BROWSER_ACTION_TIMEOUT_MS: int = 5000
|
BROWSER_ACTION_TIMEOUT_MS: int = 5000
|
||||||
BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000
|
BROWSER_SCREENSHOT_TIMEOUT_MS: int = 20000
|
||||||
BROWSER_LOADING_TIMEOUT_MS: int = 90000
|
BROWSER_LOADING_TIMEOUT_MS: int = 90000
|
||||||
|
BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS: int = 60 * 1000 # 1 minute
|
||||||
OPTION_LOADING_TIMEOUT_MS: int = 600000
|
OPTION_LOADING_TIMEOUT_MS: int = 600000
|
||||||
MAX_STEPS_PER_RUN: int = 10
|
MAX_STEPS_PER_RUN: int = 10
|
||||||
MAX_STEPS_PER_TASK_V2: int = 25
|
MAX_STEPS_PER_TASK_V2: int = 25
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ REPO_ROOT_DIR = SKYVERN_DIR.parent
|
|||||||
|
|
||||||
INPUT_TEXT_TIMEOUT = 120000 # 2 minutes
|
INPUT_TEXT_TIMEOUT = 120000 # 2 minutes
|
||||||
PAGE_CONTENT_TIMEOUT = 300 # 5 mins
|
PAGE_CONTENT_TIMEOUT = 300 # 5 mins
|
||||||
BUILDING_ELEMENT_TREE_TIMEOUT_MS = 60 * 1000 # 1 minute
|
|
||||||
BROWSER_CLOSE_TIMEOUT = 180 # 3 minute
|
BROWSER_CLOSE_TIMEOUT = 180 # 3 minute
|
||||||
BROWSER_DOWNLOAD_MAX_WAIT_TIME = 1200 # 20 minute
|
BROWSER_DOWNLOAD_MAX_WAIT_TIME = 1200 # 20 minute
|
||||||
BROWSER_DOWNLOAD_TIMEOUT = 600 # 10 minute
|
BROWSER_DOWNLOAD_TIMEOUT = 600 # 10 minute
|
||||||
|
|||||||
@@ -12,10 +12,11 @@ from playwright.async_api import ElementHandle, Frame, Locator, Page
|
|||||||
from pydantic import BaseModel, PrivateAttr
|
from pydantic import BaseModel, PrivateAttr
|
||||||
|
|
||||||
from skyvern.config import settings
|
from skyvern.config import settings
|
||||||
from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR
|
from skyvern.constants import DEFAULT_MAX_TOKENS, SKYVERN_DIR, SKYVERN_ID_ATTR
|
||||||
from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat
|
from skyvern.exceptions import FailedToTakeScreenshot, ScrapingFailed, UnknownElementTreeFormat
|
||||||
from skyvern.forge.sdk.api.crypto import calculate_sha256
|
from skyvern.forge.sdk.api.crypto import calculate_sha256
|
||||||
from skyvern.forge.sdk.core import skyvern_context
|
from skyvern.forge.sdk.core import skyvern_context
|
||||||
|
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||||
from skyvern.forge.sdk.trace import TraceManager
|
from skyvern.forge.sdk.trace import TraceManager
|
||||||
from skyvern.utils.image_resizer import Resolution
|
from skyvern.utils.image_resizer import Resolution
|
||||||
from skyvern.utils.token_counter import count_tokens
|
from skyvern.utils.token_counter import count_tokens
|
||||||
@@ -765,7 +766,9 @@ class IncrementalScrapePage(ElementTreeBuilder):
|
|||||||
return
|
return
|
||||||
js_script = "async () => await stopGlobalIncrementalObserver()"
|
js_script = "async () => await stopGlobalIncrementalObserver()"
|
||||||
await SkyvernFrame.evaluate(
|
await SkyvernFrame.evaluate(
|
||||||
frame=self.skyvern_frame.get_frame(), expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS
|
frame=self.skyvern_frame.get_frame(),
|
||||||
|
expression=js_script,
|
||||||
|
timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
||||||
)
|
)
|
||||||
|
|
||||||
async def get_incremental_elements_num(self) -> int:
|
async def get_incremental_elements_num(self) -> int:
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ from PIL import Image
|
|||||||
from playwright._impl._errors import TimeoutError
|
from playwright._impl._errors import TimeoutError
|
||||||
from playwright.async_api import ElementHandle, Frame, Page
|
from playwright.async_api import ElementHandle, Frame, Page
|
||||||
|
|
||||||
from skyvern.constants import BUILDING_ELEMENT_TREE_TIMEOUT_MS, PAGE_CONTENT_TIMEOUT, SKYVERN_DIR
|
from skyvern.constants import PAGE_CONTENT_TIMEOUT, SKYVERN_DIR
|
||||||
from skyvern.exceptions import FailedToTakeScreenshot
|
from skyvern.exceptions import FailedToTakeScreenshot
|
||||||
from skyvern.forge.sdk.settings_manager import SettingsManager
|
from skyvern.forge.sdk.settings_manager import SettingsManager
|
||||||
from skyvern.forge.sdk.trace import TraceManager
|
from skyvern.forge.sdk.trace import TraceManager
|
||||||
@@ -376,7 +376,7 @@ class SkyvernFrame:
|
|||||||
scroll_y_px = await self.evaluate(
|
scroll_y_px = await self.evaluate(
|
||||||
frame=self.frame,
|
frame=self.frame,
|
||||||
expression=js_script,
|
expression=js_script,
|
||||||
timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
||||||
arg=[draw_boxes, frame, frame_index],
|
arg=[draw_boxes, frame, frame_index],
|
||||||
)
|
)
|
||||||
return scroll_y_px
|
return scroll_y_px
|
||||||
@@ -394,7 +394,7 @@ class SkyvernFrame:
|
|||||||
scroll_y_px = await self.evaluate(
|
scroll_y_px = await self.evaluate(
|
||||||
frame=self.frame,
|
frame=self.frame,
|
||||||
expression=js_script,
|
expression=js_script,
|
||||||
timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
||||||
arg=[draw_boxes, frame, frame_index, need_overlap],
|
arg=[draw_boxes, frame, frame_index, need_overlap],
|
||||||
)
|
)
|
||||||
return scroll_y_px
|
return scroll_y_px
|
||||||
@@ -405,14 +405,18 @@ class SkyvernFrame:
|
|||||||
:param page: Page instance to remove the bounding boxes from.
|
:param page: Page instance to remove the bounding boxes from.
|
||||||
"""
|
"""
|
||||||
js_script = "() => removeBoundingBoxes()"
|
js_script = "() => removeBoundingBoxes()"
|
||||||
await self.evaluate(frame=self.frame, expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS)
|
await self.evaluate(
|
||||||
|
frame=self.frame,
|
||||||
|
expression=js_script,
|
||||||
|
timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
||||||
|
)
|
||||||
|
|
||||||
async def build_elements_and_draw_bounding_boxes(self, frame: str, frame_index: int) -> None:
|
async def build_elements_and_draw_bounding_boxes(self, frame: str, frame_index: int) -> None:
|
||||||
js_script = "async ([frame, frame_index]) => await buildElementsAndDrawBoundingBoxes(frame, frame_index)"
|
js_script = "async ([frame, frame_index]) => await buildElementsAndDrawBoundingBoxes(frame, frame_index)"
|
||||||
await self.evaluate(
|
await self.evaluate(
|
||||||
frame=self.frame,
|
frame=self.frame,
|
||||||
expression=js_script,
|
expression=js_script,
|
||||||
timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
timeout_ms=SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
||||||
arg=[frame, frame_index],
|
arg=[frame, frame_index],
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -446,7 +450,10 @@ class SkyvernFrame:
|
|||||||
|
|
||||||
@TraceManager.traced_async()
|
@TraceManager.traced_async()
|
||||||
async def build_tree_from_body(
|
async def build_tree_from_body(
|
||||||
self, frame_name: str | None, frame_index: int, timeout_ms: float = BUILDING_ELEMENT_TREE_TIMEOUT_MS
|
self,
|
||||||
|
frame_name: str | None,
|
||||||
|
frame_index: int,
|
||||||
|
timeout_ms: float = SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
||||||
) -> tuple[list[dict], list[dict]]:
|
) -> tuple[list[dict], list[dict]]:
|
||||||
js_script = "async ([frame_name, frame_index]) => await buildTreeFromBody(frame_name, frame_index)"
|
js_script = "async ([frame_name, frame_index]) => await buildTreeFromBody(frame_name, frame_index)"
|
||||||
return await self.evaluate(
|
return await self.evaluate(
|
||||||
@@ -455,7 +462,9 @@ class SkyvernFrame:
|
|||||||
|
|
||||||
@TraceManager.traced_async()
|
@TraceManager.traced_async()
|
||||||
async def get_incremental_element_tree(
|
async def get_incremental_element_tree(
|
||||||
self, wait_until_finished: bool = True, timeout_ms: float = BUILDING_ELEMENT_TREE_TIMEOUT_MS
|
self,
|
||||||
|
wait_until_finished: bool = True,
|
||||||
|
timeout_ms: float = SettingsManager.get_settings().BROWSER_SCRAPING_BUILDING_ELEMENT_TREE_TIMEOUT_MS,
|
||||||
) -> tuple[list[dict], list[dict]]:
|
) -> tuple[list[dict], list[dict]]:
|
||||||
js_script = "async ([wait_until_finished]) => await getIncrementElements(wait_until_finished)"
|
js_script = "async ([wait_until_finished]) => await getIncrementElements(wait_until_finished)"
|
||||||
return await self.evaluate(
|
return await self.evaluate(
|
||||||
|
|||||||
Reference in New Issue
Block a user