From 03d5383a427c70dacd43d4b202a1780e82582e71 Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Wed, 9 Apr 2025 11:36:27 -0700 Subject: [PATCH] fallback get incremental tree without waiting (#2118) --- skyvern/webeye/scraper/domUtils.js | 14 ++++++++------ skyvern/webeye/scraper/scraper.py | 18 +++++++++++++++--- 2 files changed, 23 insertions(+), 9 deletions(-) diff --git a/skyvern/webeye/scraper/domUtils.js b/skyvern/webeye/scraper/domUtils.js index 6d8ff8b3..1d0c45ae 100644 --- a/skyvern/webeye/scraper/domUtils.js +++ b/skyvern/webeye/scraper/domUtils.js @@ -2344,12 +2344,14 @@ async function stopGlobalIncrementalObserver() { window.globalDomDepthMap = new Map(); } -async function getIncrementElements() { - while ( - (await window.globalParsedElementCounter.get()) < - window.globalOneTimeIncrementElements.length - ) { - await asyncSleepFor(100); +async function getIncrementElements(wait_until_finished = true) { + if (wait_until_finished) { + while ( + (await window.globalParsedElementCounter.get()) < + window.globalOneTimeIncrementElements.length + ) { + await asyncSleepFor(100); + } } // cleanup the chidren tree, remove the duplicated element diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py index 262f8adb..0cee02ca 100644 --- a/skyvern/webeye/scraper/scraper.py +++ b/skyvern/webeye/scraper/scraper.py @@ -6,6 +6,7 @@ from enum import StrEnum from typing import Any, Awaitable, Callable, Self import structlog +from playwright._impl._errors import TimeoutError from playwright.async_api import Frame, Locator, Page from pydantic import BaseModel, PrivateAttr @@ -675,9 +676,20 @@ class IncrementalScrapePage: frame = self.skyvern_frame.get_frame() js_script = "async () => await getIncrementElements()" - incremental_elements, incremental_tree = await SkyvernFrame.evaluate( - frame=frame, expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS - ) + try: + incremental_elements, incremental_tree = await SkyvernFrame.evaluate( + frame=frame, expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS + ) + except TimeoutError: + LOG.warning( + "Timeout to get incremental elements with wait_until_finished, going to get incremental elements without waiting", + ) + + js_script = "async () => await getIncrementElements(false)" + incremental_elements, incremental_tree = await SkyvernFrame.evaluate( + frame=frame, expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS + ) + # we listen the incremental elements seperated by frames, so all elements will be in the same SkyvernFrame self.id_to_css_dict, self.id_to_element_dict, _, _, _ = build_element_dict(incremental_elements)