fallback get incremental tree without waiting (#2118)

This commit is contained in:
Shuchang Zheng
2025-04-09 11:36:27 -07:00
committed by GitHub
parent c11d0f63eb
commit 03d5383a42
2 changed files with 23 additions and 9 deletions

View File

@@ -2344,12 +2344,14 @@ async function stopGlobalIncrementalObserver() {
window.globalDomDepthMap = new Map();
}
async function getIncrementElements() {
while (
(await window.globalParsedElementCounter.get()) <
window.globalOneTimeIncrementElements.length
) {
await asyncSleepFor(100);
async function getIncrementElements(wait_until_finished = true) {
if (wait_until_finished) {
while (
(await window.globalParsedElementCounter.get()) <
window.globalOneTimeIncrementElements.length
) {
await asyncSleepFor(100);
}
}
// clean up the children tree and remove duplicated elements

View File

@@ -6,6 +6,7 @@ from enum import StrEnum
from typing import Any, Awaitable, Callable, Self
import structlog
from playwright._impl._errors import TimeoutError
from playwright.async_api import Frame, Locator, Page
from pydantic import BaseModel, PrivateAttr
@@ -675,9 +676,20 @@ class IncrementalScrapePage:
frame = self.skyvern_frame.get_frame()
js_script = "async () => await getIncrementElements()"
incremental_elements, incremental_tree = await SkyvernFrame.evaluate(
frame=frame, expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS
)
try:
incremental_elements, incremental_tree = await SkyvernFrame.evaluate(
frame=frame, expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS
)
except TimeoutError:
LOG.warning(
"Timeout to get incremental elements with wait_until_finished, going to get incremental elements without waiting",
)
js_script = "async () => await getIncrementElements(false)"
incremental_elements, incremental_tree = await SkyvernFrame.evaluate(
frame=frame, expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS
)
# we listen for incremental elements separately per frame, so all elements will be in the same SkyvernFrame
self.id_to_css_dict, self.id_to_element_dict, _, _, _ = build_element_dict(incremental_elements)