Stop rebuilding the element tree again and again when drawing bounding boxes (#3191)

This commit is contained in:
LawyZheng
2025-08-15 01:40:39 +08:00
committed by GitHub
parent 6bc499d8cd
commit 04fd540cd5
2 changed files with 31 additions and 1 deletions

View File

@@ -92,6 +92,7 @@ class Rect {
}
class DomUtils {
static elementListCache = [];
static visibleClientRectCache = new WeakMap();
//
// Bounds the rect by the current viewport dimensions. If the rect is offscreen or has a height or
@@ -877,7 +878,7 @@ function isInteractable(element, hoverStylesMap) {
return true;
}
const className = element.className.toString();
const className = element.className?.toString() ?? "";
if (tagName === "div" || tagName === "span") {
if (hasAngularClickBinding(element)) {
@@ -1784,6 +1785,7 @@ async function buildElementTree(
trimDuplicatedText(root);
});
DomUtils.elementListCache = elements;
return [elements, resultArray];
}
@@ -1804,6 +1806,11 @@ async function buildElementsAndDrawBoundingBoxes(
frame = "main.frame",
frame_index = undefined,
) {
if (DomUtils.elementListCache.length > 0) {
drawBoundingBoxes(DomUtils.elementListCache);
return;
}
_jsConsoleWarn("no element list cache, drawBoundingBoxes from scratch");
var elementsAndResultArray = await buildTreeFromBody(frame, frame_index);
drawBoundingBoxes(elementsAndResultArray[0]);
}
@@ -2065,6 +2072,13 @@ async function safeScrollToTop(
return window.scrollY;
}
/**
 * Reports the scrollable size of the document's root element.
 *
 * @returns {number[]} A two-item array: `[scrollWidth, scrollHeight]` as read
 *   from `document.documentElement`.
 */
function getScrollWidthAndHeight() {
  const { scrollWidth, scrollHeight } = document.documentElement;
  return [scrollWidth, scrollHeight];
}
/**
 * Reports the window's current scroll offsets.
 *
 * @returns {number[]} A two-item array: `[scrollX, scrollY]` as read from
 *   `window`.
 */
function getScrollXY() {
  const { scrollX, scrollY } = window;
  return [scrollX, scrollY];
}

View File

@@ -122,11 +122,23 @@ async def _scrolling_screenshots_helper(
positions: list[int] = []
if await skyvern_page.is_window_scrollable():
scroll_y_px_old = -30.0
_, initial_scroll_height = await skyvern_page.get_scroll_width_and_height()
scroll_y_px = await skyvern_page.scroll_to_top(draw_boxes=draw_boxes, frame=frame, frame_index=frame_index)
# Checking max number of screenshots to prevent infinite loop
# We are checking the difference between the old and new scroll_y_px to determine if we have reached the end of the
# page. If the difference is less than 25, we assume we have reached the end of the page.
while abs(scroll_y_px_old - scroll_y_px) > 25 and len(screenshots) < max_number:
# check if the scroll height changed, if so, rebuild the element tree
_, scroll_height = await skyvern_page.get_scroll_width_and_height()
if scroll_height != initial_scroll_height:
LOG.warning(
"Scroll height changed, rebuild the element tree",
scroll_height=scroll_height,
initial_scroll_height=initial_scroll_height,
)
await skyvern_page.build_tree_from_body(frame_name=frame, frame_index=frame_index)
initial_scroll_height = scroll_height
screenshot = await _current_viewpoint_screenshot_helper(page=skyvern_page.frame, mode=mode)
screenshots.append(screenshot)
positions.append(int(scroll_y_px))
@@ -347,6 +359,10 @@ class SkyvernFrame:
js_script = "() => getScrollXY()"
return await self.evaluate(frame=self.frame, expression=js_script)
async def get_scroll_width_and_height(self) -> tuple[int, int]:
    """Evaluate the in-page ``getScrollWidthAndHeight()`` helper on this frame.

    Returns:
        The value produced by ``self.evaluate`` for that call, annotated as a
        ``(scroll_width, scroll_height)`` pair.
    """
    return await self.evaluate(
        frame=self.frame,
        expression="() => getScrollWidthAndHeight()",
    )
async def scroll_to_x_y(self, x: int, y: int) -> None:
    """Evaluate the in-page ``scrollToXY(x, y)`` helper on this frame.

    Args:
        x: First value forwarded to ``scrollToXY``.
        y: Second value forwarded to ``scrollToXY``.
    """
    # The script destructures a single two-item array argument.
    coordinates = [x, y]
    return await self.evaluate(
        frame=self.frame,
        expression="([x, y]) => scrollToXY(x, y)",
        arg=coordinates,
    )