diff --git a/skyvern/webeye/scraper/domUtils.js b/skyvern/webeye/scraper/domUtils.js index 42236a43..fad968be 100644 --- a/skyvern/webeye/scraper/domUtils.js +++ b/skyvern/webeye/scraper/domUtils.js @@ -92,6 +92,7 @@ class Rect { } class DomUtils { + static elementListCache = []; static visibleClientRectCache = new WeakMap(); // // Bounds the rect by the current viewport dimensions. If the rect is offscreen or has a height or @@ -877,7 +878,7 @@ function isInteractable(element, hoverStylesMap) { return true; } - const className = element.className.toString(); + const className = element.className?.toString() ?? ""; if (tagName === "div" || tagName === "span") { if (hasAngularClickBinding(element)) { @@ -1784,6 +1785,7 @@ async function buildElementTree( trimDuplicatedText(root); }); + DomUtils.elementListCache = elements; return [elements, resultArray]; } @@ -1804,6 +1806,11 @@ async function buildElementsAndDrawBoundingBoxes( frame = "main.frame", frame_index = undefined, ) { + if (DomUtils.elementListCache.length > 0) { + drawBoundingBoxes(DomUtils.elementListCache); + return; + } + _jsConsoleWarn("no element list cache, drawBoundingBoxes from scratch"); var elementsAndResultArray = await buildTreeFromBody(frame, frame_index); drawBoundingBoxes(elementsAndResultArray[0]); } @@ -2065,6 +2072,13 @@ async function safeScrollToTop( return window.scrollY; } +function getScrollWidthAndHeight() { + return [ + document.documentElement.scrollWidth, + document.documentElement.scrollHeight, + ]; +} + function getScrollXY() { return [window.scrollX, window.scrollY]; } diff --git a/skyvern/webeye/utils/page.py b/skyvern/webeye/utils/page.py index e107c19e..450ffb3d 100644 --- a/skyvern/webeye/utils/page.py +++ b/skyvern/webeye/utils/page.py @@ -122,11 +122,23 @@ async def _scrolling_screenshots_helper( positions: list[int] = [] if await skyvern_page.is_window_scrollable(): scroll_y_px_old = -30.0 + _, initial_scroll_height = await skyvern_page.get_scroll_width_and_height() scroll_y_px = await skyvern_page.scroll_to_top(draw_boxes=draw_boxes, frame=frame, frame_index=frame_index) # Checking max number of screenshots to prevent infinite loop # We are checking the difference between the old and new scroll_y_px to determine if we have reached the end of the # page. If the difference is less than 25, we assume we have reached the end of the page. while abs(scroll_y_px_old - scroll_y_px) > 25 and len(screenshots) < max_number: + # check if the scroll height changed, if so, rebuild the element tree + _, scroll_height = await skyvern_page.get_scroll_width_and_height() + if scroll_height != initial_scroll_height: + LOG.warning( + "Scroll height changed, rebuild the element tree", + scroll_height=scroll_height, + initial_scroll_height=initial_scroll_height, + ) + await skyvern_page.build_tree_from_body(frame_name=frame, frame_index=frame_index) + initial_scroll_height = scroll_height + screenshot = await _current_viewpoint_screenshot_helper(page=skyvern_page.frame, mode=mode) screenshots.append(screenshot) positions.append(int(scroll_y_px)) @@ -347,6 +359,10 @@ class SkyvernFrame: js_script = "() => getScrollXY()" return await self.evaluate(frame=self.frame, expression=js_script) + async def get_scroll_width_and_height(self) -> tuple[int, int]: + js_script = "() => getScrollWidthAndHeight()" + return await self.evaluate(frame=self.frame, expression=js_script) + async def scroll_to_x_y(self, x: int, y: int) -> None: js_script = "([x, y]) => scrollToXY(x, y)" return await self.evaluate(frame=self.frame, expression=js_script, arg=[x, y])