From 069597e52e854f14cdd2db92740e18531be37b04 Mon Sep 17 00:00:00 2001
From: LawyZheng
Date: Wed, 4 Sep 2024 02:31:04 +0800
Subject: [PATCH] stop scrolling when cant scroll (#771)

---
 skyvern/webeye/scraper/domUtils.js | 30 ++++++++++++++---
 skyvern/webeye/utils/page.py       | 54 ++++++++++++++++++++----------
 2 files changed, 63 insertions(+), 21 deletions(-)

diff --git a/skyvern/webeye/scraper/domUtils.js b/skyvern/webeye/scraper/domUtils.js
index c2624503..d89e2e78 100644
--- a/skyvern/webeye/scraper/domUtils.js
+++ b/skyvern/webeye/scraper/domUtils.js
@@ -1372,6 +1372,11 @@ function drawBoundingBoxes(elements) {
   addHintMarkersToPage(hintMarkers);
 }
 
+function buildElementsAndDrawBoundingBoxes() {
+  var elementsAndResultArray = buildTreeFromBody();
+  drawBoundingBoxes(elementsAndResultArray[0]);
+}
+
 function captchaSolvedCallback() {
   console.log("captcha solved");
   if (!window["captchaSolvedCounter"]) {
@@ -1556,8 +1561,7 @@ function scrollToTop(draw_boxes) {
   removeBoundingBoxes();
   window.scroll({ left: 0, top: 0, behavior: "instant" });
   if (draw_boxes) {
-    var elementsAndResultArray = buildTreeFromBody();
-    drawBoundingBoxes(elementsAndResultArray[0]);
+    buildElementsAndDrawBoundingBoxes();
   }
   return window.scrollY;
 }
@@ -1572,12 +1576,30 @@ function scrollToNextPage(draw_boxes) {
     behavior: "instant",
   });
   if (draw_boxes) {
-    var elementsAndResultArray = buildTreeFromBody();
-    drawBoundingBoxes(elementsAndResultArray[0]);
+    buildElementsAndDrawBoundingBoxes();
   }
   return window.scrollY;
 }
 
+function isWindowScrollable() {
+  // Check if the body's overflow style is set to hidden
+  const bodyOverflow = window.getComputedStyle(document.body).overflow;
+  const htmlOverflow = window.getComputedStyle(
+    document.documentElement,
+  ).overflow;
+
+  // Check if the document height is greater than the window height
+  const isScrollable =
+    document.documentElement.scrollHeight > window.innerHeight;
+
+  // If the overflow is set to 'hidden' or there is no content to scroll, return false
+  if (bodyOverflow === "hidden" || htmlOverflow === "hidden" || !isScrollable) {
+    return false;
+  }
+
+  return true;
+}
+
 function scrollToElementBottom(element, page_by_page = false) {
   const top = page_by_page
     ? element.clientHeight + element.scrollTop
diff --git a/skyvern/webeye/utils/page.py b/skyvern/webeye/utils/page.py
index fbfbb889..5ed9d92a 100644
--- a/skyvern/webeye/utils/page.py
+++ b/skyvern/webeye/utils/page.py
@@ -92,25 +92,37 @@ class SkyvernFrame:
         assert isinstance(skyvern_page.frame, Page)
         screenshots: List[bytes] = []
 
-        scroll_y_px_old = -30.0
-        scroll_y_px = await skyvern_page.scroll_to_top(draw_boxes=draw_boxes)
-        # Checking max number of screenshots to prevent infinite loop
-        # We are checking the difference between the old and new scroll_y_px to determine if we have reached the end of the
-        # page. If the difference is less than 25, we assume we have reached the end of the page.
-        while abs(scroll_y_px_old - scroll_y_px) > 25 and len(screenshots) < max_number:
+        if await skyvern_page.is_window_scrollable():
+            scroll_y_px_old = -30.0
+            scroll_y_px = await skyvern_page.scroll_to_top(draw_boxes=draw_boxes)
+            # Checking max number of screenshots to prevent infinite loop
+            # We are checking the difference between the old and new scroll_y_px to determine if we have reached the end of the
+            # page. If the difference is less than 25, we assume we have reached the end of the page.
+            while abs(scroll_y_px_old - scroll_y_px) > 25 and len(screenshots) < max_number:
+                screenshot = await SkyvernFrame.take_screenshot(page=skyvern_page.frame, full_page=False)
+                screenshots.append(screenshot)
+                scroll_y_px_old = scroll_y_px
+                LOG.debug("Scrolling to next page", url=url, num_screenshots=len(screenshots))
+                scroll_y_px = await skyvern_page.scroll_to_next_page(draw_boxes=draw_boxes)
+                LOG.debug(
+                    "Scrolled to next page",
+                    scroll_y_px=scroll_y_px,
+                    scroll_y_px_old=scroll_y_px_old,
+                )
+            if draw_boxes:
+                await skyvern_page.remove_bounding_boxes()
+            await skyvern_page.scroll_to_top(draw_boxes=False)
+        else:
+            if draw_boxes:
+                await skyvern_page.build_elements_and_draw_bounding_boxes()
+
+            LOG.debug("Page is not scrollable", url=url, num_screenshots=len(screenshots))
             screenshot = await SkyvernFrame.take_screenshot(page=skyvern_page.frame, full_page=False)
             screenshots.append(screenshot)
-            scroll_y_px_old = scroll_y_px
-            LOG.debug("Scrolling to next page", url=url, num_screenshots=len(screenshots))
-            scroll_y_px = await skyvern_page.scroll_to_next_page(draw_boxes=draw_boxes)
-            LOG.debug(
-                "Scrolled to next page",
-                scroll_y_px=scroll_y_px,
-                scroll_y_px_old=scroll_y_px_old,
-            )
-        if draw_boxes:
-            await skyvern_page.remove_bounding_boxes()
-        await skyvern_page.scroll_to_top(draw_boxes=False)
+
+            if draw_boxes:
+                await skyvern_page.remove_bounding_boxes()
+
         return screenshots
 
     @staticmethod
@@ -205,3 +217,11 @@ class SkyvernFrame:
         """
         js_script = "() => removeBoundingBoxes()"
         await self.frame.evaluate(js_script)
+
+    async def build_elements_and_draw_bounding_boxes(self) -> None:
+        js_script = "() => buildElementsAndDrawBoundingBoxes()"
+        await self.frame.evaluate(js_script)
+
+    async def is_window_scrollable(self) -> bool:
+        js_script = "() => isWindowScrollable()"
+        return await self.frame.evaluate(js_script)