diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 6d57e688..b9d74bce 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -1326,8 +1326,27 @@ class ForgeAgent: digit=current_text, ) + # Skip sleep and post-action artifacts for page-level SCROLL to preserve + # scroll-driven JS state. Many pages enable buttons only while scrolled to + # bottom (e.g. T&C "Agree" buttons) and re-disable them after any delay or + # programmatic scroll. Sub-container scrolls (strategies 1 & 2) don't affect + # page position, so they keep normal sleep and artifact recording. + is_page_level_scroll = action.action_type == ActionType.SCROLL and any( + r.success and isinstance(r.data, dict) and r.data.get("page_level_scroll") for r in results + ) + if is_page_level_scroll: + wait_time = 0.0 + await asyncio.sleep(wait_time) - await self.record_artifacts_after_action(task, step, browser_state, engine, action) + if not is_page_level_scroll: + await self.record_artifacts_after_action(task, step, browser_state, engine, action) + else: + LOG.info( + "Skipping post-action artifacts for page-level scroll", + step_order=step.order, + step_retry=step.retry_index, + action_idx=action_idx, + ) for result in results: result.step_retry_number = step.retry_index result.step_order = step.order diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index cede8278..2f9f1083 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -726,13 +726,29 @@ async def handle_click_action( ) return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))] - try: - await skyvern_element.scroll_into_view() - except Exception: + # Skip scroll_into_view when a page-level SCROLL just completed on THIS element. + # The scroll positioned the page at the bottom to enable T&C buttons; + # scroll_into_view() would use programmatic window.scroll() to center the + # element, moving the page away from the bottom and re-disabling the button. + # Uses element ID matching (not a boolean) so unrelated clicks aren't affected. + skip_scroll_into_view = await page.evaluate( + "(id) => { const v = window.__skyvernPageScrolledElementId;" + " window.__skyvernPageScrolledElementId = null; return v === id; }", + action.element_id, + ) + if skip_scroll_into_view: LOG.info( - "Failed to scroll into view, ignore it and continue executing", + "Skipping scroll_into_view after page-level scroll to preserve scroll position", element_id=skyvern_element.get_id(), ) + else: + try: + await skyvern_element.scroll_into_view() + except Exception: + LOG.info( + "Failed to scroll into view, ignore it and continue executing", + element_id=skyvern_element.get_id(), + ) if action.download: results = await handle_click_to_download_file_action(action, page, scraped_page, task, step) @@ -2189,13 +2205,15 @@ async def handle_scroll_action( # Element-based scrolling from extract-action prompt. Uses # scrollNearestScrollableContainer() from domUtils.js which walks the DOM to find # the nearest scrollable ancestor or sibling container relative to the element. + # Returns: truthy value if scrolled (true for sub-container, "page" for page-level), + # false if nothing was scrollable. scroll_direction = "down" if action.scroll_y >= 0 else "up" - scrolled = False + scroll_result = False dom = DomUtil(scraped_page=scraped_page, page=page) skyvern_element = await dom.safe_get_skyvern_element_by_id(action.element_id) if skyvern_element: try: - scrolled = await skyvern_element.locator.evaluate( + scroll_result = await skyvern_element.locator.evaluate( "(el, direction) => scrollNearestScrollableContainer(el, direction)", scroll_direction, ) @@ -2207,7 +2225,58 @@ async def handle_scroll_action( ) else: LOG.warning("Could not resolve element for scroll action", element_id=action.element_id) - if not scrolled: + + if scroll_result == "page": + # No scrollable sub-container found, but the page itself is scrollable. + # Use incremental mouse.wheel events at the center of the viewport to + # simulate natural user scrolling. This fires native wheel/scroll events + # that page JavaScript (IntersectionObserver, scroll listeners, etc.) can + # detect — unlike programmatic window.scrollTo() or keyboard shortcuts + # which many pages ignore. + LOG.info( + "Page-level scroll, using mouse wheel at viewport center", + element_id=action.element_id, + direction=scroll_direction, + ) + viewport = page.viewport_size + center_x = viewport["width"] // 2 if viewport else 640 + center_y = viewport["height"] // 2 if viewport else 360 + await page.mouse.move(center_x, center_y) + wheel_delta = 500 if scroll_direction == "down" else -500 + # Dynamically compute iterations based on remaining scrollable distance + # so we reach the bottom even on very long T&C pages. + scroll_info = await page.evaluate( + "() => ({ scrollHeight: document.documentElement.scrollHeight," + " scrollTop: window.pageYOffset, innerHeight: window.innerHeight })" + ) + if scroll_direction == "down": + remaining = scroll_info["scrollHeight"] - scroll_info["scrollTop"] - scroll_info["innerHeight"] + else: + remaining = scroll_info["scrollTop"] + iterations = max(1, min(int(remaining / abs(wheel_delta)) + 1, 50)) + LOG.info( + "Page-level scroll iterations", + remaining_px=remaining, + iterations=iterations, + wheel_delta=wheel_delta, + ) + for _ in range(iterations): + await page.mouse.wheel(0, wheel_delta) + await page.wait_for_timeout(100) + # Wait for page JS to process scroll events (e.g. enabling buttons) + await page.wait_for_timeout(500) + + # Record which element was just page-level scrolled. The click handler + # checks this to skip scroll_into_view() for the SAME element, which + # would use programmatic window.scroll() to center it — undoing the + # scroll position that enables buttons on T&C pages. Using the element + # ID (not a boolean) ensures unrelated clicks aren't affected. + await page.evaluate( + "(id) => { window.__skyvernPageScrolledElementId = id; }", + action.element_id, + ) + return [ActionSuccess(data={"page_level_scroll": True})] + elif not scroll_result: LOG.warning( "Could not find scrollable container near element, falling back to mouse wheel", element_id=action.element_id, diff --git a/skyvern/webeye/scraper/domUtils.js b/skyvern/webeye/scraper/domUtils.js index 055fd908..b75d40f1 100644 --- a/skyvern/webeye/scraper/domUtils.js +++ b/skyvern/webeye/scraper/domUtils.js @@ -2328,10 +2328,12 @@ function isWindowScrollable() { /** * Find the nearest scrollable container relative to the given element and scroll it. - * Two strategies: + * Three strategies in priority order: * 1) Walk up from element to find a scrollable ancestor (element is inside container) * 2) Walk up the DOM checking siblings at each level (element is beside container) - * Returns true if a scrollable container was found and scrolled, false otherwise. + * 3) Fall back to page-level scrolling (for pages where the body itself scrolls, + * e.g. T&C pages with no scrollable sub-container) + * Returns: true (sub-container scrolled), "page" (page-level scroll needed), or false (nothing scrollable). */ function scrollNearestScrollableContainer(element, direction) { function isContainerScrollable(node) { @@ -2367,13 +2369,27 @@ function scrollNearestScrollableContainer(element, direction) { } } - if (!target) return false; - if (direction === "down") { - target.scrollTop = target.scrollHeight; - } else { - target.scrollTop = 0; + // Scroll the sub-container if found + if (target) { + if (direction === "down") { + target.scrollTop = target.scrollHeight; + } else { + target.scrollTop = 0; + } + return true; } - return true; + + // Strategy 3: fall back to page-level scrolling when no sub-container exists. + // Many pages (e.g. T&C agreements) render content inline with the page body + // as the only scrollable area. Return "page" to signal the Python handler to + // use mouse.wheel events for native browser scrolling, which reliably triggers + // page JavaScript (IntersectionObserver, scroll listeners) that programmatic + // scrollTo/scrollTop methods may not. + if (isWindowScrollable()) { + return "page"; + } + + return false; } function scrollToElementBottom(element, page_by_page = false) {