Fix page-level SCROLL: preserve scroll position for T&C buttons (#SKY-7924) (#4772)

This commit is contained in:
pedrohsdb
2026-02-17 16:57:11 -08:00
committed by GitHub
parent a71493bbad
commit 03a1f9d5ba
3 changed files with 120 additions and 16 deletions

View File

@@ -1326,8 +1326,27 @@ class ForgeAgent:
digit=current_text, digit=current_text,
) )
# Skip sleep and post-action artifacts for page-level SCROLL to preserve
# scroll-driven JS state. Many pages enable buttons only while scrolled to
# bottom (e.g. T&C "Agree" buttons) and re-disable them after any delay or
# programmatic scroll. Sub-container scrolls (strategies 1 & 2) don't affect
# page position, so they keep normal sleep and artifact recording.
is_page_level_scroll = action.action_type == ActionType.SCROLL and any(
r.success and isinstance(r.data, dict) and r.data.get("page_level_scroll") for r in results
)
if is_page_level_scroll:
wait_time = 0.0
await asyncio.sleep(wait_time) await asyncio.sleep(wait_time)
await self.record_artifacts_after_action(task, step, browser_state, engine, action) if not is_page_level_scroll:
await self.record_artifacts_after_action(task, step, browser_state, engine, action)
else:
LOG.info(
"Skipping post-action artifacts for page-level scroll",
step_order=step.order,
step_retry=step.retry_index,
action_idx=action_idx,
)
for result in results: for result in results:
result.step_retry_number = step.retry_index result.step_retry_number = step.retry_index
result.step_order = step.order result.step_order = step.order

View File

@@ -726,13 +726,29 @@ async def handle_click_action(
) )
return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))] return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]
try: # Skip scroll_into_view when a page-level SCROLL just completed on THIS element.
await skyvern_element.scroll_into_view() # The scroll positioned the page at the bottom to enable T&C buttons;
except Exception: # scroll_into_view() would use programmatic window.scroll() to center the
# element, moving the page away from the bottom and re-disabling the button.
# Uses element ID matching (not a boolean) so unrelated clicks aren't affected.
skip_scroll_into_view = await page.evaluate(
"(id) => { const v = window.__skyvernPageScrolledElementId;"
" window.__skyvernPageScrolledElementId = null; return v === id; }",
action.element_id,
)
if skip_scroll_into_view:
LOG.info( LOG.info(
"Failed to scroll into view, ignore it and continue executing", "Skipping scroll_into_view after page-level scroll to preserve scroll position",
element_id=skyvern_element.get_id(), element_id=skyvern_element.get_id(),
) )
else:
try:
await skyvern_element.scroll_into_view()
except Exception:
LOG.info(
"Failed to scroll into view, ignore it and continue executing",
element_id=skyvern_element.get_id(),
)
if action.download: if action.download:
results = await handle_click_to_download_file_action(action, page, scraped_page, task, step) results = await handle_click_to_download_file_action(action, page, scraped_page, task, step)
@@ -2189,13 +2205,15 @@ async def handle_scroll_action(
# Element-based scrolling from extract-action prompt. Uses # Element-based scrolling from extract-action prompt. Uses
# scrollNearestScrollableContainer() from domUtils.js which walks the DOM to find # scrollNearestScrollableContainer() from domUtils.js which walks the DOM to find
# the nearest scrollable ancestor or sibling container relative to the element. # the nearest scrollable ancestor or sibling container relative to the element.
# Returns: truthy value if scrolled (true for sub-container, "page" for page-level),
# false if nothing was scrollable.
scroll_direction = "down" if action.scroll_y >= 0 else "up" scroll_direction = "down" if action.scroll_y >= 0 else "up"
scrolled = False scroll_result = False
dom = DomUtil(scraped_page=scraped_page, page=page) dom = DomUtil(scraped_page=scraped_page, page=page)
skyvern_element = await dom.safe_get_skyvern_element_by_id(action.element_id) skyvern_element = await dom.safe_get_skyvern_element_by_id(action.element_id)
if skyvern_element: if skyvern_element:
try: try:
scrolled = await skyvern_element.locator.evaluate( scroll_result = await skyvern_element.locator.evaluate(
"(el, direction) => scrollNearestScrollableContainer(el, direction)", "(el, direction) => scrollNearestScrollableContainer(el, direction)",
scroll_direction, scroll_direction,
) )
@@ -2207,7 +2225,58 @@ async def handle_scroll_action(
) )
else: else:
LOG.warning("Could not resolve element for scroll action", element_id=action.element_id) LOG.warning("Could not resolve element for scroll action", element_id=action.element_id)
if not scrolled:
if scroll_result == "page":
# No scrollable sub-container found, but the page itself is scrollable.
# Use incremental mouse.wheel events at the center of the viewport to
# simulate natural user scrolling. This fires native wheel/scroll events
# that page JavaScript (IntersectionObserver, scroll listeners, etc.) can
# detect — unlike programmatic window.scrollTo() or keyboard shortcuts
# which many pages ignore.
LOG.info(
"Page-level scroll, using mouse wheel at viewport center",
element_id=action.element_id,
direction=scroll_direction,
)
viewport = page.viewport_size
center_x = viewport["width"] // 2 if viewport else 640
center_y = viewport["height"] // 2 if viewport else 360
await page.mouse.move(center_x, center_y)
wheel_delta = 500 if scroll_direction == "down" else -500
# Dynamically compute iterations based on remaining scrollable distance
# so we reach the bottom even on very long T&C pages.
scroll_info = await page.evaluate(
"() => ({ scrollHeight: document.documentElement.scrollHeight,"
" scrollTop: window.pageYOffset, innerHeight: window.innerHeight })"
)
if scroll_direction == "down":
remaining = scroll_info["scrollHeight"] - scroll_info["scrollTop"] - scroll_info["innerHeight"]
else:
remaining = scroll_info["scrollTop"]
iterations = max(1, min(int(remaining / abs(wheel_delta)) + 1, 50))
LOG.info(
"Page-level scroll iterations",
remaining_px=remaining,
iterations=iterations,
wheel_delta=wheel_delta,
)
for _ in range(iterations):
await page.mouse.wheel(0, wheel_delta)
await page.wait_for_timeout(100)
# Wait for page JS to process scroll events (e.g. enabling buttons)
await page.wait_for_timeout(500)
# Record which element was just page-level scrolled. The click handler
# checks this to skip scroll_into_view() for the SAME element, which
# would use programmatic window.scroll() to center it — undoing the
# scroll position that enables buttons on T&C pages. Using the element
# ID (not a boolean) ensures unrelated clicks aren't affected.
await page.evaluate(
"(id) => { window.__skyvernPageScrolledElementId = id; }",
action.element_id,
)
return [ActionSuccess(data={"page_level_scroll": True})]
elif not scroll_result:
LOG.warning( LOG.warning(
"Could not find scrollable container near element, falling back to mouse wheel", "Could not find scrollable container near element, falling back to mouse wheel",
element_id=action.element_id, element_id=action.element_id,

View File

@@ -2328,10 +2328,12 @@ function isWindowScrollable() {
/** /**
* Find the nearest scrollable container relative to the given element and scroll it. * Find the nearest scrollable container relative to the given element and scroll it.
* Two strategies: * Three strategies in priority order:
* 1) Walk up from element to find a scrollable ancestor (element is inside container) * 1) Walk up from element to find a scrollable ancestor (element is inside container)
* 2) Walk up the DOM checking siblings at each level (element is beside container) * 2) Walk up the DOM checking siblings at each level (element is beside container)
* Returns true if a scrollable container was found and scrolled, false otherwise. * 3) Fall back to page-level scrolling (for pages where the body itself scrolls,
* e.g. T&C pages with no scrollable sub-container)
* Returns: true (sub-container scrolled), "page" (page-level scroll needed), or false (nothing scrollable).
*/ */
function scrollNearestScrollableContainer(element, direction) { function scrollNearestScrollableContainer(element, direction) {
function isContainerScrollable(node) { function isContainerScrollable(node) {
@@ -2367,13 +2369,27 @@ function scrollNearestScrollableContainer(element, direction) {
} }
} }
if (!target) return false; // Scroll the sub-container if found
if (direction === "down") { if (target) {
target.scrollTop = target.scrollHeight; if (direction === "down") {
} else { target.scrollTop = target.scrollHeight;
target.scrollTop = 0; } else {
target.scrollTop = 0;
}
return true;
} }
return true;
// Strategy 3: fall back to page-level scrolling when no sub-container exists.
// Many pages (e.g. T&C agreements) render content inline with the page body
// as the only scrollable area. Return "page" to signal the Python handler to
// use mouse.wheel events for native browser scrolling, which reliably triggers
// page JavaScript (IntersectionObserver, scroll listeners) that programmatic
// scrollTo/scrollTop methods may not.
if (isWindowScrollable()) {
return "page";
}
return false;
} }
function scrollToElementBottom(element, page_by_page = false) { function scrollToElementBottom(element, page_by_page = false) {