Fix page-level SCROLL: preserve scroll position for T&C buttons (#SKY-7924) (#4772)
This commit is contained in:
@@ -1326,8 +1326,27 @@ class ForgeAgent:
|
|||||||
digit=current_text,
|
digit=current_text,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Skip sleep and post-action artifacts for page-level SCROLL to preserve
|
||||||
|
# scroll-driven JS state. Many pages enable buttons only while scrolled to
|
||||||
|
# bottom (e.g. T&C "Agree" buttons) and re-disable them after any delay or
|
||||||
|
# programmatic scroll. Sub-container scrolls (strategies 1 & 2) don't affect
|
||||||
|
# page position, so they keep normal sleep and artifact recording.
|
||||||
|
is_page_level_scroll = action.action_type == ActionType.SCROLL and any(
|
||||||
|
r.success and isinstance(r.data, dict) and r.data.get("page_level_scroll") for r in results
|
||||||
|
)
|
||||||
|
if is_page_level_scroll:
|
||||||
|
wait_time = 0.0
|
||||||
|
|
||||||
await asyncio.sleep(wait_time)
|
await asyncio.sleep(wait_time)
|
||||||
await self.record_artifacts_after_action(task, step, browser_state, engine, action)
|
if not is_page_level_scroll:
|
||||||
|
await self.record_artifacts_after_action(task, step, browser_state, engine, action)
|
||||||
|
else:
|
||||||
|
LOG.info(
|
||||||
|
"Skipping post-action artifacts for page-level scroll",
|
||||||
|
step_order=step.order,
|
||||||
|
step_retry=step.retry_index,
|
||||||
|
action_idx=action_idx,
|
||||||
|
)
|
||||||
for result in results:
|
for result in results:
|
||||||
result.step_retry_number = step.retry_index
|
result.step_retry_number = step.retry_index
|
||||||
result.step_order = step.order
|
result.step_order = step.order
|
||||||
|
|||||||
@@ -726,13 +726,29 @@ async def handle_click_action(
|
|||||||
)
|
)
|
||||||
return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]
|
return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]
|
||||||
|
|
||||||
try:
|
# Skip scroll_into_view when a page-level SCROLL just completed on THIS element.
|
||||||
await skyvern_element.scroll_into_view()
|
# The scroll positioned the page at the bottom to enable T&C buttons;
|
||||||
except Exception:
|
# scroll_into_view() would use programmatic window.scroll() to center the
|
||||||
|
# element, moving the page away from the bottom and re-disabling the button.
|
||||||
|
# Uses element ID matching (not a boolean) so unrelated clicks aren't affected.
|
||||||
|
skip_scroll_into_view = await page.evaluate(
|
||||||
|
"(id) => { const v = window.__skyvernPageScrolledElementId;"
|
||||||
|
" window.__skyvernPageScrolledElementId = null; return v === id; }",
|
||||||
|
action.element_id,
|
||||||
|
)
|
||||||
|
if skip_scroll_into_view:
|
||||||
LOG.info(
|
LOG.info(
|
||||||
"Failed to scroll into view, ignore it and continue executing",
|
"Skipping scroll_into_view after page-level scroll to preserve scroll position",
|
||||||
element_id=skyvern_element.get_id(),
|
element_id=skyvern_element.get_id(),
|
||||||
)
|
)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
await skyvern_element.scroll_into_view()
|
||||||
|
except Exception:
|
||||||
|
LOG.info(
|
||||||
|
"Failed to scroll into view, ignore it and continue executing",
|
||||||
|
element_id=skyvern_element.get_id(),
|
||||||
|
)
|
||||||
|
|
||||||
if action.download:
|
if action.download:
|
||||||
results = await handle_click_to_download_file_action(action, page, scraped_page, task, step)
|
results = await handle_click_to_download_file_action(action, page, scraped_page, task, step)
|
||||||
@@ -2189,13 +2205,15 @@ async def handle_scroll_action(
|
|||||||
# Element-based scrolling from extract-action prompt. Uses
|
# Element-based scrolling from extract-action prompt. Uses
|
||||||
# scrollNearestScrollableContainer() from domUtils.js which walks the DOM to find
|
# scrollNearestScrollableContainer() from domUtils.js which walks the DOM to find
|
||||||
# the nearest scrollable ancestor or sibling container relative to the element.
|
# the nearest scrollable ancestor or sibling container relative to the element.
|
||||||
|
# Returns: truthy value if scrolled (true for sub-container, "page" for page-level),
|
||||||
|
# false if nothing was scrollable.
|
||||||
scroll_direction = "down" if action.scroll_y >= 0 else "up"
|
scroll_direction = "down" if action.scroll_y >= 0 else "up"
|
||||||
scrolled = False
|
scroll_result = False
|
||||||
dom = DomUtil(scraped_page=scraped_page, page=page)
|
dom = DomUtil(scraped_page=scraped_page, page=page)
|
||||||
skyvern_element = await dom.safe_get_skyvern_element_by_id(action.element_id)
|
skyvern_element = await dom.safe_get_skyvern_element_by_id(action.element_id)
|
||||||
if skyvern_element:
|
if skyvern_element:
|
||||||
try:
|
try:
|
||||||
scrolled = await skyvern_element.locator.evaluate(
|
scroll_result = await skyvern_element.locator.evaluate(
|
||||||
"(el, direction) => scrollNearestScrollableContainer(el, direction)",
|
"(el, direction) => scrollNearestScrollableContainer(el, direction)",
|
||||||
scroll_direction,
|
scroll_direction,
|
||||||
)
|
)
|
||||||
@@ -2207,7 +2225,58 @@ async def handle_scroll_action(
|
|||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
LOG.warning("Could not resolve element for scroll action", element_id=action.element_id)
|
LOG.warning("Could not resolve element for scroll action", element_id=action.element_id)
|
||||||
if not scrolled:
|
|
||||||
|
if scroll_result == "page":
|
||||||
|
# No scrollable sub-container found, but the page itself is scrollable.
|
||||||
|
# Use incremental mouse.wheel events at the center of the viewport to
|
||||||
|
# simulate natural user scrolling. This fires native wheel/scroll events
|
||||||
|
# that page JavaScript (IntersectionObserver, scroll listeners, etc.) can
|
||||||
|
# detect — unlike programmatic window.scrollTo() or keyboard shortcuts
|
||||||
|
# which many pages ignore.
|
||||||
|
LOG.info(
|
||||||
|
"Page-level scroll, using mouse wheel at viewport center",
|
||||||
|
element_id=action.element_id,
|
||||||
|
direction=scroll_direction,
|
||||||
|
)
|
||||||
|
viewport = page.viewport_size
|
||||||
|
center_x = viewport["width"] // 2 if viewport else 640
|
||||||
|
center_y = viewport["height"] // 2 if viewport else 360
|
||||||
|
await page.mouse.move(center_x, center_y)
|
||||||
|
wheel_delta = 500 if scroll_direction == "down" else -500
|
||||||
|
# Dynamically compute iterations based on remaining scrollable distance
|
||||||
|
# so we reach the bottom even on very long T&C pages.
|
||||||
|
scroll_info = await page.evaluate(
|
||||||
|
"() => ({ scrollHeight: document.documentElement.scrollHeight,"
|
||||||
|
" scrollTop: window.pageYOffset, innerHeight: window.innerHeight })"
|
||||||
|
)
|
||||||
|
if scroll_direction == "down":
|
||||||
|
remaining = scroll_info["scrollHeight"] - scroll_info["scrollTop"] - scroll_info["innerHeight"]
|
||||||
|
else:
|
||||||
|
remaining = scroll_info["scrollTop"]
|
||||||
|
iterations = max(1, min(int(remaining / abs(wheel_delta)) + 1, 50))
|
||||||
|
LOG.info(
|
||||||
|
"Page-level scroll iterations",
|
||||||
|
remaining_px=remaining,
|
||||||
|
iterations=iterations,
|
||||||
|
wheel_delta=wheel_delta,
|
||||||
|
)
|
||||||
|
for _ in range(iterations):
|
||||||
|
await page.mouse.wheel(0, wheel_delta)
|
||||||
|
await page.wait_for_timeout(100)
|
||||||
|
# Wait for page JS to process scroll events (e.g. enabling buttons)
|
||||||
|
await page.wait_for_timeout(500)
|
||||||
|
|
||||||
|
# Record which element was just page-level scrolled. The click handler
|
||||||
|
# checks this to skip scroll_into_view() for the SAME element, which
|
||||||
|
# would use programmatic window.scroll() to center it — undoing the
|
||||||
|
# scroll position that enables buttons on T&C pages. Using the element
|
||||||
|
# ID (not a boolean) ensures unrelated clicks aren't affected.
|
||||||
|
await page.evaluate(
|
||||||
|
"(id) => { window.__skyvernPageScrolledElementId = id; }",
|
||||||
|
action.element_id,
|
||||||
|
)
|
||||||
|
return [ActionSuccess(data={"page_level_scroll": True})]
|
||||||
|
elif not scroll_result:
|
||||||
LOG.warning(
|
LOG.warning(
|
||||||
"Could not find scrollable container near element, falling back to mouse wheel",
|
"Could not find scrollable container near element, falling back to mouse wheel",
|
||||||
element_id=action.element_id,
|
element_id=action.element_id,
|
||||||
|
|||||||
@@ -2328,10 +2328,12 @@ function isWindowScrollable() {
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* Find the nearest scrollable container relative to the given element and scroll it.
|
* Find the nearest scrollable container relative to the given element and scroll it.
|
||||||
* Two strategies:
|
* Three strategies in priority order:
|
||||||
* 1) Walk up from element to find a scrollable ancestor (element is inside container)
|
* 1) Walk up from element to find a scrollable ancestor (element is inside container)
|
||||||
* 2) Walk up the DOM checking siblings at each level (element is beside container)
|
* 2) Walk up the DOM checking siblings at each level (element is beside container)
|
||||||
* Returns true if a scrollable container was found and scrolled, false otherwise.
|
* 3) Fall back to page-level scrolling (for pages where the body itself scrolls,
|
||||||
|
* e.g. T&C pages with no scrollable sub-container)
|
||||||
|
* Returns: true (sub-container scrolled), "page" (page-level scroll needed), or false (nothing scrollable).
|
||||||
*/
|
*/
|
||||||
function scrollNearestScrollableContainer(element, direction) {
|
function scrollNearestScrollableContainer(element, direction) {
|
||||||
function isContainerScrollable(node) {
|
function isContainerScrollable(node) {
|
||||||
@@ -2367,13 +2369,27 @@ function scrollNearestScrollableContainer(element, direction) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!target) return false;
|
// Scroll the sub-container if found
|
||||||
if (direction === "down") {
|
if (target) {
|
||||||
target.scrollTop = target.scrollHeight;
|
if (direction === "down") {
|
||||||
} else {
|
target.scrollTop = target.scrollHeight;
|
||||||
target.scrollTop = 0;
|
} else {
|
||||||
|
target.scrollTop = 0;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
|
// Strategy 3: fall back to page-level scrolling when no sub-container exists.
|
||||||
|
// Many pages (e.g. T&C agreements) render content inline with the page body
|
||||||
|
// as the only scrollable area. Return "page" to signal the Python handler to
|
||||||
|
// use mouse.wheel events for native browser scrolling, which reliably triggers
|
||||||
|
// page JavaScript (IntersectionObserver, scroll listeners) that programmatic
|
||||||
|
// scrollTo/scrollTop methods may not.
|
||||||
|
if (isWindowScrollable()) {
|
||||||
|
return "page";
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
function scrollToElementBottom(element, page_by_page = false) {
|
function scrollToElementBottom(element, page_by_page = false) {
|
||||||
|
|||||||
Reference in New Issue
Block a user