feat: add hover action support (#3994)
Co-authored-by: LawyZheng <lawyzheng1106@gmail.com>
This commit is contained in:
@@ -411,6 +411,44 @@ function hasASPClientControl() {
|
||||
return typeof ASPxClientControl !== "undefined";
|
||||
}
|
||||
|
||||
// Check if element is only visible on hover (e.g., hover-only buttons)
|
||||
function isHoverOnlyElement(element) {
|
||||
// Check for common hover-only patterns in class names
|
||||
const className = element.className?.toString() ?? "";
|
||||
const parentClassName = element.parentElement?.className?.toString() ?? "";
|
||||
|
||||
// Common hover-only class patterns
|
||||
if (
|
||||
className.includes("hover-") ||
|
||||
className.includes("-hover") ||
|
||||
parentClassName.includes("hover-") ||
|
||||
parentClassName.includes("-hover")
|
||||
) {
|
||||
return true;
|
||||
}
|
||||
|
||||
// Check if parent has hover-related attributes or classes that might reveal this element
|
||||
let parent = element.parentElement;
|
||||
let depth = 0;
|
||||
// Cap recursion to avoid walking the entire tree and bloating prompts
|
||||
const maxDepth = 5;
|
||||
while (parent && parent !== document.body && depth < maxDepth) {
|
||||
const parentClass = parent.className?.toString() ?? "";
|
||||
if (
|
||||
parentClass.includes("hover") ||
|
||||
parentClass.includes("card") ||
|
||||
parentClass.includes("item")
|
||||
) {
|
||||
// This element might be revealed on parent hover
|
||||
return true;
|
||||
}
|
||||
parent = parent.parentElement;
|
||||
depth += 1;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// from playwright: https://github.com/microsoft/playwright/blob/1b65f26f0287c0352e76673bc5f85bc36c934b55/packages/playwright-core/src/server/injected/domUtils.ts#L100-L119
|
||||
// NOTE: According this logic, some elements with aria-hidden won't be considered as invisible. And the result shows they are indeed interactable.
|
||||
function isElementVisible(element) {
|
||||
@@ -450,6 +488,10 @@ function isElementVisible(element) {
|
||||
if (!isElementStyleVisibilityVisible(element, style)) return false;
|
||||
const rect = element.getBoundingClientRect();
|
||||
if (rect.width <= 0 || rect.height <= 0) {
|
||||
// Check if this element might be visible on hover before marking as invisible
|
||||
if (isHoverOnlyElement(element)) {
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -824,7 +866,12 @@ function isInteractable(element, hoverStylesMap) {
|
||||
// https://developer.mozilla.org/en-US/docs/Web/CSS/pointer-events#none
|
||||
const elementPointerEvent = getElementComputedStyle(element)?.pointerEvents;
|
||||
if (elementPointerEvent === "none" && !element.disabled) {
|
||||
return false;
|
||||
// Some CTAs stay hidden until the parent is hovered
|
||||
// When we can infer that the element is revealed on hover, keep it interactable so the agent
|
||||
// has a chance to hover the parent before clicking.
|
||||
if (!isHoverOnlyElement(element)) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (isInteractableInput(element, hoverStylesMap)) {
|
||||
@@ -1569,6 +1616,7 @@ async function buildElementObject(
|
||||
frame: frame,
|
||||
frame_index: window.GlobalSkyvernFrameIndex,
|
||||
interactable: interactable,
|
||||
hoverOnly: isHoverOnlyElement(element),
|
||||
tagName: elementTagNameLower,
|
||||
attributes: attrs,
|
||||
beforePseudoText: getPseudoContent(element, "::before"),
|
||||
|
||||
@@ -2,6 +2,7 @@ import asyncio
|
||||
import copy
|
||||
import json
|
||||
from collections import defaultdict
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
from playwright._impl._errors import TimeoutError
|
||||
@@ -92,6 +93,14 @@ def load_js_script() -> str:
|
||||
JS_FUNCTION_DEFS = load_js_script()
|
||||
|
||||
|
||||
# function to convert JSON element to HTML
|
||||
def build_attribute(key: str, value: Any) -> str:
|
||||
if isinstance(value, bool) or isinstance(value, int):
|
||||
return f'{key}="{str(value).lower()}"'
|
||||
|
||||
return f'{key}="{str(value)}"' if value else key
|
||||
|
||||
|
||||
def clean_element_before_hashing(element: dict) -> dict:
|
||||
def clean_nested(element: dict) -> dict:
|
||||
element_cleaned = {key: value for key, value in element.items() if key not in {"id", "rect", "frame_index"}}
|
||||
@@ -125,7 +134,7 @@ def build_element_dict(
|
||||
|
||||
for element in elements:
|
||||
element_id: str = element.get("id", "")
|
||||
# get_interactable_element_tree marks each interactable element with a unique_id attribute
|
||||
# get_interactable_element_tree marks each interactable element with a SKYVERN_ID_ATTR attribute
|
||||
id_to_css_dict[element_id] = f"[{SKYVERN_ID_ATTR}='{element_id}']"
|
||||
id_to_element_dict[element_id] = element
|
||||
id_to_frame_dict[element_id] = element["frame"]
|
||||
@@ -409,16 +418,18 @@ async def add_frame_interactable_elements(
|
||||
# it will get stuck when we `frame.evaluate()` on an invisible iframe
|
||||
if not await frame_element.is_visible():
|
||||
return elements, element_tree
|
||||
unique_id = await frame_element.get_attribute("unique_id")
|
||||
if not unique_id:
|
||||
skyvern_id = await frame_element.get_attribute(SKYVERN_ID_ATTR)
|
||||
if not skyvern_id:
|
||||
LOG.info(
|
||||
"No unique_id found for frame, skipping",
|
||||
"No Skyvern id found for frame, skipping",
|
||||
frame_index=frame_index,
|
||||
attr=SKYVERN_ID_ATTR,
|
||||
)
|
||||
return elements, element_tree
|
||||
except Exception:
|
||||
LOG.warning(
|
||||
"Unable to get unique_id from frame_element",
|
||||
"Unable to get Skyvern id from frame_element",
|
||||
attr=SKYVERN_ID_ATTR,
|
||||
exc_info=True,
|
||||
)
|
||||
return elements, element_tree
|
||||
@@ -427,11 +438,11 @@ async def add_frame_interactable_elements(
|
||||
await skyvern_frame.safe_wait_for_animation_end()
|
||||
|
||||
frame_elements, frame_element_tree = await skyvern_frame.build_tree_from_body(
|
||||
frame_name=unique_id, frame_index=frame_index
|
||||
frame_name=skyvern_id, frame_index=frame_index
|
||||
)
|
||||
|
||||
for element in elements:
|
||||
if element["id"] == unique_id:
|
||||
if element["id"] == skyvern_id:
|
||||
element["children"] = frame_element_tree
|
||||
|
||||
elements = elements + frame_elements
|
||||
@@ -638,6 +649,9 @@ def _should_keep_unique_id(element: dict) -> bool:
|
||||
# 1. no readonly attr and not disable attr and no interactable
|
||||
# 2. readonly=false and disable=false and interactable=false
|
||||
|
||||
if element.get("hoverOnly"):
|
||||
return True
|
||||
|
||||
attributes = element.get("attributes", {})
|
||||
if (
|
||||
"disabled" not in attributes
|
||||
|
||||
Reference in New Issue
Block a user