refactor chain click (#1186)

This commit is contained in:
LawyZheng
2024-11-14 02:33:44 +08:00
committed by GitHub
parent 82ea39f7c4
commit d107c3d4db
5 changed files with 280 additions and 68 deletions

View File

@@ -357,8 +357,8 @@ class InputActionOnSelect2Dropdown(SkyvernException):
class FailToClick(SkyvernException):
def __init__(self, element_id: str, anchor: str = "self"):
super().__init__(f"Failed to click({anchor}). element_id={element_id}")
def __init__(self, element_id: str, msg: str, anchor: str = "self"):
super().__init__(f"Failed to click({anchor}). element_id={element_id}, error_msg={msg}")
class FailToSelectByLabel(SkyvernException):

View File

@@ -1138,14 +1138,15 @@ async def chain_click(
LOG.info("Chain click: main element click succeeded", action=action, locator=locator)
return [ActionSuccess()]
except Exception:
action_results: list[ActionResult] = [ActionFailure(FailToClick(action.element_id))]
except Exception as e:
action_results: list[ActionResult] = [ActionFailure(FailToClick(action.element_id, msg=str(e)))]
if skyvern_element.get_tag_name() == "label":
LOG.info(
"Chain click: it's a label element. going to try for-click",
task_id=task.task_id,
action=action,
element=str(skyvern_element),
locator=locator,
)
try:
@@ -1155,45 +1156,71 @@ async def chain_click(
await bound_element.get_locator().click(timeout=timeout)
action_results.append(ActionSuccess())
return action_results
except Exception:
action_results.append(ActionFailure(FailToClick(action.element_id, anchor="for")))
if skyvern_element.get_tag_name() == InteractiveElement.INPUT:
except Exception as e:
action_results.append(ActionFailure(FailToClick(action.element_id, anchor="for", msg=str(e))))
else:
LOG.info(
"Chain click: it's an input element. going to try sibling click",
"Chain click: it's a non-label element. going to find the bound label element by attribute id and click",
task_id=task.task_id,
action=action,
element=str(skyvern_element),
locator=locator,
)
sibling_action_result = await click_sibling_of_input(locator, timeout=timeout)
action_results.append(sibling_action_result)
if isinstance(sibling_action_result, ActionSuccess):
return action_results
try:
if bound_locator := await skyvern_element.find_bound_label_by_attr_id():
await bound_locator.click(timeout=timeout)
action_results.append(ActionSuccess())
return action_results
except Exception as e:
action_results.append(ActionFailure(FailToClick(action.element_id, anchor="attr_id", msg=str(e))))
if not await skyvern_element.is_visible():
LOG.info(
"Chain click: exit since the element is not visible on the page anymore",
taks_id=task.task_id,
action=action,
element=str(skyvern_element),
locator=locator,
)
return action_results
blocking_element = await skyvern_element.find_blocking_element(
dom=DomUtil(scraped_page=scraped_page, page=page)
)
if blocking_element is None:
LOG.info(
"Chain click: exit since the element is not blocking by any skyvern element",
taks_id=task.task_id,
action=action,
element=str(skyvern_element),
locator=locator,
)
return action_results
try:
parent_locator = locator.locator("..")
await parent_locator.click(timeout=timeout)
LOG.info(
"Chain click: successfully clicked parent element",
LOG.debug(
"Chain click: verifying the blocking element is parent or sibling of the target element",
taks_id=task.task_id,
action=action,
parent_locator=parent_locator,
element=str(blocking_element),
locator=locator,
)
action_results.append(ActionSuccess(interacted_with_parent=True))
except Exception:
LOG.warning(
"Failed to click parent element",
action=action,
parent_locator=parent_locator,
exc_info=True,
)
action_results.append(
ActionFailure(
FailToClick(action.element_id, anchor="parent"),
interacted_with_parent=True,
if await blocking_element.is_parent_of(
await skyvern_element.get_element_handler()
) or await blocking_element.is_sibling_of(await skyvern_element.get_element_handler()):
LOG.info(
"Chain click: element is blocked by other elements, going to click on the blocking element",
taks_id=task.task_id,
action=action,
element=str(blocking_element),
locator=locator,
)
)
# We don't raise exception here because we do log the exception, and return ActionFailure as the last action
await blocking_element.get_locator().click(timeout=timeout)
action_results.append(ActionSuccess())
return action_results
except Exception as e:
action_results.append(ActionFailure(FailToClick(action.element_id, anchor="blocking_element", msg=str(e))))
return action_results
finally:
@@ -2235,35 +2262,6 @@ def get_checkbox_id_in_label_children(scraped_page: ScrapedPage, element_id: str
return None
async def click_sibling_of_input(
locator: Locator,
timeout: int,
) -> ActionResult:
try:
input_element = locator.first
parent_locator = locator.locator("..")
if input_element:
input_id = await input_element.get_attribute("id")
sibling_label_css = f'label[for="{input_id}"]'
label_locator = parent_locator.locator(sibling_label_css)
await label_locator.click(timeout=timeout)
LOG.info(
"Successfully clicked sibling label of input element",
sibling_label_css=sibling_label_css,
)
return ActionSuccess(interacted_with_sibling=True)
# Should never get here
return ActionFailure(
exception=Exception("Failed while trying to click sibling of input element"),
interacted_with_sibling=True,
)
except Exception:
LOG.warning("Failed to click sibling label of input element", exc_info=True)
return ActionFailure(
exception=Exception("Failed while trying to click sibling of input element"),
)
async def extract_information_for_navigation_goal(
task: Task,
step: Step,

View File

@@ -1,3 +1,6 @@
// we only use chromium browser for now
let browserNameForWorkarounds = "chromium";
// Commands for manipulating rects.
// Want to debug this? Run chromium, go to sources, and create a new snippet with the code in domUtils.js
class Rect {
@@ -197,14 +200,31 @@ function getElementComputedStyle(element, pseudo) {
: undefined;
}
// from playwright
// from playwright: https://github.com/microsoft/playwright/blob/1b65f26f0287c0352e76673bc5f85bc36c934b55/packages/playwright-core/src/server/injected/domUtils.ts#L76-L98
function isElementStyleVisibilityVisible(element, style) {
style = style ?? getElementComputedStyle(element);
if (!style) return true;
// Element.checkVisibility checks for content-visibility and also looks at
// styles up the flat tree including user-agent ShadowRoots, such as the
// details element for example.
// All the browser implement it, but WebKit has a bug which prevents us from using it:
// https://bugs.webkit.org/show_bug.cgi?id=264733
// @ts-ignore
if (
!element.checkVisibility({ checkOpacity: false, checkVisibilityCSS: false })
)
return false;
Element.prototype.checkVisibility &&
browserNameForWorkarounds !== "webkit"
) {
if (!element.checkVisibility()) return false;
} else {
// Manual workaround for WebKit that does not have checkVisibility.
const detailsOrSummary = element.closest("details,summary");
if (
detailsOrSummary !== element &&
detailsOrSummary?.nodeName === "DETAILS" &&
!detailsOrSummary.open
)
return false;
}
if (style.visibility !== "visible") return false;
// TODO: support style.clipPath and style.clipRule?
@@ -220,7 +240,7 @@ function hasASPClientControl() {
return typeof ASPxClientControl !== "undefined";
}
// from playwright
// from playwright: https://github.com/microsoft/playwright/blob/1b65f26f0287c0352e76673bc5f85bc36c934b55/packages/playwright-core/src/server/injected/domUtils.ts#L100-L119
function isElementVisible(element) {
// TODO: This is a hack to not check visibility for option elements
// because they are not visible by default. We check their parent instead for visibility.
@@ -249,7 +269,8 @@ function isElementVisible(element) {
isElementVisible(child)
)
return true;
// skipping other nodes including text
if (child.nodeType === 3 /* Node.TEXT_NODE */ && isVisibleTextNode(child))
return true;
}
return false;
}
@@ -273,6 +294,144 @@ function isElementVisible(element) {
return true;
}
// from playwright: https://github.com/microsoft/playwright/blob/1b65f26f0287c0352e76673bc5f85bc36c934b55/packages/playwright-core/src/server/injected/domUtils.ts#L121-L127
function isVisibleTextNode(node) {
// https://stackoverflow.com/questions/1461059/is-there-an-equivalent-to-getboundingclientrect-for-text-nodes
const range = node.ownerDocument.createRange();
range.selectNode(node);
const rect = range.getBoundingClientRect();
if (rect.width <= 0 || rect.height <= 0) {
return false;
}
// if the center point of the element is not in the page, we tag it as an non-interactable element
// FIXME: sometimes there could be an overflow element blocking the default scrolling, making Y coordinate be wrong. So we currently only check for X
const center_x = (rect.left + rect.width) / 2 + window.scrollX;
if (center_x < 0) {
return false;
}
// const center_y = (rect.top + rect.height) / 2 + window.scrollY;
// if (center_x < 0 || center_y < 0) {
// return false;
// }
return true;
}
// from playwright: https://github.com/microsoft/playwright/blob/d685763c491e06be38d05675ef529f5c230388bb/packages/playwright-core/src/server/injected/domUtils.ts#L37-L44
function parentElementOrShadowHost(element) {
if (element.parentElement) return element.parentElement;
if (!element.parentNode) return;
if (
element.parentNode.nodeType === 11 /* Node.DOCUMENT_FRAGMENT_NODE */ &&
element.parentNode.host
)
return element.parentNode.host;
}
// from playwright: https://github.com/microsoft/playwright/blob/d685763c491e06be38d05675ef529f5c230388bb/packages/playwright-core/src/server/injected/domUtils.ts#L46-L52
function enclosingShadowRootOrDocument(element) {
let node = element;
while (node.parentNode) node = node.parentNode;
if (
node.nodeType === 11 /* Node.DOCUMENT_FRAGMENT_NODE */ ||
node.nodeType === 9 /* Node.DOCUMENT_NODE */
)
return node;
}
// from playwright: https://github.com/microsoft/playwright/blob/d685763c491e06be38d05675ef529f5c230388bb/packages/playwright-core/src/server/injected/injectedScript.ts#L799-L859
function expectHitTarget(hitPoint, targetElement) {
const roots = [];
// Get all component roots leading to the target element.
// Go from the bottom to the top to make it work with closed shadow roots.
let parentElement = targetElement;
while (parentElement) {
const root = enclosingShadowRootOrDocument(parentElement);
if (!root) break;
roots.push(root);
if (root.nodeType === 9 /* Node.DOCUMENT_NODE */) break;
parentElement = root.host;
}
// Hit target in each component root should point to the next component root.
// Hit target in the last component root should point to the target or its descendant.
let hitElement;
for (let index = roots.length - 1; index >= 0; index--) {
const root = roots[index];
// All browsers have different behavior around elementFromPoint and elementsFromPoint.
// https://github.com/w3c/csswg-drafts/issues/556
// http://crbug.com/1188919
const elements = root.elementsFromPoint(hitPoint.x, hitPoint.y);
const singleElement = root.elementFromPoint(hitPoint.x, hitPoint.y);
if (
singleElement &&
elements[0] &&
parentElementOrShadowHost(singleElement) === elements[0]
) {
const style = window.getComputedStyle(singleElement);
if (style?.display === "contents") {
// Workaround a case where elementsFromPoint misses the inner-most element with display:contents.
// https://bugs.chromium.org/p/chromium/issues/detail?id=1342092
elements.unshift(singleElement);
}
}
if (
elements[0] &&
elements[0].shadowRoot === root &&
elements[1] === singleElement
) {
// Workaround webkit but where first two elements are swapped:
// <host>
// #shadow root
// <target>
// elementsFromPoint produces [<host>, <target>], while it should be [<target>, <host>]
// In this case, just ignore <host>.
elements.shift();
}
const innerElement = elements[0];
if (!innerElement) break;
hitElement = innerElement;
if (index && innerElement !== roots[index - 1].host) break;
}
// Check whether hit target is the target or its descendant.
const hitParents = [];
while (hitElement && hitElement !== targetElement) {
hitParents.push(hitElement);
hitElement = parentElementOrShadowHost(hitElement);
}
if (hitElement === targetElement) return null;
return hitParents[0] || document.documentElement;
}
function isParent(parent, child) {
return parent.contains(child);
}
function isSibling(el1, el2) {
return el1.parentElement === el2.parentElement;
}
function getBlockElementUniqueID(element) {
const rect = element.getBoundingClientRect();
const hitElement = expectHitTarget(
{
x: rect.left + rect.width / 2,
y: rect.top + rect.height / 2,
},
element,
);
if (!hitElement) {
return "";
}
return hitElement.getAttribute("unique_id") ?? "";
}
function isHidden(element) {
const style = getElementComputedStyle(element);
if (style?.display === "none") {

View File

@@ -127,6 +127,9 @@ class SkyvernElement:
self.locator = locator
self.hash_value = hash_value
def __repr__(self) -> str:
return f"SkyvernElement({str(self.__static_element)})"
def build_HTML(self, need_trim_element: bool = True, need_skyvern_attrs: bool = True) -> str:
element_dict = self.get_element_dict()
if need_trim_element:
@@ -263,6 +266,22 @@ class SkyvernElement:
async def is_selectable(self) -> bool:
return self.get_selectable() or self.get_tag_name() in SELECTABLE_ELEMENT
async def is_visible(self) -> bool:
skyvern_frame = await SkyvernFrame.create_instance(self.get_frame())
return await skyvern_frame.get_element_visible(await self.get_element_handler())
async def is_parent_of(self, target: ElementHandle) -> bool:
skyvern_frame = await SkyvernFrame.create_instance(self.get_frame())
return await skyvern_frame.is_parent(await self.get_element_handler(), target)
async def is_child_of(self, target: ElementHandle) -> bool:
skyvern_frame = await SkyvernFrame.create_instance(self.get_frame())
return await skyvern_frame.is_parent(target, await self.get_element_handler())
async def is_sibling_of(self, target: ElementHandle) -> bool:
skyvern_frame = await SkyvernFrame.create_instance(self.get_frame())
return await skyvern_frame.is_sibling(await self.get_element_handler(), target)
def get_element_dict(self) -> dict:
return self.__static_element
@@ -301,6 +320,13 @@ class SkyvernElement:
assert handler is not None
return handler
async def find_blocking_element(self, dom: DomUtil) -> SkyvernElement | None:
skyvern_frame = await SkyvernFrame.create_instance(self.get_frame())
blocking_element_id = await skyvern_frame.get_blocking_element_id(await self.get_element_handler())
if not blocking_element_id:
return None
return await dom.get_skyvern_element_by_id(blocking_element_id)
def find_element_id_in_label_children(self, element_type: InteractiveElement) -> str | None:
tag_name = self.get_tag_name()
if tag_name != "label":
@@ -354,6 +380,23 @@ class SkyvernElement:
return await dom.get_skyvern_element_by_id(unique_id)
async def find_bound_label_by_attr_id(
self, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
) -> Locator | None:
if self.get_tag_name() == "label":
return None
element_id: str = await self.get_attr("id", timeout=timeout)
if not element_id:
return None
locator = self.get_frame().locator(f"label[for='{element_id}']")
cnt = await locator.count()
if cnt == 1:
return locator
return None
async def find_selectable_child(self, dom: DomUtil) -> SkyvernElement | None:
# BFS to find the first selectable child
index = 0

View File

@@ -182,6 +182,10 @@ class SkyvernFrame:
js_script = "(element) => checkDisabledFromStyle(element)"
return await self.evaluate(frame=self.frame, expression=js_script, arg=element)
async def get_blocking_element_id(self, element: ElementHandle) -> str:
js_script = "(element) => getBlockElementUniqueID(element)"
return await self.evaluate(frame=self.frame, expression=js_script, arg=element)
async def scroll_to_top(self, draw_boxes: bool) -> float:
"""
Scroll to the top of the page and take a screenshot.
@@ -224,6 +228,14 @@ class SkyvernFrame:
js_script = "() => isWindowScrollable()"
return await self.evaluate(frame=self.frame, expression=js_script)
async def is_parent(self, parent: ElementHandle, child: ElementHandle) -> bool:
js_script = "([parent, child]) => isParent(parent, child)"
return await self.evaluate(frame=self.frame, expression=js_script, arg=[parent, child])
async def is_sibling(self, el1: ElementHandle, el2: ElementHandle) -> bool:
js_script = "([el1, el2]) => isSibling(el1, el2)"
return await self.evaluate(frame=self.frame, expression=js_script, arg=[el1, el2])
async def has_ASP_client_control(self) -> bool:
js_script = "() => hasASPClientControl()"
return await self.evaluate(frame=self.frame, expression=js_script)