general selection bugfix (#743)

This commit is contained in:
LawyZheng
2024-08-28 14:51:05 +08:00
committed by GitHub
parent c27771b574
commit 6f4ff4339f
5 changed files with 129 additions and 87 deletions

View File

@@ -360,6 +360,7 @@ async def handle_input_text_action(
result = await select_from_dropdown(
action=select_action,
page=page,
dom=dom,
skyvern_frame=skyvern_frame,
incremental_scraped=incremental_scraped,
element_trees=incremental_element,
@@ -378,10 +379,11 @@ async def handle_input_text_action(
)
except Exception as e:
await skyvern_element.scroll_into_view()
await skyvern_element.get_locator().press("Escape", timeout=timeout)
LOG.exception("Failed to do custom selection transformed from input action")
return [ActionFailure(exception=e)]
finally:
await skyvern_element.press_key("Escape")
await skyvern_element.blur()
await incremental_scraped.stop_listen_dom_increment()
# force to move focus back to the element
@@ -614,12 +616,19 @@ async def handle_select_option_action(
try:
await incremental_scraped.start_listen_dom_increment()
await skyvern_element.get_locator().focus(timeout=timeout)
await skyvern_element.focus()
if tag_name == InteractiveElement.INPUT:
await skyvern_element.get_locator().press("ArrowDown", timeout=timeout)
else:
try:
await skyvern_element.get_locator().click(timeout=timeout)
except Exception:
LOG.info(
"fail to open dropdown by clicking, try to press ArrowDown to open",
element_id=skyvern_element.get_id(),
task_id=task.task_id,
step_id=step.step_id,
)
await skyvern_element.focus()
await skyvern_element.press_key("ArrowDown")
# wait 5s for options to load
await asyncio.sleep(5)
@@ -634,6 +643,7 @@ async def handle_select_option_action(
result = await select_from_dropdown(
action=action,
page=page,
dom=dom,
skyvern_frame=skyvern_frame,
incremental_scraped=incremental_scraped,
element_trees=incremental_element,
@@ -1160,6 +1170,7 @@ async def input_or_auto_complete_input(
async def select_from_dropdown(
action: SelectOptionAction,
page: Page,
dom: DomUtil,
skyvern_frame: SkyvernFrame,
incremental_scraped: IncrementalScrapePage,
element_trees: list[dict],
@@ -1185,7 +1196,9 @@ async def select_from_dropdown(
if not force_select and dropdown_menu_element is None:
return None
if dropdown_menu_element and dropdown_menu_element.get_scrollable():
if dropdown_menu_element and await skyvern_frame.get_element_scrollable(
await dropdown_menu_element.get_element_handler()
):
await scroll_down_to_load_all_options(
dropdown_menu_element=dropdown_menu_element,
skyvern_frame=skyvern_frame,
@@ -1198,13 +1211,7 @@ async def select_from_dropdown(
trimmed_element_tree = await incremental_scraped.get_incremental_element_tree(
app.AGENT_FUNCTION.cleanup_element_tree_factory(step=step, task=task)
)
if dropdown_menu_element:
# if there's a dropdown menu detected, only elements in the dropdown should be sent to LLM
dropdown_id = dropdown_menu_element.get_id()
for head_node in trimmed_element_tree:
if head_node.get("id") == dropdown_id:
trimmed_element_tree = [head_node]
break
trimmed_element_tree = remove_exist_elements(dom=dom, element_tree=trimmed_element_tree)
html = incremental_scraped.build_html_tree(element_tree=trimmed_element_tree)
@@ -1319,10 +1326,12 @@ async def scroll_down_to_load_all_options(
scroll_pace += scroll_interval
else:
await skyvern_frame.scroll_to_element_bottom(dropdown_menu_element_handle)
# wait for the options to be fully loaded
await asyncio.sleep(2)
# scroll back a little and then scroll down again to trigger the loading
await page.mouse.wheel(0, -20)
await page.mouse.wheel(0, 20)
await page.mouse.wheel(0, -1e-5)
await page.mouse.wheel(0, 1e-5)
# wait for a while to load new options
await asyncio.sleep(10)

View File

@@ -936,7 +936,6 @@ function buildElementObject(frame, element, interactable) {
elementTagNameLower === "select" ||
isSelect2Dropdown(element) ||
isSelect2MultiChoice(element),
isScrollable: isScrollable(element),
};
let isInShadowRoot = element.getRootNode() instanceof ShadowRoot;
@@ -1571,7 +1570,7 @@ function scrollToElementBottom(element) {
element.scroll({
top: element.scrollHeight,
left: 0,
behavior: "instant",
behavior: "smooth",
});
}
@@ -1619,11 +1618,33 @@ if (window.globalOneTimeIncrementElements === undefined) {
window.globalOneTimeIncrementElements = [];
}
if (window.globalDomDepthMap === undefined) {
window.globalDomDepthMap = new Map();
}
/**
 * Build element trees for newly observed nodes and append them to the bucket
 * of `window.globalDomDepthMap` keyed by the DOM depth of `parentNode`.
 *
 * @param parentNode node whose depth (from getElementDomDepth) selects the bucket
 * @param childrenNode iterable of nodes that are converted with buildElementTree
 */
function addIncrementalNodeToMap(parentNode, childrenNode) {
  // the depth of the parent is the bucket key; it is used for sorting later
  const domDepth = getElementDomDepth(parentNode);
  const bucket = window.globalDomDepthMap.has(domDepth)
    ? window.globalDomDepthMap.get(domDepth)
    : [];

  for (const node of childrenNode) {
    // only the second item of the returned pair (the tree list) is needed
    const builtTrees = buildElementTree(node, "", false)[1];
    if (builtTrees.length > 0) {
      bucket.push(...builtTrees);
    }
  }

  window.globalDomDepthMap.set(domDepth, bucket);
}
if (window.globalObserverForDOMIncrement === undefined) {
window.globalObserverForDOMIncrement = new MutationObserver(function (
mutationsList,
observer,
) {
// TODO: how to detect duplicated recreate element?
for (const mutation of mutationsList) {
if (mutation.type === "attributes") {
if (mutation.attributeName === "style") {
@@ -1637,6 +1658,7 @@ if (window.globalObserverForDOMIncrement === undefined) {
targetNode: node,
newNodes: [node],
});
addIncrementalNodeToMap(node, [node]);
}
}
@@ -1660,6 +1682,7 @@ if (window.globalObserverForDOMIncrement === undefined) {
if (newNodes.length > 0) {
changedNode.newNodes = newNodes;
window.globalOneTimeIncrementElements.push(changedNode);
addIncrementalNodeToMap(changedNode.targetNode, changedNode.newNodes);
}
}
}
@@ -1667,6 +1690,7 @@ if (window.globalObserverForDOMIncrement === undefined) {
}
function startGlobalIncrementalObserver() {
window.globalDomDepthMap = new Map();
window.globalOneTimeIncrementElements = [];
window.globalObserverForDOMIncrement.takeRecords(); // cleanup the older data
window.globalObserverForDOMIncrement.observe(document.body, {
@@ -1679,63 +1703,50 @@ function startGlobalIncrementalObserver() {
}
// Stop collecting incremental DOM changes: reset the depth map, disconnect the
// global mutation observer and clear the list of recorded increment elements.
function stopGlobalIncrementalObserver() {
window.globalDomDepthMap = new Map();
window.globalObserverForDOMIncrement.disconnect();
// drain any pending mutation records; the returned records are ignored
window.globalObserverForDOMIncrement.takeRecords(); // cleanup the older data
window.globalOneTimeIncrementElements = [];
}
function getIncrementElements(frame) {
const domDepthMap = new Map();
for (const element of window.globalOneTimeIncrementElements) {
// calculate the depth of targetNode element for sorting
const depth = getElementDomDepth(element.targetNode);
let newNodesTreeList = [];
if (domDepthMap.has(depth)) {
newNodesTreeList = domDepthMap.get(depth);
}
for (const child of element.newNodes) {
const [_, newNodeTree] = buildElementTree(child, frame, false);
if (newNodeTree.length > 0) {
newNodesTreeList.push(...newNodeTree);
}
}
domDepthMap.set(depth, newNodesTreeList);
}
function getIncrementElements() {
// clean up the children tree and remove the duplicated elements
// search starting from the shallowest node:
// 1. if deeper, the node could only be the children of the shallower one or no related one.
// 2. if depth is same, the node could only be duplicated one or no related one.
const idToElement = new Map();
const cleanedTreeList = [];
const sortedDepth = Array.from(domDepthMap.keys()).sort();
const sortedDepth = Array.from(window.globalDomDepthMap.keys()).sort(
(a, b) => a - b,
);
for (let idx = 0; idx < sortedDepth.length; idx++) {
const depth = sortedDepth[idx];
const treeList = domDepthMap.get(depth);
const treeList = window.globalDomDepthMap.get(depth);
const removeDupAndConcatChildren = (element) => {
const children = element.children;
if (idToElement.has(element.id)) {
element = idToElement.get(element.id);
for (let i = 0; i < children.length; i++) {
const child = children[i];
if (!idToElement.get(child.id)) {
element.children.push(child);
}
}
}
idToElement.set(element.id, element);
for (let i = 0; i < children.length; i++) {
const child = children[i];
removeDupAndConcatChildren(child);
}
};
for (const treeHeadElement of treeList) {
// check if the element is existed
if (idToElement.has(treeHeadElement.id)) {
continue;
}
cleanedTreeList.push(treeHeadElement);
// flatten the tree
let pendingElements = [treeHeadElement];
let curIndex = 0;
while (curIndex < pendingElements.length) {
const curElement = pendingElements[curIndex];
if (idToElement.has(curElement.id)) {
curIndex++;
continue;
}
idToElement.set(curElement.id, curElement);
pendingElements.push(...curElement.children);
curIndex++;
if (!idToElement.has(treeHeadElement.id)) {
cleanedTreeList.push(treeHeadElement);
}
removeDupAndConcatChildren(treeHeadElement);
}
}

View File

@@ -6,7 +6,7 @@ from enum import StrEnum
from typing import Any, Awaitable, Callable
import structlog
from playwright.async_api import Frame, Page
from playwright.async_api import Frame, Locator, Page
from pydantic import BaseModel
from skyvern.constants import SKYVERN_DIR, SKYVERN_ID_ATTR
@@ -388,13 +388,12 @@ async def get_interactable_element_tree(
class IncrementalScrapePage:
id_to_element_dict: dict[str, dict] = {}
id_to_css_dict: dict[str, str]
elements: list[dict]
element_tree: list[dict]
element_tree_trimmed: list[dict]
def __init__(self, skyvern_frame: SkyvernFrame) -> None:
self.id_to_element_dict: dict[str, dict] = dict()
self.id_to_css_dict: dict[str, str] = dict()
self.elements: list[dict] = list()
self.element_tree: list[dict] = list()
self.element_tree_trimmed: list[dict] = list()
self.skyvern_frame = skyvern_frame
async def get_incremental_element_tree(
@@ -403,19 +402,7 @@ class IncrementalScrapePage:
) -> list[dict]:
frame = self.skyvern_frame.get_frame()
frame_id = "main.frame"
if isinstance(frame, Frame):
try:
frame_element = await frame.frame_element()
frame_id = await frame_element.get_attribute("unique_id")
except Exception:
# TODO: do we really care about the frame_id ?
LOG.warning(
"Unable to get frame_element",
exc_info=True,
)
js_script = f"() => getIncrementElements('{frame_id}')"
js_script = "() => getIncrementElements()"
incremental_elements, incremental_tree = await frame.evaluate(js_script)
# we listen for incremental elements separately per frame, so all elements will be in the same SkyvernFrame
self.id_to_css_dict, self.id_to_element_dict, _ = build_element_dict(incremental_elements)
@@ -442,6 +429,25 @@ class IncrementalScrapePage:
js_script = "() => window.globalOneTimeIncrementElements.length"
return await self.skyvern_frame.get_frame().evaluate(js_script)
async def select_one_element_by_value(self, value: str) -> Locator | None:
    """Find the first scraped element whose text is exactly ``value``.

    Walks ``self.elements`` in order and considers only entries that have a
    non-empty ``id``, are marked ``interactable`` and whose ``text`` equals
    ``value``. For each such entry a locator is built from its id; the first
    locator that currently matches at least one node in the frame is returned.

    Returns:
        The matching locator, or ``None`` when no candidate matches.
    """
    for candidate in self.elements:
        candidate_id = candidate.get("id", "")
        is_match = (
            candidate_id
            and candidate.get("interactable", False)
            and candidate.get("text", "") == value
        )
        if not is_match:
            continue

        selector = f'[{SKYVERN_ID_ATTR}="{candidate_id}"]'
        locator = self.skyvern_frame.get_frame().locator(selector)
        # the scraped entry may no longer be present, so confirm a live match
        if await locator.count() > 0:
            return locator

    return None
def build_html_tree(self, element_tree: list[dict] | None = None) -> str:
    """Render an element tree to one HTML string.

    Each top-level node is converted with ``json_to_html`` and the results are
    concatenated without a separator. When ``element_tree`` is ``None`` or
    empty, ``self.element_tree_trimmed`` is rendered instead.
    """
    source_tree = element_tree or self.element_tree_trimmed
    return "".join(json_to_html(node) for node in source_tree)

View File

@@ -7,7 +7,7 @@ from enum import StrEnum
from random import uniform
import structlog
from playwright.async_api import Frame, FrameLocator, Locator, Page, TimeoutError
from playwright.async_api import ElementHandle, Frame, FrameLocator, Locator, Page, TimeoutError
from skyvern.constants import SKYVERN_ID_ATTR
from skyvern.exceptions import (
@@ -199,9 +199,6 @@ class SkyvernElement:
# Return the static element dict this SkyvernElement wraps (the same object, not a copy).
def get_element_dict(self) -> dict:
return self.__static_element
def get_scrollable(self) -> bool:
return self.__static_element.get("isScrollable", False)
# Return the "isSelectable" flag stored in the static element dict; False when the key is absent.
def get_selectable(self) -> bool:
return self.__static_element.get("isSelectable", False)
@@ -230,6 +227,13 @@ class SkyvernElement:
# Return the Playwright locator stored on this element.
def get_locator(self) -> Locator:
return self.locator
async def get_element_handler(
    self, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
) -> ElementHandle:
    """Resolve this element's locator into an ``ElementHandle``.

    Args:
        timeout: forwarded to ``Locator.element_handle``; the default is read
            from ``BROWSER_ACTION_TIMEOUT_MS`` once, when the method is defined.

    Returns:
        The element handle the locator resolves to.

    Raises:
        AssertionError: if the locator resolved to ``None``.
    """
    element_handle = await self.locator.element_handle(timeout=timeout)
    assert element_handle is not None
    return element_handle
async def get_select2_dropdown(self) -> Select2Dropdown:
if not await self.is_select2_dropdown():
raise ElementIsNotSelect2Dropdown(self.get_id(), self.__static_element)
@@ -328,6 +332,9 @@ class SkyvernElement:
return await self.locator.get_attribute(attr_name, timeout=timeout)
# Focus the element through its locator; `timeout` is forwarded to Locator.focus
# and defaults to the BROWSER_ACTION_TIMEOUT_MS setting.
async def focus(self, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) -> None:
await self.get_locator().focus(timeout=timeout)
async def input_sequentially(
self, text: str, default_timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
) -> None:
@@ -340,6 +347,11 @@ class SkyvernElement:
await self.press_fill(text, timeout=default_timeout)
# Press `key` on the element through its locator; `timeout` is forwarded to
# Locator.press and defaults to the BROWSER_ACTION_TIMEOUT_MS setting.
async def press_key(
self, key: str, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
) -> None:
await self.get_locator().press(key=key, timeout=timeout)
async def press_fill(
self, text: str, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS
) -> None:
@@ -375,11 +387,11 @@ class SkyvernElement:
click_x, click_y = await self.move_mouse_to(page=page, timeout=timeout)
await page.mouse.click(click_x, click_y)
# Remove focus from the element by calling `element.blur()` inside the frame,
# passing the handle obtained from get_element_handler() (default timeout).
async def blur(self) -> None:
await self.get_frame().evaluate("(element) => element.blur()", await self.get_element_handler())
async def scroll_into_view(self, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) -> None:
element_handler = await self.get_locator().element_handle()
if element_handler is None:
LOG.warning("element handler is None. ", element_id=self.get_id())
return
element_handler = await self.get_element_handler(timeout=timeout)
try:
await element_handler.scroll_into_view_if_needed(timeout=timeout)
except TimeoutError:
@@ -387,8 +399,8 @@ class SkyvernElement:
"Timeout to execute scrolling into view, try to re-focus to locate the element",
element_id=self.get_id(),
)
await self.get_frame().evaluate("(element) => element.blur()", element_handler)
await self.get_locator().focus(timeout=timeout)
await self.blur()
await self.focus(timeout=timeout)
await asyncio.sleep(2) # wait for scrolling into the target

View File

@@ -172,6 +172,10 @@ class SkyvernFrame:
js_script = "([frame, element, interactable]) => buildElementObject(frame, element, interactable)"
return await self.frame.evaluate(js_script, [frame, element, interactable])
async def get_element_scrollable(self, element: ElementHandle) -> bool:
    """Evaluate the page-side ``isScrollable`` helper for ``element``.

    The handle is passed as the argument of a script evaluated in
    ``self.frame``; whatever ``isScrollable(element)`` returns is returned.
    """
    return await self.frame.evaluate("(element) => isScrollable(element)", element)
async def scroll_to_top(self, draw_boxes: bool) -> float:
"""
Scroll to the top of the page and take a screenshot.