improve selection dom listener performance (#1667)

This commit is contained in:
Shuchang Zheng
2025-01-28 21:14:31 +08:00
committed by GitHub
parent 185fc330a4
commit 0fa11a484b
5 changed files with 188 additions and 59 deletions

View File

@@ -0,0 +1,34 @@
Determine whether the user has finished the multi-level selection, based on the screenshot, the user goal, the user details, the HTML elements and the select history provided below.
Reply in JSON format with the following keys:
{
"page_info": str, // Think step by step. Describe the page information you parsed from the HTML elements. Your decision should be based on the current page information.
"think": str, // Think step by step. Explain whether you think the user has finished the multi-level selection, and why.
"confidence_float": float, // The confidence of your decision. Pick a number between 0.0 and 1.0. 0.0 means no confidence, 1.0 means full confidence.
"is_finished": bool, // True if the user has finished the multi-level selection, False otherwise.
}
User goal:
```
{{ navigation_goal }}
```
User details:
```
{{ navigation_payload_str }}
```
HTML elements:
```
{{ elements }}
```
Select History:
```
{{ select_history }}
```
Current datetime, ISO format:
```
{{ local_datetime }}
```

View File

@@ -161,7 +161,7 @@ def check_disappeared_element_id_in_incremental_factory(
incre_page=incremental_scraped, element_id=element_id
)
except Exception:
LOG.info(
LOG.debug(
"Failed to create skyvern element, going to drop the element from incremental tree",
exc_info=True,
element_id=element_id,
@@ -681,23 +681,25 @@ async def handle_input_text_action(
)
await skyvern_element.scroll_into_view()
finally:
blocking_element, exist = await skyvern_element.find_blocking_element(
dom=dom, incremental_page=incremental_scraped
)
if blocking_element and exist:
LOG.info(
"Find a blocking element to the current element, going to blur the blocking element first",
task_id=task.task_id,
step_id=step.step_id,
blocking_element=blocking_element.get_locator(),
if await skyvern_element.is_visible():
blocking_element, exist = await skyvern_element.find_blocking_element(
dom=dom, incremental_page=incremental_scraped
)
if await blocking_element.get_locator().count():
await blocking_element.press_key("Escape")
if await blocking_element.get_locator().count():
await blocking_element.blur()
if blocking_element and exist:
LOG.info(
"Find a blocking element to the current element, going to blur the blocking element first",
task_id=task.task_id,
step_id=step.step_id,
blocking_element=blocking_element.get_locator(),
)
if await blocking_element.get_locator().count():
await blocking_element.press_key("Escape")
if await blocking_element.get_locator().count():
await blocking_element.blur()
await skyvern_element.press_key("Escape")
await skyvern_element.blur()
if await skyvern_element.is_visible():
await skyvern_element.press_key("Escape")
await skyvern_element.blur()
await incremental_scraped.stop_listen_dom_increment()
# force to move focus back to the element
@@ -1098,6 +1100,7 @@ async def handle_select_option_action(
except Exception:
LOG.info(
"fail to open dropdown by clicking, try to press ArrowDown to open",
exc_info=True,
element_id=skyvern_element.get_id(),
task_id=task.task_id,
step_id=step.step_id,
@@ -1154,7 +1157,12 @@ async def handle_select_option_action(
results.append(ActionFailure(exception=e))
return results
finally:
if is_open and len(results) > 0 and not isinstance(results[-1], ActionSuccess):
if (
await skyvern_element.is_visible()
and is_open
and len(results) > 0
and not isinstance(results[-1], ActionSuccess)
):
await skyvern_element.scroll_into_view()
await skyvern_element.coordinate_click(page=page)
await skyvern_element.press_key("Escape")
@@ -1207,11 +1215,16 @@ async def handle_select_option_action(
return results
finally:
if is_open and len(results) > 0 and not isinstance(results[-1], ActionSuccess):
if (
await skyvern_element.is_visible()
and is_open
and len(results) > 0
and not isinstance(results[-1], ActionSuccess)
):
await skyvern_element.scroll_into_view()
await skyvern_element.coordinate_click(page=page)
await skyvern_element.press_key("Escape")
is_open = False
await skyvern_element.blur()
await incremental_scraped.stop_listen_dom_increment()
@@ -2013,6 +2026,23 @@ async def sequentially_select_from_dropdown(
)
return single_select_result.action_result, values[-1] if len(values) > 0 else None
# This assert is only for type narrowing; the value has already been verified in `single_select_result.is_done()`.
assert single_select_result.dropdown_menu is not None
screenshot = await single_select_result.dropdown_menu.get_locator().screenshot(
timeout=settings.BROWSER_SCREENSHOT_TIMEOUT_MS
)
prompt = prompt_engine.load_prompt(
"confirm-multi-selection-finish",
navigation_goal=task.navigation_goal,
navigation_payload_str=json.dumps(task.navigation_payload),
elements="".join(json_to_html(element) for element in secondary_increment_element),
select_history=json.dumps(build_sequential_select_history(select_history)),
local_datetime=datetime.now(ensure_context().tz_info).isoformat(),
)
json_response = await app.SECONDARY_LLM_API_HANDLER(prompt=prompt, screenshots=[screenshot], step=step)
if json_response.get("is_finished", False):
return single_select_result.action_result, values[-1] if len(values) > 0 else None
return select_history[-1].action_result if len(select_history) > 0 else None, values[-1] if len(
values
) > 0 else None
@@ -2292,6 +2322,9 @@ async def locate_dropdown_menu(
step: Step,
task: Task,
) -> SkyvernElement | None:
if not await current_anchor_element.is_visible():
return None
skyvern_frame = incremental_scraped.skyvern_frame
for idx, element_dict in enumerate(incremental_scraped.element_tree):

View File

@@ -2044,25 +2044,60 @@ function isClassNameIncludesHidden(className) {
return className.toLowerCase().includes("hide");
}
function addIncrementalNodeToMap(parentNode, childrenNode) {
// calculate the depth of targetNode element for sorting
const depth = getElementDomDepth(parentNode);
let newNodesTreeList = [];
if (window.globalDomDepthMap.has(depth)) {
newNodesTreeList = window.globalDomDepthMap.get(depth);
/**
 * Return a promise that settles on the browser's next animation frame.
 *
 * The promise resolves with no value; the frame timestamp passed by
 * requestAnimationFrame is intentionally discarded.
 */
function waitForNextFrame() {
  return new Promise((onFrame) => {
    requestAnimationFrame(() => {
      onFrame();
    });
  });
}
/**
 * Return a promise that resolves after roughly `ms` milliseconds.
 *
 * @param {number} ms delay handed to setTimeout
 * @returns {Promise<void>}
 */
function sleep(ms) {
  return new Promise((wake) => {
    setTimeout(wake, ms);
  });
}
class SafeCounter {
constructor() {
this.value = 0;
this.lock = Promise.resolve();
}
for (const child of childrenNode) {
const [_, newNodeTree] = buildElementTree(child, "", true);
if (newNodeTree.length > 0) {
newNodesTreeList.push(...newNodeTree);
}
async add() {
await this.lock;
this.lock = new Promise((resolve) => {
this.value += 1;
resolve();
});
}
window.globalDomDepthMap.set(depth, newNodesTreeList);
async get() {
await this.lock;
return this.value;
}
}
/**
 * Parse newly added or revealed DOM nodes into element trees and append them
 * to the bucket of `window.globalDomDepthMap` keyed by the DOM depth of
 * `parentNode`.
 *
 * Parsing is deferred to the next animation frame so that the caller (the
 * MutationObserver callback) does not build element trees synchronously.
 * Nodes are only parsed while `window.globalListnerFlag` is set; after the
 * listener has been stopped the call is a no-op apart from the counter.
 *
 * `window.globalParsedElementCounter` is incremented exactly once per call,
 * whether parsing succeeded, was skipped, or threw. getIncrementElements()
 * and stopGlobalIncrementalObserver() poll that counter until it reaches
 * `window.globalOneTimeIncrementElements.length`, so a missed increment
 * would leave them polling forever.
 *
 * @param {Node} parentNode node whose DOM depth selects the bucket
 * @param {Iterable<Node>} childrenNode nodes to parse with buildElementTree
 * @returns {Promise<void>} rejects with whatever the parsing helpers throw
 */
async function addIncrementalNodeToMap(parentNode, childrenNode) {
  // make the dom parser async
  await waitForNextFrame();
  try {
    if (!window.globalListnerFlag) {
      // the listener was stopped while this call was waiting for the frame
      return;
    }

    // calculate the depth of targetNode element for sorting
    const depth = getElementDomDepth(parentNode);
    let newNodesTreeList = [];
    if (window.globalDomDepthMap.has(depth)) {
      newNodesTreeList = window.globalDomDepthMap.get(depth);
    }

    for (const child of childrenNode) {
      const [, newNodeTree] = buildElementTree(child, "", true);
      if (newNodeTree.length > 0) {
        newNodesTreeList.push(...newNodeTree);
      }
    }
    window.globalDomDepthMap.set(depth, newNodesTreeList);
  } finally {
    // Always account for this call, even when parsing threw, so that the
    // pollers waiting on the counter can terminate. The error (if any) is
    // re-thrown after the increment.
    await window.globalParsedElementCounter.add();
  }
}
if (window.globalObserverForDOMIncrement === undefined) {
window.globalObserverForDOMIncrement = new MutationObserver(function (
window.globalObserverForDOMIncrement = new MutationObserver(async function (
mutationsList,
observer,
) {
@@ -2076,13 +2111,14 @@ if (window.globalObserverForDOMIncrement === undefined) {
targetNode: node,
newNodes: [node],
});
addIncrementalNodeToMap(node, [node]);
await addIncrementalNodeToMap(node, [node]);
}
}
if (mutation.attributeName === "style") {
// TODO: need to confirm that the element was previously hidden
const node = mutation.target;
if (node.nodeType === Node.TEXT_NODE) continue;
if (node.tagName.toLowerCase() === "body") continue;
const newStyle = getElementComputedStyle(node);
const newDisplay = newStyle?.display;
if (newDisplay !== "none") {
@@ -2090,7 +2126,7 @@ if (window.globalObserverForDOMIncrement === undefined) {
targetNode: node,
newNodes: [node],
});
addIncrementalNodeToMap(node, [node]);
await addIncrementalNodeToMap(node, [node]);
}
}
if (mutation.attributeName === "class") {
@@ -2110,7 +2146,7 @@ if (window.globalObserverForDOMIncrement === undefined) {
targetNode: node,
newNodes: [node],
});
addIncrementalNodeToMap(node, [node]);
await addIncrementalNodeToMap(node, [node]);
}
}
}
@@ -2122,26 +2158,30 @@ if (window.globalObserverForDOMIncrement === undefined) {
targetNode: node, // TODO: for future usage, when we want to parse new elements into a tree
};
let newNodes = [];
if (
node.tagName.toLowerCase() === "ul" ||
(node.tagName.toLowerCase() === "div" &&
node.hasAttribute("role") &&
node.getAttribute("role").toLowerCase() === "listbox")
) {
newNodes.push(node);
} else {
if (mutation.addedNodes && mutation.addedNodes.length > 0) {
for (const node of mutation.addedNodes) {
// skip the text nodes, they won't be interactable
if (node.nodeType === Node.TEXT_NODE) continue;
newNodes.push(node);
}
if (mutation.addedNodes && mutation.addedNodes.length > 0) {
for (const node of mutation.addedNodes) {
// skip the text nodes, they won't be interactable
if (node.nodeType === Node.TEXT_NODE) continue;
newNodes.push(node);
}
}
if (
newNodes.length == 0 &&
(node.tagName.toLowerCase() === "ul" ||
(node.tagName.toLowerCase() === "div" &&
node.hasAttribute("role") &&
node.getAttribute("role").toLowerCase() === "listbox"))
) {
newNodes.push(node);
}
if (newNodes.length > 0) {
changedNode.newNodes = newNodes;
window.globalOneTimeIncrementElements.push(changedNode);
addIncrementalNodeToMap(changedNode.targetNode, changedNode.newNodes);
await addIncrementalNodeToMap(
changedNode.targetNode,
changedNode.newNodes,
);
}
}
}
@@ -2149,8 +2189,10 @@ if (window.globalObserverForDOMIncrement === undefined) {
}
function startGlobalIncrementalObserver() {
window.globalListnerFlag = true;
window.globalDomDepthMap = new Map();
window.globalOneTimeIncrementElements = [];
window.globalParsedElementCounter = new SafeCounter();
window.globalObserverForDOMIncrement.takeRecords(); // cleanup the older data
window.globalObserverForDOMIncrement.observe(document.body, {
attributes: true,
@@ -2161,14 +2203,28 @@ function startGlobalIncrementalObserver() {
});
}
function stopGlobalIncrementalObserver() {
window.globalDomDepthMap = new Map();
async function stopGlobalIncrementalObserver() {
window.globalListnerFlag = false;
window.globalObserverForDOMIncrement.disconnect();
window.globalObserverForDOMIncrement.takeRecords(); // cleanup the older data
while (
(await window.globalParsedElementCounter.get()) <
window.globalOneTimeIncrementElements.length
) {
await sleep(100);
}
window.globalOneTimeIncrementElements = [];
window.globalDomDepthMap = new Map();
}
function getIncrementElements() {
async function getIncrementElements() {
while (
(await window.globalParsedElementCounter.get()) <
window.globalOneTimeIncrementElements.length
) {
await sleep(100);
}
// clean up the children tree and remove duplicated elements
// search starting from the shallowest node:
// 1. if deeper, the node could only be the children of the shallower one or no related one.

View File

@@ -554,7 +554,7 @@ class IncrementalScrapePage:
) -> list[dict]:
frame = self.skyvern_frame.get_frame()
js_script = "() => getIncrementElements()"
js_script = "async () => await getIncrementElements()"
incremental_elements, incremental_tree = await SkyvernFrame.evaluate(
frame=frame, expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS
)
@@ -580,8 +580,10 @@ class IncrementalScrapePage:
js_script = "() => window.globalObserverForDOMIncrement === undefined"
if await SkyvernFrame.evaluate(frame=self.skyvern_frame.get_frame(), expression=js_script):
return
js_script = "() => stopGlobalIncrementalObserver()"
await SkyvernFrame.evaluate(frame=self.skyvern_frame.get_frame(), expression=js_script)
js_script = "async () => await stopGlobalIncrementalObserver()"
await SkyvernFrame.evaluate(
frame=self.skyvern_frame.get_frame(), expression=js_script, timeout_ms=BUILDING_ELEMENT_TREE_TIMEOUT_MS
)
async def get_incremental_elements_num(self) -> int:
js_script = "() => window.globalOneTimeIncrementElements.length"

View File

@@ -107,11 +107,11 @@ class SkyvernElement:
num_elements = await locator.count()
if num_elements < 1:
LOG.warning("No elements found with css. Validation failed.", css=css_selector, element_id=element_id)
LOG.debug("No elements found with css. Validation failed.", css=css_selector, element_id=element_id)
raise MissingElement(selector=css_selector, element_id=element_id)
elif num_elements > 1:
LOG.warning(
LOG.debug(
"Multiple elements found with css. Expected 1. Validation failed.",
num_elements=num_elements,
selector=css_selector,
@@ -584,13 +584,17 @@ class SkyvernElement:
await page.mouse.click(click_x, click_y)
async def blur(self) -> None:
    """Remove focus from this element by calling ``element.blur()`` in its frame.

    Does nothing when the element is not currently visible.
    """
    if await self.is_visible():
        # Resolve the frame before the handle, mirroring the original
        # argument evaluation order.
        target_frame = self.get_frame()
        handle = await self.get_element_handler()
        await SkyvernFrame.evaluate(
            frame=target_frame,
            expression="(element) => element.blur()",
            arg=handle,
        )
async def scroll_into_view(self, timeout: float = settings.BROWSER_ACTION_TIMEOUT_MS) -> None:
element_handler = await self.get_element_handler(timeout=timeout)
if not await self.is_visible():
return
try:
element_handler = await self.get_element_handler(timeout=timeout)
await element_handler.scroll_into_view_if_needed(timeout=timeout)
except TimeoutError:
LOG.info(