generate element xpath (#2335)

This commit is contained in:
Shuchang Zheng
2025-05-13 11:11:16 -07:00
committed by GitHub
parent b7d4af4b61
commit d3ea8ef85b
2 changed files with 71 additions and 8 deletions

View File

@@ -1421,7 +1421,12 @@ async function buildElementTree(
return [];
}
}
async function processElement(element, parentId) {
async function processElement(
element,
parentId,
parent_xpath,
current_node_index,
) {
if (element === null) {
_jsConsoleLog("get a null element");
return;
@@ -1438,6 +1443,20 @@ async function buildElementTree(
return;
}
let current_xpath = null;
if (parent_xpath) {
// ignore the namespace, otherwise the xpath sometimes won't find anything, especially for SVG elements
current_xpath =
parent_xpath +
"/" +
'*[name()="' +
tagName +
'"]' +
"[" +
current_node_index +
"]";
}
// if element is an "a" tag and has a target="_blank" attribute, remove the target attribute
// We're doing this so that skyvern can do all the navigation in a single page/tab and not open new tab
if (tagName === "a") {
@@ -1446,10 +1465,10 @@ async function buildElementTree(
}
}
let children = [];
let shadowDOMchildren = [];
// sometimes the shadowRoot is not visible, but the elements in the shadowRoot are visible
if (element.shadowRoot) {
children = getChildElements(element.shadowRoot);
shadowDOMchildren = getChildElements(element.shadowRoot);
}
const isVisible = isElementVisible(element);
if (isVisible && !isHidden(element) && !isScriptOrStyle(element)) {
@@ -1500,6 +1519,7 @@ async function buildElementTree(
}
if (elementObj) {
elementObj.xpath = current_xpath;
elements.push(elementObj);
// If the element is interactable but has no interactable parent,
// then it starts a new tree, so add it to the result array
@@ -1520,10 +1540,35 @@ async function buildElementTree(
}
}
children = children.concat(getChildElements(element));
const children = getChildElements(element);
const xpathMap = new Map();
for (let i = 0; i < children.length; i++) {
const childElement = children[i];
await processElement(childElement, parentId);
const tagName = childElement?.tagName?.toLowerCase();
if (!tagName) {
_jsConsoleLog("get a null tagName");
continue;
}
let current_node_index = xpathMap.get(tagName);
if (current_node_index == undefined) {
current_node_index = 1;
} else {
current_node_index = current_node_index + 1;
}
xpathMap.set(tagName, current_node_index);
await processElement(
childElement,
parentId,
current_xpath,
current_node_index,
);
}
// FIXME: xpath won't work when the element is in shadow DOM
for (let i = 0; i < shadowDOMchildren.length; i++) {
const childElement = shadowDOMchildren[i];
await processElement(childElement, parentId, null, 0);
}
return;
}
@@ -1732,8 +1777,13 @@ async function buildElementTree(
return trimmedResults;
};
let current_xpath = null;
if (starter === document.body) {
current_xpath = "/html[1]";
}
// setup before parsing the dom
await processElement(starter, null);
await processElement(starter, null, current_xpath, 1);
for (var element of elements) {
if (

View File

@@ -93,6 +93,7 @@ class SkyvernElement:
When you try to interact with these elements by python, you are supposed to use this class as an interface.
"""
# TODO: support creating a SkyvernElement from an incremental page by xpath
@classmethod
async def create_from_incremental(cls, incre_page: IncrementalScrapePage, element_id: str) -> SkyvernElement:
element_dict = incre_page.id_to_element_dict.get(element_id)
@@ -773,8 +774,20 @@ class DomUtil:
num_elements = await locator.count()
if num_elements < 1:
LOG.warning("No elements found with css. Validation failed.", css=css, element_id=element_id)
raise MissingElement(selector=css, element_id=element_id)
xpath: str | None = element.get("xpath")
if not xpath:
LOG.warning("No elements found with css. Validation failed.", css=css, element_id=element_id)
raise MissingElement(selector=css, element_id=element_id)
else:
# WARNING: the current xpath is based on tag names only.
# It can only represent the element's position in the DOM tree by tag name, so it's not 100% reliable.
# As long as the current position holds an element with the same tag name, the locator can be found.
# (maybe) we should validate the element hash to make sure the element is the same?
LOG.warning("Fallback to locator element by xpath.", xpath=xpath, element_id=element_id)
locator = frame_content.locator(f"xpath={xpath}")
num_elements = await locator.count()
if num_elements < 1:
raise MissingElement(selector=xpath, element_id=element_id)
elif num_elements > 1:
LOG.warning(