generate element xpath (#2335)
This commit is contained in:
@@ -1421,7 +1421,12 @@ async function buildElementTree(
|
||||
return [];
|
||||
}
|
||||
}
|
||||
async function processElement(element, parentId) {
|
||||
async function processElement(
|
||||
element,
|
||||
parentId,
|
||||
parent_xpath,
|
||||
current_node_index,
|
||||
) {
|
||||
if (element === null) {
|
||||
_jsConsoleLog("get a null element");
|
||||
return;
|
||||
@@ -1438,6 +1443,20 @@ async function buildElementTree(
|
||||
return;
|
||||
}
|
||||
|
||||
let current_xpath = null;
|
||||
if (parent_xpath) {
|
||||
// ignore the namespace, otherwise the xpath sometimes won't find anything, specially for SVG elements
|
||||
current_xpath =
|
||||
parent_xpath +
|
||||
"/" +
|
||||
'*[name()="' +
|
||||
tagName +
|
||||
'"]' +
|
||||
"[" +
|
||||
current_node_index +
|
||||
"]";
|
||||
}
|
||||
|
||||
// if element is an "a" tag and has a target="_blank" attribute, remove the target attribute
|
||||
// We're doing this so that skyvern can do all the navigation in a single page/tab and not open new tab
|
||||
if (tagName === "a") {
|
||||
@@ -1446,10 +1465,10 @@ async function buildElementTree(
|
||||
}
|
||||
}
|
||||
|
||||
let children = [];
|
||||
let shadowDOMchildren = [];
|
||||
// sometimes the shadowRoot is not visible, but the elemnets in the shadowRoot are visible
|
||||
if (element.shadowRoot) {
|
||||
children = getChildElements(element.shadowRoot);
|
||||
shadowDOMchildren = getChildElements(element.shadowRoot);
|
||||
}
|
||||
const isVisible = isElementVisible(element);
|
||||
if (isVisible && !isHidden(element) && !isScriptOrStyle(element)) {
|
||||
@@ -1500,6 +1519,7 @@ async function buildElementTree(
|
||||
}
|
||||
|
||||
if (elementObj) {
|
||||
elementObj.xpath = current_xpath;
|
||||
elements.push(elementObj);
|
||||
// If the element is interactable but has no interactable parent,
|
||||
// then it starts a new tree, so add it to the result array
|
||||
@@ -1520,10 +1540,35 @@ async function buildElementTree(
|
||||
}
|
||||
}
|
||||
|
||||
children = children.concat(getChildElements(element));
|
||||
const children = getChildElements(element);
|
||||
const xpathMap = new Map();
|
||||
|
||||
for (let i = 0; i < children.length; i++) {
|
||||
const childElement = children[i];
|
||||
await processElement(childElement, parentId);
|
||||
const tagName = childElement?.tagName?.toLowerCase();
|
||||
if (!tagName) {
|
||||
_jsConsoleLog("get a null tagName");
|
||||
continue;
|
||||
}
|
||||
let current_node_index = xpathMap.get(tagName);
|
||||
if (current_node_index == undefined) {
|
||||
current_node_index = 1;
|
||||
} else {
|
||||
current_node_index = current_node_index + 1;
|
||||
}
|
||||
xpathMap.set(tagName, current_node_index);
|
||||
await processElement(
|
||||
childElement,
|
||||
parentId,
|
||||
current_xpath,
|
||||
current_node_index,
|
||||
);
|
||||
}
|
||||
|
||||
// FIXME: xpath won't work when the element is in shadow DOM
|
||||
for (let i = 0; i < shadowDOMchildren.length; i++) {
|
||||
const childElement = shadowDOMchildren[i];
|
||||
await processElement(childElement, parentId, null, 0);
|
||||
}
|
||||
return;
|
||||
}
|
||||
@@ -1732,8 +1777,13 @@ async function buildElementTree(
|
||||
return trimmedResults;
|
||||
};
|
||||
|
||||
let current_xpath = null;
|
||||
if (starter === document.body) {
|
||||
current_xpath = "/html[1]";
|
||||
}
|
||||
|
||||
// setup before parsing the dom
|
||||
await processElement(starter, null);
|
||||
await processElement(starter, null, current_xpath, 1);
|
||||
|
||||
for (var element of elements) {
|
||||
if (
|
||||
|
||||
@@ -93,6 +93,7 @@ class SkyvernElement:
|
||||
When you try to interact with these elements by python, you are supposed to use this class as an interface.
|
||||
"""
|
||||
|
||||
# TODO: support to create SkyvernElement from incremental page by xpath
|
||||
@classmethod
|
||||
async def create_from_incremental(cls, incre_page: IncrementalScrapePage, element_id: str) -> SkyvernElement:
|
||||
element_dict = incre_page.id_to_element_dict.get(element_id)
|
||||
@@ -773,8 +774,20 @@ class DomUtil:
|
||||
|
||||
num_elements = await locator.count()
|
||||
if num_elements < 1:
|
||||
LOG.warning("No elements found with css. Validation failed.", css=css, element_id=element_id)
|
||||
raise MissingElement(selector=css, element_id=element_id)
|
||||
xpath: str | None = element.get("xpath")
|
||||
if not xpath:
|
||||
LOG.warning("No elements found with css. Validation failed.", css=css, element_id=element_id)
|
||||
raise MissingElement(selector=css, element_id=element_id)
|
||||
else:
|
||||
# WARNING: current xpath is based on the tag name.
|
||||
# It can only represent the element possition in the DOM tree with tag name, it's not 100% reliable.
|
||||
# As long as the current possition has the same element with the tag name, the locator can be found.
|
||||
# (maybe) we should validate the element hash to make sure the element is the same?
|
||||
LOG.warning("Fallback to locator element by xpath.", xpath=xpath, element_id=element_id)
|
||||
locator = frame_content.locator(f"xpath={xpath}")
|
||||
num_elements = await locator.count()
|
||||
if num_elements < 1:
|
||||
raise MissingElement(selector=xpath, element_id=element_id)
|
||||
|
||||
elif num_elements > 1:
|
||||
LOG.warning(
|
||||
|
||||
Reference in New Issue
Block a user