generate element xpath (#2335)
This commit is contained in:
@@ -1421,7 +1421,12 @@ async function buildElementTree(
|
|||||||
return [];
|
return [];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
async function processElement(element, parentId) {
|
async function processElement(
|
||||||
|
element,
|
||||||
|
parentId,
|
||||||
|
parent_xpath,
|
||||||
|
current_node_index,
|
||||||
|
) {
|
||||||
if (element === null) {
|
if (element === null) {
|
||||||
_jsConsoleLog("get a null element");
|
_jsConsoleLog("get a null element");
|
||||||
return;
|
return;
|
||||||
@@ -1438,6 +1443,20 @@ async function buildElementTree(
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let current_xpath = null;
|
||||||
|
if (parent_xpath) {
|
||||||
|
// ignore the namespace, otherwise the xpath sometimes won't find anything, especially for SVG elements
|
||||||
|
current_xpath =
|
||||||
|
parent_xpath +
|
||||||
|
"/" +
|
||||||
|
'*[name()="' +
|
||||||
|
tagName +
|
||||||
|
'"]' +
|
||||||
|
"[" +
|
||||||
|
current_node_index +
|
||||||
|
"]";
|
||||||
|
}
|
||||||
|
|
||||||
// if element is an "a" tag and has a target="_blank" attribute, remove the target attribute
|
// if element is an "a" tag and has a target="_blank" attribute, remove the target attribute
|
||||||
// We're doing this so that skyvern can do all the navigation in a single page/tab and not open new tab
|
// We're doing this so that skyvern can do all the navigation in a single page/tab and not open new tab
|
||||||
if (tagName === "a") {
|
if (tagName === "a") {
|
||||||
@@ -1446,10 +1465,10 @@ async function buildElementTree(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let children = [];
|
let shadowDOMchildren = [];
|
||||||
// sometimes the shadowRoot is not visible, but the elements in the shadowRoot are visible
|
// sometimes the shadowRoot is not visible, but the elements in the shadowRoot are visible
|
||||||
if (element.shadowRoot) {
|
if (element.shadowRoot) {
|
||||||
children = getChildElements(element.shadowRoot);
|
shadowDOMchildren = getChildElements(element.shadowRoot);
|
||||||
}
|
}
|
||||||
const isVisible = isElementVisible(element);
|
const isVisible = isElementVisible(element);
|
||||||
if (isVisible && !isHidden(element) && !isScriptOrStyle(element)) {
|
if (isVisible && !isHidden(element) && !isScriptOrStyle(element)) {
|
||||||
@@ -1500,6 +1519,7 @@ async function buildElementTree(
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (elementObj) {
|
if (elementObj) {
|
||||||
|
elementObj.xpath = current_xpath;
|
||||||
elements.push(elementObj);
|
elements.push(elementObj);
|
||||||
// If the element is interactable but has no interactable parent,
|
// If the element is interactable but has no interactable parent,
|
||||||
// then it starts a new tree, so add it to the result array
|
// then it starts a new tree, so add it to the result array
|
||||||
@@ -1520,10 +1540,35 @@ async function buildElementTree(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
children = children.concat(getChildElements(element));
|
const children = getChildElements(element);
|
||||||
|
const xpathMap = new Map();
|
||||||
|
|
||||||
for (let i = 0; i < children.length; i++) {
|
for (let i = 0; i < children.length; i++) {
|
||||||
const childElement = children[i];
|
const childElement = children[i];
|
||||||
await processElement(childElement, parentId);
|
const tagName = childElement?.tagName?.toLowerCase();
|
||||||
|
if (!tagName) {
|
||||||
|
_jsConsoleLog("get a null tagName");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
let current_node_index = xpathMap.get(tagName);
|
||||||
|
if (current_node_index == undefined) {
|
||||||
|
current_node_index = 1;
|
||||||
|
} else {
|
||||||
|
current_node_index = current_node_index + 1;
|
||||||
|
}
|
||||||
|
xpathMap.set(tagName, current_node_index);
|
||||||
|
await processElement(
|
||||||
|
childElement,
|
||||||
|
parentId,
|
||||||
|
current_xpath,
|
||||||
|
current_node_index,
|
||||||
|
);
|
||||||
|
}
|
||||||
|
|
||||||
|
// FIXME: xpath won't work when the element is in shadow DOM
|
||||||
|
for (let i = 0; i < shadowDOMchildren.length; i++) {
|
||||||
|
const childElement = shadowDOMchildren[i];
|
||||||
|
await processElement(childElement, parentId, null, 0);
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
@@ -1732,8 +1777,13 @@ async function buildElementTree(
|
|||||||
return trimmedResults;
|
return trimmedResults;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let current_xpath = null;
|
||||||
|
if (starter === document.body) {
|
||||||
|
current_xpath = "/html[1]";
|
||||||
|
}
|
||||||
|
|
||||||
// setup before parsing the dom
|
// setup before parsing the dom
|
||||||
await processElement(starter, null);
|
await processElement(starter, null, current_xpath, 1);
|
||||||
|
|
||||||
for (var element of elements) {
|
for (var element of elements) {
|
||||||
if (
|
if (
|
||||||
|
|||||||
@@ -93,6 +93,7 @@ class SkyvernElement:
|
|||||||
When you try to interact with these elements by python, you are supposed to use this class as an interface.
|
When you try to interact with these elements by python, you are supposed to use this class as an interface.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
# TODO: support creating a SkyvernElement from an incremental page by xpath
|
||||||
@classmethod
|
@classmethod
|
||||||
async def create_from_incremental(cls, incre_page: IncrementalScrapePage, element_id: str) -> SkyvernElement:
|
async def create_from_incremental(cls, incre_page: IncrementalScrapePage, element_id: str) -> SkyvernElement:
|
||||||
element_dict = incre_page.id_to_element_dict.get(element_id)
|
element_dict = incre_page.id_to_element_dict.get(element_id)
|
||||||
@@ -773,8 +774,20 @@ class DomUtil:
|
|||||||
|
|
||||||
num_elements = await locator.count()
|
num_elements = await locator.count()
|
||||||
if num_elements < 1:
|
if num_elements < 1:
|
||||||
LOG.warning("No elements found with css. Validation failed.", css=css, element_id=element_id)
|
xpath: str | None = element.get("xpath")
|
||||||
raise MissingElement(selector=css, element_id=element_id)
|
if not xpath:
|
||||||
|
LOG.warning("No elements found with css. Validation failed.", css=css, element_id=element_id)
|
||||||
|
raise MissingElement(selector=css, element_id=element_id)
|
||||||
|
else:
|
||||||
|
# WARNING: current xpath is based on the tag name.
|
||||||
|
# It can only represent the element's position in the DOM tree by tag name, so it's not 100% reliable.
|
||||||
|
# As long as an element with the same tag name occupies that position, the locator will still match.
|
||||||
|
# (maybe) we should validate the element hash to make sure the element is the same?
|
||||||
|
LOG.warning("Fallback to locator element by xpath.", xpath=xpath, element_id=element_id)
|
||||||
|
locator = frame_content.locator(f"xpath={xpath}")
|
||||||
|
num_elements = await locator.count()
|
||||||
|
if num_elements < 1:
|
||||||
|
raise MissingElement(selector=xpath, element_id=element_id)
|
||||||
|
|
||||||
elif num_elements > 1:
|
elif num_elements > 1:
|
||||||
LOG.warning(
|
LOG.warning(
|
||||||
|
|||||||
Reference in New Issue
Block a user