optimize domutils (#1656)

This commit is contained in:
Shuchang Zheng
2025-01-27 22:01:15 +08:00
committed by GitHub
parent 7f2413e06d
commit f01502bdde
2 changed files with 96 additions and 134 deletions

View File

@@ -598,6 +598,10 @@ function isInteractable(element, hoverStylesMap) {
return false;
}
if (tagName === "frame") {
return false;
}
if (tagName === "a" && element.href) {
return true;
}
@@ -996,7 +1000,7 @@ function getElementContext(element, stopped_element) {
let childContext = "";
if (child.nodeType === Node.TEXT_NODE && isElementVisible(element)) {
if (!element.hasAttribute("unique_id")) {
childContext = getVisibleText(child).trim();
childContext = getElementText(child).trim();
}
} else if (child.nodeType === Node.ELEMENT_NODE) {
if (!child.hasAttribute("unique_id") && isElementVisible(child)) {
@@ -1033,13 +1037,30 @@ function getVisibleText(element) {
return visibleText.join(" ");
}
// Return only the text that belongs to the node itself; text nested inside
// descendant elements is ignored.
//  - For a text node: its own data, trimmed.
//  - For any other node: every direct text-node child is trimmed, blank
//    results are dropped, and the remaining pieces are joined with ";".
function getElementText(element) {
  if (element.nodeType === Node.TEXT_NODE) {
    return element.data.trim();
  }
  return Array.from(element.childNodes)
    .filter((child) => child.nodeType === Node.TEXT_NODE)
    .map((child) => child.data.trim())
    .filter((piece) => piece.length > 0)
    .join(";");
}
function getElementContent(element, skipped_element = null) {
// DFS to get all the text content from all the nodes under the element
if (skipped_element && element === skipped_element) {
return "";
}
let textContent = getVisibleText(element);
let textContent = getElementText(element);
let nodeContent = "";
// if element has children, then build a list of text and join with a semicolon
if (element.childNodes.length > 0) {
@@ -1048,7 +1069,7 @@ function getElementContent(element, skipped_element = null) {
for (var child of element.childNodes) {
let childText = "";
if (child.nodeType === Node.TEXT_NODE) {
childText = getVisibleText(child).trim();
childText = getElementText(child).trim();
if (childText.length > 0) {
nodeTextContentList.push(childText);
}
@@ -1207,7 +1228,7 @@ function buildElementObject(frame, element, interactable, purgeable = false) {
tagName: elementTagNameLower,
attributes: attrs,
beforePseudoText: getPseudoContent(element, "::before"),
text: getElementContent(element),
text: getElementText(element),
afterPseudoText: getPseudoContent(element, "::after"),
children: [],
rect: DomUtils.getVisibleClientRect(element, true),
@@ -1279,148 +1300,93 @@ function buildElementTree(starter = document.body, frame, full_tree = false) {
return;
}
const tagName = element.tagName.toLowerCase();
// skip processing option elements, as they are already added to select.options
if (element.tagName.toLowerCase() === "option") {
if (tagName === "option") {
return;
}
// if element is an "a" tag and has a target="_blank" attribute, remove the target attribute
// We're doing this so that skyvern can do all the navigation in a single page/tab and not open new tab
if (element.tagName.toLowerCase() === "a") {
if (tagName === "a") {
if (element.getAttribute("target") === "_blank") {
element.removeAttribute("target");
}
}
// Check if the element is interactable
var interactable = isInteractable(element, hoverStylesMap);
if (interactable || element.tagName.toLowerCase() === "frameset") {
var elementObj = buildElementObject(frame, element, interactable);
elements.push(elementObj);
// If the element is interactable but has no interactable parent,
// then it starts a new tree, so add it to the result array
// and set its id as the interactable parent id for the next elements
// under it
if (parentId === null) {
resultArray.push(elementObj);
}
// If the element is interactable and has an interactable parent,
// then add it to the children of the parent
else {
// TODO: use dict/object so that we access these in O(1) instead
elements
.find((element) => element.id === parentId)
.children.push(elementObj);
}
// Recursively process the children of the element
const children = getChildElements(element);
for (let i = 0; i < children.length; i++) {
const childElement = children[i];
processElement(childElement, elementObj.id);
}
return elementObj;
} else if (
element.tagName.toLowerCase() === "iframe" ||
element.tagName.toLowerCase() === "frame"
) {
let iframeElementObject = buildElementObject(frame, element, false);
elements.push(iframeElementObject);
resultArray.push(iframeElementObject);
} else if (element.shadowRoot) {
// shadow host element
let shadowHostElement = buildElementObject(frame, element, false);
elements.push(shadowHostElement);
resultArray.push(shadowHostElement);
const children = getChildElements(element.shadowRoot);
for (let i = 0; i < children.length; i++) {
const childElement = children[i];
processElement(childElement, shadowHostElement.id);
}
const selfChildren = getChildElements(element);
for (let i = 0; i < selfChildren.length; i++) {
const childElement = selfChildren[i];
processElement(childElement, shadowHostElement.id);
}
} else {
// For a non-interactable element, if it has direct text, we also tagged
// it with unique_id, but with interatable=false in the element.
// After that, process its children
// and check if any of them are interactable
let interactableChildren = [];
if (
isElementVisible(element) &&
!isHidden(element) &&
!isScriptOrStyle(element)
let children = [];
const isVisible = isElementVisible(element);
if (isVisible && !isHidden(element) && !isScriptOrStyle(element)) {
const interactable = isInteractable(element, hoverStylesMap);
let elementObj = null;
let isParentSVG = null;
if (interactable) {
elementObj = buildElementObject(frame, element, interactable);
} else if (
tagName === "frameset" ||
tagName === "iframe" ||
tagName === "frame"
) {
let elementObj = null;
let isParentSVG = element.closest("svg");
if (element.tagName.toLowerCase() === "svg") {
// if element is <svg> we save all attributes and its children
elementObj = buildElementObject(frame, element, false);
} else if (isParentSVG && isParentSVG.getAttribute("unique_id")) {
// if elemnet is the children of the <svg> with an unique_id
elementObj = buildElementObject(frame, element, false);
} else if (isTableRelatedElement(element)) {
// build all table related elements into skyvern element
// we need these elements to preserve the DOM structure
elementObj = buildElementObject(frame, element, false);
} else if (hasBeforeOrAfterPseudoContent(element)) {
elementObj = buildElementObject(frame, element, false);
} else if (full_tree) {
// when building full tree, we only get text from element itself
// elements without text are purgeable
elementObj = buildElementObject(frame, element, false, true);
let textContent = "";
if (isElementVisible(element)) {
for (let i = 0; i < element.childNodes.length; i++) {
var node = element.childNodes[i];
if (node.nodeType === Node.TEXT_NODE) {
textContent += node.data.trim();
}
}
}
elementObj.text = textContent;
if (textContent.length > 0) {
elementObj.purgeable = false;
}
} else {
// character length limit for non-interactable elements should be 5000
// we don't use element context in HTML format,
// so we need to make sure we parse all text node to avoid missing text in HTML.
let textContent = "";
for (let i = 0; i < element.childNodes.length; i++) {
var node = element.childNodes[i];
if (node.nodeType === Node.TEXT_NODE) {
textContent += getVisibleText(node).trim();
}
}
if (textContent && textContent.length <= 5000) {
elementObj = buildElementObject(frame, element, false);
}
}
if (elementObj !== null) {
elements.push(elementObj);
if (parentId === null) {
resultArray.push(elementObj);
} else {
// TODO: use dict/object so that we access these in O(1) instead
elements
.find((element) => element.id === parentId)
.children.push(elementObj);
}
parentId = elementObj.id;
elementObj = buildElementObject(frame, element, interactable);
} else if (element.shadowRoot) {
elementObj = buildElementObject(frame, element, interactable);
children = getChildElements(element.shadowRoot);
} else if (isTableRelatedElement(element)) {
// build all table related elements into skyvern element
// we need these elements to preserve the DOM structure
elementObj = buildElementObject(frame, element, interactable);
} else if (hasBeforeOrAfterPseudoContent(element)) {
elementObj = buildElementObject(frame, element, interactable);
} else if (tagName === "svg") {
elementObj = buildElementObject(frame, element, interactable);
} else if (
(isParentSVG = element.closest("svg")) &&
isParentSVG.getAttribute("unique_id")
) {
// if the element is a descendant of an <svg> that has a unique_id
elementObj = buildElementObject(frame, element, interactable);
} else if (
getElementText(element).length > 0 &&
getElementText(element).length <= 5000
) {
elementObj = buildElementObject(frame, element, interactable);
} else if (full_tree) {
// when building full tree, we only get text from element itself
// elements without text are purgeable
elementObj = buildElementObject(frame, element, interactable, true);
if (elementObj.text.length > 0) {
elementObj.purgeable = false;
}
}
const children = getChildElements(element);
for (let i = 0; i < children.length; i++) {
const childElement = children[i];
processElement(childElement, parentId);
if (elementObj) {
elements.push(elementObj);
// If the element is interactable but has no interactable parent,
// then it starts a new tree, so add it to the result array
// and set its id as the interactable parent id for the next elements
// under it
if (parentId === null) {
resultArray.push(elementObj);
}
// If the element is interactable and has an interactable parent,
// then add it to the children of the parent
else {
// TODO: use dict/object so that we access these in O(1) instead
elements
.find((element) => element.id === parentId)
.children.push(elementObj);
}
parentId = elementObj.id;
}
}
children = children.concat(getChildElements(element));
for (let i = 0; i < children.length; i++) {
const childElement = children[i];
processElement(childElement, parentId);
}
return;
}
const getContextByParent = (element, ctx) => {

View File

@@ -500,11 +500,7 @@ async def get_interactable_element_tree_in_frame(
for element in elements:
if element["id"] == unique_id:
element["children"] = frame_elements
for element_tree_item in element_tree:
if element_tree_item["id"] == unique_id:
element_tree_item["children"] = frame_element_tree
element["children"] = frame_element_tree
elements = elements + frame_elements