support svg element (#508)

This commit is contained in:
LawyZheng
2024-06-25 12:16:10 +08:00
committed by GitHub
parent 6a472da51e
commit 73d9f17bd0
4 changed files with 30 additions and 11 deletions

View File

@@ -727,6 +727,9 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
text: getElementContent(element),
children: [],
rect: DomUtils.getVisibleClientRect(element, true),
// don't trim any attributes of this element when keepAllAttr is true
keepAllAttr:
elementTagNameLower === "svg" || element.closest("svg") !== null,
};
// get options for select element or for listbox element
@@ -859,19 +862,30 @@ async function buildTreeFromBody(frame = "main.frame", open_select = false) {
!isHidden(element) &&
!isScriptOrStyle(element)
) {
let textContent = "";
for (let i = 0; i < element.childNodes.length; i++) {
var node = element.childNodes[i];
if (node.nodeType === Node.TEXT_NODE) {
textContent += getVisibleText(node).trim();
let elementObj = null;
if (element.tagName.toLowerCase() === "svg") {
// if element is <svg> we save all attributes and its children
elementObj = buildElementObject(element, false);
} else if (element.closest("svg")) {
// if element is a descendant of <svg>
elementObj = buildElementObject(element, false);
} else {
// character length limit for non-interactable elements should be 5000
// we don't use element context in HTML format,
// so we need to make sure we parse all text node to avoid missing text in HTML.
let textContent = "";
for (let i = 0; i < element.childNodes.length; i++) {
var node = element.childNodes[i];
if (node.nodeType === Node.TEXT_NODE) {
textContent += getVisibleText(node).trim();
}
}
if (textContent && textContent.length <= 5000) {
elementObj = buildElementObject(element, false);
}
}
// character length limit for non-interactable elements should be 5000
// we don't use element context in HTML format,
// so we need to make sure we parse all text node to avoid missing text in HTML.
if (textContent && textContent.length <= 5000) {
var elementObj = await buildElementObject(element, false);
if (elementObj !== null) {
elements.push(elementObj);
if (parentId === null) {
resultArray.push(elementObj);

View File

@@ -453,13 +453,16 @@ def trim_element_tree(elements: list[dict]) -> list[dict]:
if not queue_ele.get("interactable"):
del queue_ele["id"]
if "attributes" in queue_ele:
if "attributes" in queue_ele and not queue_ele.get("keepAllAttr", False):
tag_name = queue_ele["tagName"] if "tagName" in queue_ele else ""
new_attributes = _trimmed_attributes(tag_name, queue_ele["attributes"])
if new_attributes:
queue_ele["attributes"] = new_attributes
else:
del queue_ele["attributes"]
# remove the keepAllAttr flag; it is not needed in the HTML tree
del queue_ele["keepAllAttr"]
if "children" in queue_ele:
queue.extend(queue_ele["children"])
if not queue_ele["children"]: