Update domutils with getVisibleText (#460)

This commit is contained in:
Kerem Yilmaz
2024-06-11 22:33:37 -07:00
committed by GitHub
parent f3478ffc65
commit 8e21c8b39e

View File

@@ -480,12 +480,12 @@ function getElementContext(element) {
// if the element already has a context, then add it to the list first // if the element already has a context, then add it to the list first
for (var child of element.childNodes) { for (var child of element.childNodes) {
let childContext = ""; let childContext = "";
if (child.nodeType === Node.TEXT_NODE) { if (child.nodeType === Node.TEXT_NODE && isElementVisible(element)) {
if (!element.hasAttribute("unique_id")) { if (!element.hasAttribute("unique_id")) {
childContext = child.data.trim(); childContext = getVisibleText(child).trim();
} }
} else if (child.nodeType === Node.ELEMENT_NODE) { } else if (child.nodeType === Node.ELEMENT_NODE) {
if (!child.hasAttribute("unique_id")) { if (!child.hasAttribute("unique_id") && isElementVisible(child)) {
childContext = getElementContext(child); childContext = getElementContext(child);
} }
} }
@@ -496,13 +496,36 @@ function getElementContext(element) {
return fullContext.join(";"); return fullContext.join(";");
} }
/**
 * Collects the visibly rendered text under a node and joins the pieces with
 * single spaces.
 *
 * Accepts either kind of node:
 *  - a text node contributes its trimmed data when its parent element passes
 *    `isElementVisible`;
 *  - an element node that passes `isElementVisible` is walked depth-first in
 *    child order; an element that fails the check is skipped together with
 *    its whole subtree.
 * Any other node type (e.g. comments) contributes nothing.
 *
 * @param {Node} element - Text node or element node to read from.
 * @returns {string} Non-empty trimmed text fragments joined by " ", or ""
 *   when no visible text was found.
 */
function getVisibleText(element) {
  const visibleText = [];

  function collectVisibleText(node) {
    if (node.nodeType === Node.TEXT_NODE) {
      const parent = node.parentElement;
      // parentElement is null for a detached text node or one that sits
      // directly under a DocumentFragment/ShadowRoot. There is no element to
      // run the visibility check against, so skip it instead of handing null
      // to isElementVisible.
      if (!parent || !isElementVisible(parent)) {
        return;
      }
      const trimmedText = node.data.trim();
      if (trimmedText.length > 0) {
        visibleText.push(trimmedText);
      }
      return;
    }

    if (node.nodeType === Node.ELEMENT_NODE && isElementVisible(node)) {
      for (const child of node.childNodes) {
        collectVisibleText(child);
      }
    }
  }

  collectVisibleText(element);
  return visibleText.join(" ");
}
function getElementContent(element, skipped_element = null) { function getElementContent(element, skipped_element = null) {
// DFS to get all the text content from all the nodes under the element // DFS to get all the text content from all the nodes under the element
if (skipped_element && element === skipped_element) { if (skipped_element && element === skipped_element) {
return ""; return "";
} }
let textContent = element.textContent; let textContent = getVisibleText(element);
let nodeContent = ""; let nodeContent = "";
// if element has children, then build a list of text and join with a semicolon // if element has children, then build a list of text and join with a semicolon
if (element.childNodes.length > 0) { if (element.childNodes.length > 0) {
@@ -511,8 +534,10 @@ function getElementContent(element, skipped_element = null) {
for (var child of element.childNodes) { for (var child of element.childNodes) {
let childText = ""; let childText = "";
if (child.nodeType === Node.TEXT_NODE) { if (child.nodeType === Node.TEXT_NODE) {
childText = child.data.trim(); childText = getVisibleText(child).trim();
nodeTextContentList.push(childText); if (childText.length > 0) {
nodeTextContentList.push(childText);
}
} else if (child.nodeType === Node.ELEMENT_NODE) { } else if (child.nodeType === Node.ELEMENT_NODE) {
// childText = child.textContent.trim(); // childText = child.textContent.trim();
childText = getElementContent(child, skipped_element); childText = getElementContent(child, skipped_element);
@@ -563,7 +588,7 @@ function getListboxOptions(element) {
selectOptions.push({ selectOptions.push({
optionIndex: i, optionIndex: i,
text: removeMultipleSpaces(ele.textContent), text: removeMultipleSpaces(getVisibleText(ele)),
}); });
} }
return selectOptions; return selectOptions;
@@ -785,7 +810,7 @@ function buildTreeFromBody(frame = "main.frame") {
for (let i = 0; i < element.childNodes.length; i++) { for (let i = 0; i < element.childNodes.length; i++) {
var node = element.childNodes[i]; var node = element.childNodes[i];
if (node.nodeType === Node.TEXT_NODE) { if (node.nodeType === Node.TEXT_NODE) {
textContent += node.textContent.trim(); textContent += getVisibleText(node).trim();
} }
} }