support table parse (#708)

This commit is contained in:
LawyZheng
2024-08-15 09:32:18 +08:00
committed by GitHub
parent faf939d29d
commit 4a8b1473ec

View File

@@ -303,6 +303,22 @@ function hasWidgetRole(element) {
return widgetRoles.includes(role.toLowerCase().trim());
}
/**
 * Report whether an element is one of the HTML table-structure tags.
 *
 * The tag name is lower-cased before comparison, so matching is
 * case-insensitive.
 *
 * @param {{tagName: string}} element - Node whose `tagName` is inspected.
 * @returns {boolean} `true` for table, caption, thead, tbody, tfoot, tr, th,
 *   td, colgroup or col; `false` for any other tag name.
 */
function isTableRelatedElement(element) {
  switch (element.tagName.toLowerCase()) {
    // Every table-structure tag falls through to the same positive answer.
    case "table":
    case "caption":
    case "thead":
    case "tbody":
    case "tfoot":
    case "tr":
    case "th":
    case "td":
    case "colgroup":
    case "col":
      return true;
    default:
      return false;
  }
}
function isInteractableInput(element) {
const tagName = element.tagName.toLowerCase();
const type = element.getAttribute("type") ?? "text"; // Default is text: https://www.w3schools.com/html/html_form_input_types.asp
@@ -1051,6 +1067,10 @@ async function buildElementTree(
} else if (isParentSVG && isParentSVG.getAttribute("unique_id")) {
// if the element is a child of an <svg> that has a unique_id
elementObj = await buildElementObject(element, false);
} else if (isTableRelatedElement(element)) {
// build all table related elements into skyvern element
// we need these elements to preserve the DOM structure
elementObj = await buildElementObject(element, false);
} else {
// character length limit for non-interactable elements should be 5000
// we don't use element context in HTML format,