support to parse pseudo content (#1069)
This commit is contained in:
@@ -616,6 +616,30 @@ const isAngularDropdown = (element) => {
|
|||||||
return false;
|
return false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/**
 * Read the text generated by a pseudo-element (e.g. "::before" / "::after").
 *
 * @param {Element} element - element whose pseudo-element is inspected
 * @param {string} pseudo - pseudo-element selector passed through to
 *   getElementComputedStyle
 * @returns {string|null} the `content` value with double quotes removed, or
 *   null when no style is available or the pseudo-element generates nothing
 */
function getPseudoContent(element, pseudo) {
  // getElementComputedStyle is defined elsewhere in this file; a falsy result
  // is treated as "no style available".
  const pseudoStyle = getElementComputedStyle(element, pseudo);
  if (!pseudoStyle) {
    return null;
  }

  const rawContent = pseudoStyle.getPropertyValue("content").trim();

  // Check the CSS keywords on the raw value, before quotes are stripped:
  // unquoted `none` / `normal` mean "no generated content", whereas a quoted
  // string such as "none" is real text and must be kept.
  if (!rawContent || rawContent === "none" || rawContent === "normal") {
    return null;
  }

  // String values are reported wrapped in double quotes; drop them so only
  // the text itself remains.
  const content = rawContent.replace(/"/g, "").trim();

  // A value made only of quotes/whitespace (e.g. `""`) carries no text.
  return content || null;
}
|
||||||
|
|
||||||
|
/**
 * Tell whether the element renders generated text through its "::before" or
 * "::after" pseudo-element.
 *
 * @param {Element} element - element to inspect
 * @returns {boolean} true when getPseudoContent yields a non-null value for
 *   either pseudo-element
 */
function hasBeforeOrAfterPseudoContent(element) {
  // "::before" is probed first; "::after" is only queried when it is empty.
  for (const pseudoSelector of ["::before", "::after"]) {
    if (getPseudoContent(element, pseudoSelector) != null) {
      return true;
    }
  }
  return false;
}
|
||||||
|
|
||||||
const checkParentClass = (className) => {
|
const checkParentClass = (className) => {
|
||||||
const targetParentClasses = ["field", "entry"];
|
const targetParentClasses = ["field", "entry"];
|
||||||
for (let i = 0; i < targetParentClasses.length; i++) {
|
for (let i = 0; i < targetParentClasses.length; i++) {
|
||||||
@@ -876,7 +900,9 @@ function buildElementObject(frame, element, interactable, purgeable = false) {
|
|||||||
interactable: interactable,
|
interactable: interactable,
|
||||||
tagName: elementTagNameLower,
|
tagName: elementTagNameLower,
|
||||||
attributes: attrs,
|
attributes: attrs,
|
||||||
|
beforePseudoText: getPseudoContent(element, "::before"),
|
||||||
text: getElementContent(element),
|
text: getElementContent(element),
|
||||||
|
afterPseudoText: getPseudoContent(element, "::after"),
|
||||||
children: [],
|
children: [],
|
||||||
rect: DomUtils.getVisibleClientRect(element, true),
|
rect: DomUtils.getVisibleClientRect(element, true),
|
||||||
// if purgeable is True, which means this element is only used for building the tree relationship
|
// if purgeable is True, which means this element is only used for building the tree relationship
|
||||||
@@ -1020,6 +1046,8 @@ function buildElementTree(starter = document.body, frame, full_tree = false) {
|
|||||||
// build all table related elements into skyvern element
|
// build all table related elements into skyvern element
|
||||||
// we need these elements to preserve the DOM structure
|
// we need these elements to preserve the DOM structure
|
||||||
elementObj = buildElementObject(frame, element, false);
|
elementObj = buildElementObject(frame, element, false);
|
||||||
|
} else if (hasBeforeOrAfterPseudoContent(element)) {
|
||||||
|
elementObj = buildElementObject(frame, element, false);
|
||||||
} else if (full_tree) {
|
} else if (full_tree) {
|
||||||
// when building full tree, we only get text from element itself
|
// when building full tree, we only get text from element itself
|
||||||
// elements without text are purgeable
|
// elements without text are purgeable
|
||||||
|
|||||||
@@ -122,11 +122,20 @@ def json_to_html(element: dict, need_skyvern_attrs: bool = True) -> str:
|
|||||||
if element.get("purgeable", False):
|
if element.get("purgeable", False):
|
||||||
return children_html + option_html
|
return children_html + option_html
|
||||||
|
|
||||||
|
before_pseudo_text = element.get("beforePseudoText", "")
|
||||||
|
after_pseudo_text = element.get("afterPseudoText", "")
|
||||||
|
|
||||||
# Check if the element is self-closing
|
# Check if the element is self-closing
|
||||||
if tag in ["img", "input", "br", "hr", "meta", "link"] and not option_html and not children_html:
|
if (
|
||||||
|
tag in ["img", "input", "br", "hr", "meta", "link"]
|
||||||
|
and not option_html
|
||||||
|
and not children_html
|
||||||
|
and not before_pseudo_text
|
||||||
|
and not after_pseudo_text
|
||||||
|
):
|
||||||
return f'<{tag}{attributes_html if not attributes_html else " "+attributes_html}>'
|
return f'<{tag}{attributes_html if not attributes_html else " "+attributes_html}>'
|
||||||
else:
|
else:
|
||||||
return f'<{tag}{attributes_html if not attributes_html else " "+attributes_html}>{text}{children_html+option_html}</{tag}>'
|
return f'<{tag}{attributes_html if not attributes_html else " "+attributes_html}>{before_pseudo_text}{text}{children_html+option_html}{after_pseudo_text}</{tag}>'
|
||||||
|
|
||||||
|
|
||||||
def clean_element_before_hashing(element: dict) -> dict:
|
def clean_element_before_hashing(element: dict) -> dict:
|
||||||
@@ -602,6 +611,13 @@ def trim_element(element: dict) -> dict:
|
|||||||
element_text = str(queue_ele["text"]).strip()
|
element_text = str(queue_ele["text"]).strip()
|
||||||
if not element_text:
|
if not element_text:
|
||||||
del queue_ele["text"]
|
del queue_ele["text"]
|
||||||
|
|
||||||
|
if "beforePseudoText" in queue_ele and not queue_ele.get("beforePseudoText"):
|
||||||
|
del queue_ele["beforePseudoText"]
|
||||||
|
|
||||||
|
if "afterPseudoText" in queue_ele and not queue_ele.get("afterPseudoText"):
|
||||||
|
del queue_ele["afterPseudoText"]
|
||||||
|
|
||||||
return element
|
return element
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user