diff --git a/skyvern/webeye/scraper/domUtils.js b/skyvern/webeye/scraper/domUtils.js
index 731fcc6e..1f604c16 100644
--- a/skyvern/webeye/scraper/domUtils.js
+++ b/skyvern/webeye/scraper/domUtils.js
@@ -616,6 +616,47 @@ const isAngularDropdown = (element) => {
   return false;
 };
 
+/**
+ * Read the generated text of one of an element's pseudo-elements.
+ *
+ * @param element - passed through to getElementComputedStyle; presumably a
+ *   DOM element (TODO confirm against that helper)
+ * @param {string} pseudo - pseudo-element selector such as "::before"
+ * @returns {string|null} the `content` property value with double quotes
+ *   removed and whitespace trimmed, or null when no style is returned or the
+ *   value is empty or "none"
+ */
+function getPseudoContent(element, pseudo) {
+  const pseudoStyle = getElementComputedStyle(element, pseudo);
+  if (!pseudoStyle) {
+    return null;
+  }
+  const content = pseudoStyle
+    .getPropertyValue("content")
+    .replace(/"/g, "")
+    .trim();
+
+  if (content === "none" || !content) {
+    return null;
+  }
+
+  return content;
+}
+
+/**
+ * Report whether either the ::before or the ::after pseudo-element of an
+ * element yields content according to getPseudoContent.
+ *
+ * @param element - element to inspect (same expectations as getPseudoContent)
+ * @returns {boolean} true when at least one of the two is non-null
+ */
+function hasBeforeOrAfterPseudoContent(element) {
+  return (
+    getPseudoContent(element, "::before") != null ||
+    getPseudoContent(element, "::after") != null
+  );
+}
+
 const checkParentClass = (className) => {
   const targetParentClasses = ["field", "entry"];
   for (let i = 0; i < targetParentClasses.length; i++) {
@@ -876,7 +917,9 @@ function buildElementObject(frame, element, interactable, purgeable = false) {
     interactable: interactable,
     tagName: elementTagNameLower,
     attributes: attrs,
+    beforePseudoText: getPseudoContent(element, "::before"),
     text: getElementContent(element),
+    afterPseudoText: getPseudoContent(element, "::after"),
     children: [],
     rect: DomUtils.getVisibleClientRect(element, true),
     // if purgeable is True, which means this element is only used for building the tree relationship
@@ -1020,6 +1063,8 @@ function buildElementTree(starter = document.body, frame, full_tree = false) {
         // build all table related elements into skyvern element
         // we need these elements to preserve the DOM structure
         elementObj = buildElementObject(frame, element, false);
+      } else if (hasBeforeOrAfterPseudoContent(element)) {
+        elementObj = buildElementObject(frame, element, false);
       } else if (full_tree) {
         // when building full tree, we only get text from element itself
         // elements without text are purgeable
diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py
index 93062eb1..d51e16b7 100644
--- a/skyvern/webeye/scraper/scraper.py
+++ b/skyvern/webeye/scraper/scraper.py
@@ -122,11 +122,23 @@ def json_to_html(element: dict, need_skyvern_attrs: bool = True) -> str:
     if element.get("purgeable", False):
         return children_html + option_html
 
+    # buildElementObject always emits these keys, with null when the pseudo-element
+    # has no content. dict.get's default only covers a missing key, so coerce
+    # None to "" here; otherwise the f-string below would render the text "None".
+    before_pseudo_text = element.get("beforePseudoText") or ""
+    after_pseudo_text = element.get("afterPseudoText") or ""
+
     # Check if the element is self-closing
-    if tag in ["img", "input", "br", "hr", "meta", "link"] and not option_html and not children_html:
+    if (
+        tag in ["img", "input", "br", "hr", "meta", "link"]
+        and not option_html
+        and not children_html
+        and not before_pseudo_text
+        and not after_pseudo_text
+    ):
         return f'<{tag}{attributes_html if not attributes_html else " "+attributes_html}>'
     else:
-        return f'<{tag}{attributes_html if not attributes_html else " "+attributes_html}>{text}{children_html+option_html}'
+        return f'<{tag}{attributes_html if not attributes_html else " "+attributes_html}>{before_pseudo_text}{text}{children_html+option_html}{after_pseudo_text}'
 
 
 def clean_element_before_hashing(element: dict) -> dict:
@@ -602,4 +614,11 @@ def trim_element(element: dict) -> dict:
             element_text = str(queue_ele["text"]).strip()
             if not element_text:
                 del queue_ele["text"]
+
+        if "beforePseudoText" in queue_ele and not queue_ele.get("beforePseudoText"):
+            del queue_ele["beforePseudoText"]
+
+        if "afterPseudoText" in queue_ele and not queue_ele.get("afterPseudoText"):
+            del queue_ele["afterPseudoText"]
+
     return element