From 31d6dbdacd78a0981824043c1804ebea42f6ccfb Mon Sep 17 00:00:00 2001 From: Shuchang Zheng Date: Wed, 28 May 2025 00:55:01 -0700 Subject: [PATCH] stop removing target attr when scraping (#2495) --- skyvern/webeye/scraper/domUtils.js | 9 --------- skyvern/webeye/scraper/scraper.py | 3 --- skyvern/webeye/utils/dom.py | 11 ++++++++++- skyvern/webeye/utils/page.py | 4 ++++ 4 files changed, 14 insertions(+), 13 deletions(-) diff --git a/skyvern/webeye/scraper/domUtils.js b/skyvern/webeye/scraper/domUtils.js index 1da9185b..1e8cfdad 100644 --- a/skyvern/webeye/scraper/domUtils.js +++ b/skyvern/webeye/scraper/domUtils.js @@ -1378,15 +1378,6 @@ async function buildElementObject( isSelect2MultiChoice(element), }; - // if element is an "a" tag and has a target="_blank" attribute, remove the target attribute but keep it in the elementObj - // We're doing this so that skyvern can do all the navigation in a single page/tab and not open new tab - if (elementTagNameLower === "a") { - if (element.getAttribute("target") === "_blank") { - elementObj.target = "_blank"; - element.removeAttribute("target"); - } - } - let isInShadowRoot = element.getRootNode() instanceof ShadowRoot; if (isInShadowRoot) { let shadowHostEle = element.getRootNode().host; diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py index e4c07960..1163d68d 100644 --- a/skyvern/webeye/scraper/scraper.py +++ b/skyvern/webeye/scraper/scraper.py @@ -869,9 +869,6 @@ def trim_element(element: dict) -> dict: if "afterPseudoText" in queue_ele and not queue_ele.get("afterPseudoText"): del queue_ele["afterPseudoText"] - if "target" in queue_ele: - del queue_ele["target"] - return element diff --git a/skyvern/webeye/utils/dom.py b/skyvern/webeye/utils/dom.py index 0ecf2aa5..93693942 100644 --- a/skyvern/webeye/utils/dom.py +++ b/skyvern/webeye/utils/dom.py @@ -138,6 +138,13 @@ class SkyvernElement: def __repr__(self) -> str: return f"SkyvernElement({str(self.__static_element)})" + async def _trim_target_attr(self) -> None: + if "target" not in self.get_attributes(): + return + LOG.debug("Removing target attribute from the element", element=self.get_id()) + skyvern_frame = await SkyvernFrame.create_instance(self.get_frame()) + await skyvern_frame.remove_target_attr(await self.get_element_handler()) + def build_HTML(self, need_trim_element: bool = True, need_skyvern_attrs: bool = True) -> str: element_dict = self.get_element_dict() if need_trim_element: @@ -352,7 +359,7 @@ class SkyvernElement: return handler async def should_use_navigation_instead_click(self, page: Page) -> str | None: - if self.__static_element.get("target") != "_blank": + if await self.get_attr("target", mode="static") != "_blank": return None href: str | None = await self.get_attr("href", mode="static") @@ -763,10 +770,12 @@ class SkyvernElement: async def navigate_to_a_href(self, page: Page) -> str | None: if self.get_tag_name() != InteractiveElement.A: + await self._trim_target_attr() return None href = await self.should_use_navigation_instead_click(page) if not href: + await self._trim_target_attr() return None LOG.info( diff --git a/skyvern/webeye/utils/page.py b/skyvern/webeye/utils/page.py index 69c31d7f..71a8e845 100644 --- a/skyvern/webeye/utils/page.py +++ b/skyvern/webeye/utils/page.py @@ -287,3 +287,7 @@ class SkyvernFrame: async def click_element_in_javascript(self, element: ElementHandle) -> None: js_script = "(element) => element.click()" return await self.evaluate(frame=self.frame, expression=js_script, arg=element) + + async def remove_target_attr(self, element: ElementHandle) -> None: + js_script = "(element) => element.removeAttribute('target')" + return await self.evaluate(frame=self.frame, expression=js_script, arg=element)