stop removing target attr when scraping (#2495)

This commit is contained in:
Shuchang Zheng
2025-05-28 00:55:01 -07:00
committed by GitHub
parent 91a666f705
commit 31d6dbdacd
4 changed files with 14 additions and 13 deletions

View File

@@ -1378,15 +1378,6 @@ async function buildElementObject(
isSelect2MultiChoice(element),
};
// If the element is an "a" tag with a target="_blank" attribute, remove the target attribute from the element but keep it in elementObj.
// We do this so that Skyvern can do all the navigation in a single page/tab instead of opening a new tab.
if (elementTagNameLower === "a") {
if (element.getAttribute("target") === "_blank") {
elementObj.target = "_blank";
element.removeAttribute("target");
}
}
let isInShadowRoot = element.getRootNode() instanceof ShadowRoot;
if (isInShadowRoot) {
let shadowHostEle = element.getRootNode().host;

View File

@@ -869,9 +869,6 @@ def trim_element(element: dict) -> dict:
if "afterPseudoText" in queue_ele and not queue_ele.get("afterPseudoText"):
del queue_ele["afterPseudoText"]
if "target" in queue_ele:
del queue_ele["target"]
return element

View File

@@ -138,6 +138,13 @@ class SkyvernElement:
def __repr__(self) -> str:
    """Return ``SkyvernElement(...)`` wrapping the string form of the static element."""
    static_text = str(self.__static_element)
    return f"SkyvernElement({static_text})"
async def _trim_target_attr(self) -> None:
    """Ask this element's SkyvernFrame to remove its ``target`` attribute.

    Does nothing when ``self.get_attributes()`` contains no ``target`` key.
    """
    if "target" in self.get_attributes():
        LOG.debug("Removing target attribute from the element", element=self.get_id())
        frame_wrapper = await SkyvernFrame.create_instance(self.get_frame())
        element_handle = await self.get_element_handler()
        await frame_wrapper.remove_target_attr(element_handle)
def build_HTML(self, need_trim_element: bool = True, need_skyvern_attrs: bool = True) -> str:
element_dict = self.get_element_dict()
if need_trim_element:
@@ -352,7 +359,7 @@ class SkyvernElement:
return handler
async def should_use_navigation_instead_click(self, page: Page) -> str | None:
if self.__static_element.get("target") != "_blank":
if await self.get_attr("target", mode="static") != "_blank":
return None
href: str | None = await self.get_attr("href", mode="static")
@@ -763,10 +770,12 @@ class SkyvernElement:
async def navigate_to_a_href(self, page: Page) -> str | None:
if self.get_tag_name() != InteractiveElement.A:
await self._trim_target_attr()
return None
href = await self.should_use_navigation_instead_click(page)
if not href:
await self._trim_target_attr()
return None
LOG.info(

View File

@@ -287,3 +287,7 @@ class SkyvernFrame:
async def click_element_in_javascript(self, element: ElementHandle) -> None:
    """Click ``element`` by evaluating ``element.click()`` against this frame."""
    return await self.evaluate(
        frame=self.frame,
        expression="(element) => element.click()",
        arg=element,
    )
async def remove_target_attr(self, element: ElementHandle) -> None:
    """Remove the ``target`` attribute from ``element`` by evaluating JS against this frame."""
    return await self.evaluate(
        frame=self.frame,
        expression="(element) => element.removeAttribute('target')",
        arg=element,
    )