stop removing target attr when scraping (#2495)
This commit is contained in:
@@ -1378,15 +1378,6 @@ async function buildElementObject(
|
|||||||
isSelect2MultiChoice(element),
|
isSelect2MultiChoice(element),
|
||||||
};
|
};
|
||||||
|
|
||||||
// if the element is an "a" tag and has a target="_blank" attribute, remove the target attribute but keep it in elementObj
|
|
||||||
// We're doing this so that skyvern can do all the navigation in a single page/tab and not open a new tab
|
|
||||||
if (elementTagNameLower === "a") {
|
|
||||||
if (element.getAttribute("target") === "_blank") {
|
|
||||||
elementObj.target = "_blank";
|
|
||||||
element.removeAttribute("target");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let isInShadowRoot = element.getRootNode() instanceof ShadowRoot;
|
let isInShadowRoot = element.getRootNode() instanceof ShadowRoot;
|
||||||
if (isInShadowRoot) {
|
if (isInShadowRoot) {
|
||||||
let shadowHostEle = element.getRootNode().host;
|
let shadowHostEle = element.getRootNode().host;
|
||||||
|
|||||||
@@ -869,9 +869,6 @@ def trim_element(element: dict) -> dict:
|
|||||||
if "afterPseudoText" in queue_ele and not queue_ele.get("afterPseudoText"):
|
if "afterPseudoText" in queue_ele and not queue_ele.get("afterPseudoText"):
|
||||||
del queue_ele["afterPseudoText"]
|
del queue_ele["afterPseudoText"]
|
||||||
|
|
||||||
if "target" in queue_ele:
|
|
||||||
del queue_ele["target"]
|
|
||||||
|
|
||||||
return element
|
return element
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -138,6 +138,13 @@ class SkyvernElement:
|
|||||||
def __repr__(self) -> str:
|
def __repr__(self) -> str:
|
||||||
return f"SkyvernElement({str(self.__static_element)})"
|
return f"SkyvernElement({str(self.__static_element)})"
|
||||||
|
|
||||||
|
async def _trim_target_attr(self) -> None:
|
||||||
|
if "target" not in self.get_attributes():
|
||||||
|
return
|
||||||
|
LOG.debug("Removing target attribute from the element", element=self.get_id())
|
||||||
|
skyvern_frame = await SkyvernFrame.create_instance(self.get_frame())
|
||||||
|
await skyvern_frame.remove_target_attr(await self.get_element_handler())
|
||||||
|
|
||||||
def build_HTML(self, need_trim_element: bool = True, need_skyvern_attrs: bool = True) -> str:
|
def build_HTML(self, need_trim_element: bool = True, need_skyvern_attrs: bool = True) -> str:
|
||||||
element_dict = self.get_element_dict()
|
element_dict = self.get_element_dict()
|
||||||
if need_trim_element:
|
if need_trim_element:
|
||||||
@@ -352,7 +359,7 @@ class SkyvernElement:
|
|||||||
return handler
|
return handler
|
||||||
|
|
||||||
async def should_use_navigation_instead_click(self, page: Page) -> str | None:
|
async def should_use_navigation_instead_click(self, page: Page) -> str | None:
|
||||||
if self.__static_element.get("target") != "_blank":
|
if await self.get_attr("target", mode="static") != "_blank":
|
||||||
return None
|
return None
|
||||||
|
|
||||||
href: str | None = await self.get_attr("href", mode="static")
|
href: str | None = await self.get_attr("href", mode="static")
|
||||||
@@ -763,10 +770,12 @@ class SkyvernElement:
|
|||||||
|
|
||||||
async def navigate_to_a_href(self, page: Page) -> str | None:
|
async def navigate_to_a_href(self, page: Page) -> str | None:
|
||||||
if self.get_tag_name() != InteractiveElement.A:
|
if self.get_tag_name() != InteractiveElement.A:
|
||||||
|
await self._trim_target_attr()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
href = await self.should_use_navigation_instead_click(page)
|
href = await self.should_use_navigation_instead_click(page)
|
||||||
if not href:
|
if not href:
|
||||||
|
await self._trim_target_attr()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
LOG.info(
|
LOG.info(
|
||||||
|
|||||||
@@ -287,3 +287,7 @@ class SkyvernFrame:
|
|||||||
async def click_element_in_javascript(self, element: ElementHandle) -> None:
|
async def click_element_in_javascript(self, element: ElementHandle) -> None:
|
||||||
js_script = "(element) => element.click()"
|
js_script = "(element) => element.click()"
|
||||||
return await self.evaluate(frame=self.frame, expression=js_script, arg=element)
|
return await self.evaluate(frame=self.frame, expression=js_script, arg=element)
|
||||||
|
|
||||||
|
async def remove_target_attr(self, element: ElementHandle) -> None:
|
||||||
|
js_script = "(element) => element.removeAttribute('target')"
|
||||||
|
return await self.evaluate(frame=self.frame, expression=js_script, arg=element)
|
||||||
|
|||||||
Reference in New Issue
Block a user