diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index adef5b61..69779b10 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -777,17 +777,6 @@ class ForgeAgent: action=action, action_result=results, ) - # if the action triggered javascript calls - # this action should be the last action this round and do not take more actions. - # for now, we're being optimistic and assuming that - # js call doesn't have impact on the following actions - if results[-1].javascript_triggered: - LOG.info( - "Action triggered javascript. Stop executing reamaining actions.", - action=action, - ) - # stop executing the rest actions - break elif results and not results[-1].success and not results[-1].stop_execution_on_failure: LOG.warning( "Action failed, but not stopping execution", @@ -1198,7 +1187,9 @@ class ForgeAgent: navigation_goal = task.navigation_goal starting_url = task.url page = await browser_state.get_working_page() - current_url = await page.evaluate("() => document.location.href") if page else starting_url + current_url = ( + await SkyvernFrame.evaluate(frame=page, expression="() => document.location.href") if page else starting_url + ) final_navigation_payload = self._build_navigation_payload( task, expire_verification_code=expire_verification_code ) diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index a7b675d0..57d1fc2c 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -613,7 +613,7 @@ async def handle_upload_file_action( locator = skyvern_element.locator file_path = await download_file(file_url) - is_file_input = await is_file_input_element(locator) + is_file_input = await skyvern_element.is_file_input() if is_file_input: LOG.info("Taking UploadFileAction. Found file input tag", action=action) @@ -1103,24 +1103,14 @@ async def chain_click( Clicks on an element identified by the css and its parent if failed. :param css: css of the element to click """ - javascript_triggered = await is_javascript_triggered(scraped_page, page, locator) try: await locator.click(timeout=timeout) LOG.info("Chain click: main element click succeeded", action=action, locator=locator) - return [ - ActionSuccess( - javascript_triggered=javascript_triggered, - ) - ] + return [ActionSuccess()] except Exception: - action_results: list[ActionResult] = [ - ActionFailure( - FailToClick(action.element_id), - javascript_triggered=javascript_triggered, - ) - ] + action_results: list[ActionResult] = [ActionFailure(FailToClick(action.element_id))] if skyvern_element.get_tag_name() == "label": LOG.info( @@ -1137,12 +1127,7 @@ async def chain_click( action_results.append(ActionSuccess()) return action_results except Exception: - action_results.append( - ActionFailure( - FailToClick(action.element_id, anchor="for"), - javascript_triggered=javascript_triggered, - ) - ) + action_results.append(ActionFailure(FailToClick(action.element_id, anchor="for"))) if skyvern_element.get_tag_name() == InteractiveElement.INPUT: LOG.info( @@ -1158,10 +1143,6 @@ async def chain_click( try: parent_locator = locator.locator("..") - - parent_javascript_triggered = await is_javascript_triggered(scraped_page, page, parent_locator) - javascript_triggered = javascript_triggered or parent_javascript_triggered - await parent_locator.click(timeout=timeout) LOG.info( @@ -1169,12 +1150,7 @@ async def chain_click( action=action, parent_locator=parent_locator, ) - action_results.append( - ActionSuccess( - javascript_triggered=javascript_triggered, - interacted_with_parent=True, - ) - ) + action_results.append(ActionSuccess(interacted_with_parent=True)) except Exception: LOG.warning( "Failed to click parent element", @@ -1185,7 +1161,6 @@ async def chain_click( action_results.append( ActionFailure( FailToClick(action.element_id, anchor="parent"), - javascript_triggered=javascript_triggered, interacted_with_parent=True, ) ) @@ -2223,48 +2198,9 @@ def get_checkbox_id_in_label_children(scraped_page: ScrapedPage, element_id: str return None -@deprecated("This function is deprecated. It was used for select2 dropdown, but we don't use it anymore.") -async def is_javascript_triggered(scraped_page: ScrapedPage, page: Page, locator: Locator) -> bool: - element = locator.first - - tag_name = await element.evaluate("e => e.tagName") - if tag_name.lower() == "a": - href = await element.evaluate("e => e.href") - if href.lower().startswith("javascript:"): - LOG.info("Found javascript call in anchor tag, marking step as completed. Dropping remaining actions") - return True - return False - - -async def get_tag_name_lowercase(locator: Locator) -> str | None: - element = locator.first - if element: - tag_name = await element.evaluate("e => e.tagName") - return tag_name.lower() - return None - - -async def is_file_input_element(locator: Locator) -> bool: - element = locator.first - if element: - tag_name = await element.evaluate("el => el.tagName") - type_name = await element.evaluate("el => el.type") - return tag_name.lower() == "input" and type_name == "file" - return False - - -async def is_input_element(locator: Locator) -> bool: - element = locator.first - if element: - tag_name = await element.evaluate("el => el.tagName") - return tag_name.lower() == "input" - return False - - async def click_sibling_of_input( locator: Locator, timeout: int, - javascript_triggered: bool = False, ) -> ActionResult: try: input_element = locator.first @@ -2278,18 +2214,16 @@ async def click_sibling_of_input( "Successfully clicked sibling label of input element", sibling_label_css=sibling_label_css, ) - return ActionSuccess(javascript_triggered=javascript_triggered, interacted_with_sibling=True) + return ActionSuccess(interacted_with_sibling=True) # Should never get here return ActionFailure( exception=Exception("Failed while trying to click sibling of input element"), - javascript_triggered=javascript_triggered, interacted_with_sibling=True, ) except Exception: LOG.warning("Failed to click sibling label of input element", exc_info=True) return ActionFailure( exception=Exception("Failed while trying to click sibling of input element"), - javascript_triggered=javascript_triggered, ) diff --git a/skyvern/webeye/actions/responses.py b/skyvern/webeye/actions/responses.py index 77367444..6895a4ee 100644 --- a/skyvern/webeye/actions/responses.py +++ b/skyvern/webeye/actions/responses.py @@ -13,7 +13,6 @@ class ActionResult(BaseModel): data: dict[str, Any] | list | str | None = None step_retry_number: int | None = None step_order: int | None = None - javascript_triggered: bool = False download_triggered: bool | None = None # None is used for old data so that we can differentiate between old and new data which only has boolean interacted_with_sibling: bool | None = None @@ -30,8 +29,6 @@ class ActionResult(BaseModel): results.append(f"step_order={self.step_order}") if self.step_retry_number: results.append(f"step_retry_number={self.step_retry_number}") - if self.javascript_triggered: - results.append(f"javascript_triggered={self.javascript_triggered}") if self.download_triggered is not None: results.append(f"download_triggered={self.download_triggered}") if self.interacted_with_sibling is not None: @@ -49,7 +46,6 @@ class ActionSuccess(ActionResult): def __init__( self, data: dict[str, Any] | list | str | None = None, - javascript_triggered: bool = False, download_triggered: bool | None = None, interacted_with_sibling: bool = False, interacted_with_parent: bool = False, @@ -57,7 +53,6 @@ class ActionSuccess(ActionResult): super().__init__( success=True, data=data, - javascript_triggered=javascript_triggered, download_triggered=download_triggered, interacted_with_sibling=interacted_with_sibling, interacted_with_parent=interacted_with_parent, @@ -69,7 +64,6 @@ class ActionFailure(ActionResult): self, exception: Exception, stop_execution_on_failure: bool = True, - javascript_triggered: bool = False, download_triggered: bool | None = None, interacted_with_sibling: bool = False, interacted_with_parent: bool = False, @@ -79,7 +73,6 @@ class ActionFailure(ActionResult): exception_type=type(exception).__name__, stop_execution_on_failure=stop_execution_on_failure, exception_message=remove_whitespace(str(exception)), - javascript_triggered=javascript_triggered, download_triggered=download_triggered, interacted_with_sibling=interacted_with_sibling, interacted_with_parent=interacted_with_parent, @@ -91,14 +84,12 @@ class ActionFailure(ActionResult): class ActionAbort(ActionResult): def __init__( self, - javascript_triggered: bool = False, download_triggered: bool | None = None, interacted_with_sibling: bool = False, interacted_with_parent: bool = False, ): super().__init__( success=True, - javascript_triggered=javascript_triggered, download_triggered=download_triggered, interacted_with_sibling=interacted_with_sibling, interacted_with_parent=interacted_with_parent, diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index 501ffcff..f456f545 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -419,7 +419,7 @@ class BrowserState: async def stop_page_loading(self) -> None: page = await self.__assert_page() try: - await page.evaluate("window.stop()") + await SkyvernFrame.evaluate(frame=page, expression="window.stop()") except Exception as e: LOG.exception(f"Error while stop loading the page: {repr(e)}") raise FailedToStopLoadingPage(url=page.url, error_message=repr(e)) diff --git a/skyvern/webeye/scraper/scraper.py b/skyvern/webeye/scraper/scraper.py index 4b4411ad..2200a2c2 100644 --- a/skyvern/webeye/scraper/scraper.py +++ b/skyvern/webeye/scraper/scraper.py @@ -284,7 +284,7 @@ async def get_frame_text(iframe: Frame) -> str: js_script = "() => document.body.innerText" try: - text = await iframe.evaluate(js_script) + text = await SkyvernFrame.evaluate(frame=iframe, expression=js_script) except Exception: LOG.warning( "failed to get text from iframe", @@ -417,8 +417,10 @@ async def get_interactable_element_tree_in_frame( frame_js_script = f"() => buildTreeFromBody('{unique_id}')" - await frame.evaluate(JS_FUNCTION_DEFS) - frame_elements, frame_element_tree = await frame.evaluate(frame_js_script) + await SkyvernFrame.evaluate(frame=frame, expression=JS_FUNCTION_DEFS) + frame_elements, frame_element_tree = await SkyvernFrame.evaluate( + frame=frame, expression=frame_js_script, timeout_ms=60 * 1000 + ) if len(frame.child_frames) > 0: frame_elements, frame_element_tree = await get_interactable_element_tree_in_frame( @@ -450,9 +452,11 @@ async def get_interactable_element_tree( :param page: Page instance to get the element tree from. :return: Tuple containing the element tree and a map of element IDs to elements. """ - await page.evaluate(JS_FUNCTION_DEFS) + await SkyvernFrame.evaluate(frame=page, expression=JS_FUNCTION_DEFS) main_frame_js_script = "() => buildTreeFromBody()" - elements, element_tree = await page.evaluate(main_frame_js_script) + elements, element_tree = await SkyvernFrame.evaluate( + frame=page, expression=main_frame_js_script, timeout_ms=60 * 1000 + ) if len(page.main_frame.child_frames) > 0: elements, element_tree = await get_interactable_element_tree_in_frame( @@ -481,7 +485,9 @@ class IncrementalScrapePage: frame = self.skyvern_frame.get_frame() js_script = "() => getIncrementElements()" - incremental_elements, incremental_tree = await frame.evaluate(js_script) + incremental_elements, incremental_tree = await SkyvernFrame.evaluate( + frame=frame, expression=js_script, timeout_ms=60 * 1000 + ) # we listen the incremental elements seperated by frames, so all elements will be in the same SkyvernFrame self.id_to_css_dict, self.id_to_element_dict, _, _, _ = build_element_dict(incremental_elements) @@ -497,15 +503,15 @@ class IncrementalScrapePage: async def start_listen_dom_increment(self) -> None: js_script = "() => startGlobalIncrementalObserver()" - await self.skyvern_frame.get_frame().evaluate(js_script) + await SkyvernFrame.evaluate(frame=self.skyvern_frame.get_frame(), expression=js_script) async def stop_listen_dom_increment(self) -> None: js_script = "() => stopGlobalIncrementalObserver()" - await self.skyvern_frame.get_frame().evaluate(js_script) + await SkyvernFrame.evaluate(frame=self.skyvern_frame.get_frame(), expression=js_script) async def get_incremental_elements_num(self) -> int: js_script = "() => window.globalOneTimeIncrementElements.length" - return await self.skyvern_frame.get_frame().evaluate(js_script) + return await SkyvernFrame.evaluate(frame=self.skyvern_frame.get_frame(), expression=js_script) async def __validate_element_by_value(self, value: str, element: dict) -> tuple[Locator | None, bool]: """ diff --git a/skyvern/webeye/utils/dom.py b/skyvern/webeye/utils/dom.py index 3ef74c77..6673b48b 100644 --- a/skyvern/webeye/utils/dom.py +++ b/skyvern/webeye/utils/dom.py @@ -213,6 +213,9 @@ class SkyvernElement: return False + async def is_file_input(self) -> bool: + return self.get_tag_name() == InteractiveElement.INPUT and await self.get_attr("type") == "file" + def is_interactable(self) -> bool: return self.__static_element.get("interactable", False) @@ -507,7 +510,9 @@ class SkyvernElement: await page.mouse.click(click_x, click_y) async def blur(self) -> None: - await self.get_frame().evaluate("(element) => element.blur()", await self.get_element_handler()) + await SkyvernFrame.evaluate( + frame=self.get_frame(), expression="(element) => element.blur()", arg=await self.get_element_handler() + ) async def scroll_into_view(self, timeout: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS) -> None: element_handler = await self.get_element_handler(timeout=timeout) diff --git a/skyvern/webeye/utils/page.py b/skyvern/webeye/utils/page.py index 3d28e9e5..b91e0ae7 100644 --- a/skyvern/webeye/utils/page.py +++ b/skyvern/webeye/utils/page.py @@ -2,7 +2,7 @@ from __future__ import annotations import asyncio import time -from typing import Dict, List +from typing import Any, Dict, List import structlog from playwright._impl._errors import TimeoutError @@ -32,6 +32,20 @@ JS_FUNCTION_DEFS = load_js_script() class SkyvernFrame: + @staticmethod + async def evaluate( + frame: Page | Frame, + expression: str, + arg: Any | None = None, + timeout_ms: float = SettingsManager.get_settings().BROWSER_ACTION_TIMEOUT_MS, + ) -> Any: + try: + async with asyncio.timeout(timeout_ms / 1000): + return await frame.evaluate(expression=expression, arg=arg) + except asyncio.TimeoutError: + LOG.exception("Timeout to evaluate expression", expression=expression) + raise TimeoutError("timeout to evaluate expression") + @staticmethod async def take_screenshot( page: Page, @@ -123,7 +137,7 @@ class SkyvernFrame: @classmethod async def create_instance(cls, frame: Page | Frame) -> SkyvernFrame: instance = cls(frame=frame) - await instance.frame.evaluate(JS_FUNCTION_DEFS) + await cls.evaluate(frame=instance.frame, expression=JS_FUNCTION_DEFS) return instance def __init__(self, frame: Page | Frame) -> None: @@ -138,35 +152,35 @@ class SkyvernFrame: async def get_scroll_x_y(self) -> tuple[int, int]: js_script = "() => getScrollXY()" - return await self.frame.evaluate(js_script) + return await self.evaluate(frame=self.frame, expression=js_script) async def scroll_to_x_y(self, x: int, y: int) -> None: js_script = "([x, y]) => scrollToXY(x, y)" - return await self.frame.evaluate(js_script, [x, y]) + return await self.evaluate(frame=self.frame, expression=js_script, arg=[x, y]) async def scroll_to_element_bottom(self, element: ElementHandle, page_by_page: bool = False) -> None: js_script = "([element, page_by_page]) => scrollToElementBottom(element, page_by_page)" - return await self.frame.evaluate(js_script, [element, page_by_page]) + return await self.evaluate(frame=self.frame, expression=js_script, arg=[element, page_by_page]) async def scroll_to_element_top(self, element: ElementHandle) -> None: js_script = "(element) => scrollToElementTop(element)" - return await self.frame.evaluate(js_script, element) + return await self.evaluate(frame=self.frame, expression=js_script, arg=element) async def parse_element_from_html(self, frame: str, element: ElementHandle, interactable: bool) -> Dict: js_script = "([frame, element, interactable]) => buildElementObject(frame, element, interactable)" - return await self.frame.evaluate(js_script, [frame, element, interactable]) + return await self.evaluate(frame=self.frame, expression=js_script, arg=[frame, element, interactable]) async def get_element_scrollable(self, element: ElementHandle) -> bool: js_script = "(element) => isScrollable(element)" - return await self.frame.evaluate(js_script, element) + return await self.evaluate(frame=self.frame, expression=js_script, arg=element) async def get_element_visible(self, element: ElementHandle) -> bool: js_script = "(element) => isElementVisible(element) && !isHidden(element)" - return await self.frame.evaluate(js_script, element) + return await self.evaluate(frame=self.frame, expression=js_script, arg=element) async def get_disabled_from_style(self, element: ElementHandle) -> bool: js_script = "(element) => checkDisabledFromStyle(element)" - return await self.frame.evaluate(js_script, element) + return await self.evaluate(frame=self.frame, expression=js_script, arg=element) async def scroll_to_top(self, draw_boxes: bool) -> float: """ @@ -176,7 +190,7 @@ class SkyvernFrame: :return: Screenshot of the page. """ js_script = f"() => scrollToTop({str(draw_boxes).lower()})" - scroll_y_px = await self.frame.evaluate(js_script) + scroll_y_px = await self.evaluate(frame=self.frame, expression=js_script) return scroll_y_px async def scroll_to_next_page(self, draw_boxes: bool) -> float: @@ -187,7 +201,7 @@ class SkyvernFrame: :return: Screenshot of the page. """ js_script = f"() => scrollToNextPage({str(draw_boxes).lower()})" - scroll_y_px = await self.frame.evaluate(js_script) + scroll_y_px = await self.evaluate(frame=self.frame, expression=js_script) return scroll_y_px async def remove_bounding_boxes(self) -> None: @@ -196,12 +210,12 @@ class SkyvernFrame: :param page: Page instance to remove the bounding boxes from. """ js_script = "() => removeBoundingBoxes()" - await self.frame.evaluate(js_script) + await self.evaluate(frame=self.frame, expression=js_script) async def build_elements_and_draw_bounding_boxes(self) -> None: js_script = "() => buildElementsAndDrawBoundingBoxes()" - await self.frame.evaluate(js_script) + await self.evaluate(frame=self.frame, expression=js_script) async def is_window_scrollable(self) -> bool: js_script = "() => isWindowScrollable()" - return await self.frame.evaluate(js_script) + return await self.evaluate(frame=self.frame, expression=js_script)