Allow SELECT_OPTION actions to trigger file downloads.

The agent now calls ActionHandler.handle_action with keyword arguments, and
both extract-action prompts let the model set "download" on SELECT_OPTION
actions as well as CLICK actions.

diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py
index 9b84c31a..22f719e2 100644
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -1234,7 +1234,13 @@ class ForgeAgent:
                     "is_retry": step.retry_index > 0,
                 }
 
-                results = await ActionHandler.handle_action(scraped_page, task, step, current_page, action)
+                results = await ActionHandler.handle_action(
+                    scraped_page=scraped_page,
+                    task=task,
+                    step=step,
+                    page=current_page,
+                    action=action,
+                )
                 await app.AGENT_FUNCTION.post_action_execution(action)
                 detailed_agent_step_output.actions_and_results[action_idx] = (
                     action,
diff --git a/skyvern/forge/prompts/skyvern/extract-action-static.j2 b/skyvern/forge/prompts/skyvern/extract-action-static.j2
index d41296d9..73868a95 100644
--- a/skyvern/forge/prompts/skyvern/extract-action-static.j2
+++ b/skyvern/forge/prompts/skyvern/extract-action-static.j2
@@ -22,7 +22,7 @@ Reply in JSON format with the following keys:
         "id": str, // The id of the element to take action on. The id has to be one from the elements list
         "text": str, // Text for INPUT_TEXT action only
         "file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
-        "download": bool, // Can only be true for CLICK actions. If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download.
+        "download": bool, // Can only be true for CLICK or SELECT_OPTION actions. If true, the browser will trigger a download by clicking the element or selecting the option. If false, the browser will perform the action without triggering a download.
         "option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
             "label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
             "index": int, // the index corresponding to the option index under the select element.
diff --git a/skyvern/forge/prompts/skyvern/extract-action.j2 b/skyvern/forge/prompts/skyvern/extract-action.j2
index 00308512..8973e2c2 100644
--- a/skyvern/forge/prompts/skyvern/extract-action.j2
+++ b/skyvern/forge/prompts/skyvern/extract-action.j2
@@ -22,7 +22,7 @@ Reply in JSON format with the following keys:
         "id": str, // The id of the element to take action on. The id has to be one from the elements list
         "text": str, // Text for INPUT_TEXT action only
         "file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
-        "download": bool, // Can only be true for CLICK actions. If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download.
+        "download": bool, // Can only be true for CLICK or SELECT_OPTION actions. If true, the browser will trigger a download by clicking the element or selecting the option. If false, the browser will perform the action without triggering a download.
         "option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
             "label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
             "index": int, // the index corresponding to the option index under the select element.
diff --git a/skyvern/webeye/actions/actions.py b/skyvern/webeye/actions/actions.py
index a2110e7d..573fa898 100644
--- a/skyvern/webeye/actions/actions.py
+++ b/skyvern/webeye/actions/actions.py
@@ -258,9 +258,10 @@ class SolveCaptchaAction(Action):
 class SelectOptionAction(WebAction):
     action_type: ActionType = ActionType.SELECT_OPTION
     option: SelectOption
+    download: bool = False
 
     def __repr__(self) -> str:
-        return f"SelectOptionAction(element_id={self.element_id}, option={self.option}, context={self.input_or_select_context})"
+        return f"SelectOptionAction(element_id={self.element_id}, option={self.option}, context={self.input_or_select_context}, download={self.download})"
 
 
 ###
diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py
index a62b3eaf..3d3b69a3 100644
--- a/skyvern/webeye/actions/handler.py
+++ b/skyvern/webeye/actions/handler.py
@@ -394,6 +394,160 @@ class ActionHandler:
         step: Step,
         page: Page,
         action: Action,
+    ) -> list[ActionResult]:
+        """Run ``action`` and, for download-enabled actions, detect the download.
+
+        Actions other than ``ClickAction``/``SelectOptionAction`` with a truthy
+        ``download`` flag are simply delegated to ``_handle_action`` and recorded.
+        Otherwise the download directory (plus the browser session's files, when
+        the task has one) is listed before and after the action; if the file
+        count grows, the last result is marked ``download_triggered`` and any
+        still-downloading files are awaited. When a download was triggered and
+        the page count grew, the last page is closed. The action is recorded in
+        the database on every return path.
+        """
+        browser_state = app.BROWSER_MANAGER.get_for_task(task.task_id, workflow_run_id=task.workflow_run_id)
+        # TODO: maybe support all action types in the future(?)
+        trigger_download_action = isinstance(action, (SelectOptionAction, ClickAction)) and action.download
+        if not trigger_download_action:
+            results = await ActionHandler._handle_action(
+                scraped_page=scraped_page,
+                task=task,
+                step=step,
+                page=page,
+                action=action,
+            )
+            await app.DATABASE.create_action(action=action)
+            return results
+
+        context = skyvern_context.current()
+        download_dir = Path(
+            get_download_dir(
+                run_id=context.run_id if context and context.run_id else task.workflow_run_id or task.task_id
+            )
+        )
+        initial_page_count = 0
+        # get the initial page count
+        if browser_state:
+            initial_page_count = len(await browser_state.list_valid_pages())
+
+        list_files_before = list_files_in_directory(download_dir)
+        if task.browser_session_id:
+            files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
+                organization_id=task.organization_id, browser_session_id=task.browser_session_id
+            )
+            list_files_before = list_files_before + files_in_browser_session
+        LOG.info(
+            "Number of files in download directory before action",
+            num_downloaded_files_before=len(list_files_before),
+            download_dir=download_dir,
+        )
+
+        download_triggered = False
+        try:
+            results = await ActionHandler._handle_action(
+                scraped_page=scraped_page,
+                task=task,
+                step=step,
+                page=page,
+                action=action,
+            )
+            # A failed action cannot have started a download; skip the polling wait.
+            if not results or not isinstance(results[-1], ActionSuccess):
+                return results
+            try:
+                LOG.info(
+                    "Checking if there is any new files after action",
+                    download_dir=download_dir,
+                )
+                async with asyncio.timeout(task.download_timeout or BROWSER_DOWNLOAD_MAX_WAIT_TIME):
+                    while True:
+                        list_files_after = list_files_in_directory(download_dir)
+                        if task.browser_session_id:
+                            files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
+                                organization_id=task.organization_id, browser_session_id=task.browser_session_id
+                            )
+                            list_files_after = list_files_after + files_in_browser_session
+
+                        if len(list_files_after) > len(list_files_before):
+                            LOG.info(
+                                "Found new files in download directory after action",
+                                num_downloaded_files_after=len(list_files_after),
+                                download_dir=download_dir,
+                                workflow_run_id=task.workflow_run_id,
+                            )
+                            download_triggered = True
+                            break
+                        await asyncio.sleep(1)
+
+            except asyncio.TimeoutError:
+                LOG.warning(
+                    "No file to download after action",
+                    workflow_run_id=task.workflow_run_id,
+                )
+
+            if not download_triggered:
+                results[-1].download_triggered = False  # explicit "no download", as before
+                return results
+            results[-1].download_triggered = True
+
+            # check if there's any file is still downloading
+            downloading_files = list_downloading_files_in_directory(download_dir)
+            if task.browser_session_id:
+                files_in_browser_session = await app.STORAGE.list_downloading_files_in_browser_session(
+                    organization_id=task.organization_id, browser_session_id=task.browser_session_id
+                )
+                downloading_files = downloading_files + files_in_browser_session
+
+            if not downloading_files:
+                return results
+
+            LOG.info(
+                "File downloading hasn't completed, wait for a while",
+                downloading_files=downloading_files,
+                workflow_run_id=task.workflow_run_id,
+            )
+            try:
+                await wait_for_download_finished(
+                    downloading_files=downloading_files, timeout=task.download_timeout or BROWSER_DOWNLOAD_TIMEOUT
+                )
+            except DownloadFileMaxWaitingTime as e:
+                LOG.warning(
+                    "There're several long-time downloading files, these files might be broken",
+                    downloading_files=e.downloading_files,
+                    workflow_run_id=task.workflow_run_id,
+                )
+            return results
+        finally:
+            if browser_state is not None and download_triggered:
+                # get the page count after download
+                pages_after_download = await browser_state.list_valid_pages()
+                page_count_after_download = len(pages_after_download)
+                LOG.info(
+                    "Page count after download file action",
+                    initial_page_count=initial_page_count,
+                    page_count_after_download=page_count_after_download,
+                )
+                if page_count_after_download > initial_page_count:
+                    LOG.info(
+                        "Download triggered, closing the extra page",
+                    )
+
+                    if page == pages_after_download[-1]:
+                        LOG.warning("The extra page is the current page, closing it")
+                    # close the extra page
+                    await pages_after_download[-1].close()
+
+            # Record the action whether the handler returned or raised.
+            await app.DATABASE.create_action(action=action)
+
+    @staticmethod
+    async def _handle_action(
+        scraped_page: ScrapedPage,
+        task: Task,
+        step: Step,
+        page: Page,
+        action: Action,
     ) -> list[ActionResult]:
         LOG.info("Handling action", action=action)
         actions_result: list[ActionResult] = []
@@ -468,8 +622,6 @@ class ActionHandler:
                 LOG.warning("Action failed to execute, setting status to failed", action=action)
                 action.status = ActionStatus.failed
 
-            await app.DATABASE.create_action(action=action)
-
             if llm_caller and action.tool_call_id:
                 tool_call_result = {
                     "type": "tool_result",
@@ -586,54 +738,8 @@ async def handle_click_action(
         return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]
 
     if action.download:
-        # get the initial page count
-        browser_state = app.BROWSER_MANAGER.get_for_task(task.task_id, workflow_run_id=task.workflow_run_id)
-        initial_page_count = 0
-        if browser_state is not None:
-            initial_page_count = len(browser_state.browser_context.pages if browser_state.browser_context else [])
-        LOG.info(
-            "Page count before download file action",
-            initial_page_count=initial_page_count,
-            workflow_run_id=task.workflow_run_id,
-        )
-        results: list[ActionResult] = []
-        try:
-            results = await handle_click_to_download_file_action(action, page, scraped_page, task, step)
-        except Exception:
-            raise
-        finally:
-            # get the page count after download
-            page_count_after_download = 0
-            if browser_state is not None:
-                page_count_after_download = len(
-                    browser_state.browser_context.pages if browser_state.browser_context else []
-                )
+        results = await handle_click_to_download_file_action(action, page, scraped_page, task, step)
 
-            LOG.info(
-                "Page count after download file action",
-                initial_page_count=initial_page_count,
-                page_count_after_download=page_count_after_download,
-                workflow_run_id=task.workflow_run_id,
-            )
-            if page_count_after_download > initial_page_count and browser_state and browser_state.browser_context:
-                if results and results[-1].download_triggered:
-                    LOG.info(
-                        "Download triggered, closing the extra page",
-                        workflow_run_id=task.workflow_run_id,
-                    )
-
-                    if page == browser_state.browser_context.pages[-1]:
-                        LOG.warning(
-                            "The extra page is the current page, closing it",
-                            workflow_run_id=task.workflow_run_id,
-                        )
-                    # close the extra page
-                    await browser_state.browser_context.pages[-1].close()
-                else:
-                    LOG.info(
-                        "No download triggered, not closing the extra page",
-                        workflow_run_id=task.workflow_run_id,
-                    )
     elif action.file_url:
         upload_file_action = UploadFileAction(
             reasoning=action.reasoning,
@@ -828,24 +934,6 @@ async def handle_click_to_download_file_action(
     skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
     locator = skyvern_element.locator
 
-    context = skyvern_context.current()
-    download_dir = Path(
-        get_download_dir(run_id=context.run_id if context and context.run_id else task.workflow_run_id or task.task_id)
-    )
-    list_files_before = list_files_in_directory(download_dir)
-    if task.browser_session_id:
-        files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
-            organization_id=task.organization_id, browser_session_id=task.browser_session_id
-        )
-        list_files_before = list_files_before + files_in_browser_session
-
-    LOG.info(
-        "Number of files in download directory before click",
-        num_downloaded_files_before=len(list_files_before),
-        download_dir=download_dir,
-        workflow_run_id=task.workflow_run_id,
-    )
-
     try:
         if not await skyvern_element.navigate_to_a_href(page=page):
             await locator.click(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
@@ -859,65 +947,7 @@ async def handle_click_to_download_file_action(
         )
         return [ActionFailure(e, download_triggered=False)]
 
-    try:
-        LOG.info(
-            "Checking if there is any new files after click",
-            download_dir=download_dir,
-        )
-        async with asyncio.timeout(task.download_timeout or BROWSER_DOWNLOAD_MAX_WAIT_TIME):
-            while True:
-                list_files_after = list_files_in_directory(download_dir)
-                if task.browser_session_id:
-                    files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
-                        organization_id=task.organization_id, browser_session_id=task.browser_session_id
-                    )
-                    list_files_after = list_files_after + files_in_browser_session
-
-                if len(list_files_after) > len(list_files_before):
-                    LOG.info(
-                        "Found new files in download directory after click",
-                        num_downloaded_files_after=len(list_files_after),
-                        download_dir=download_dir,
-                        workflow_run_id=task.workflow_run_id,
-                    )
-                    break
-                await asyncio.sleep(1)
-
-    except asyncio.TimeoutError:
-        LOG.warning(
-            "No file to download after click",
-            workflow_run_id=task.workflow_run_id,
-        )
-        return [ActionSuccess(download_triggered=False)]
-
-    # check if there's any file is still downloading
-    downloading_files = list_downloading_files_in_directory(download_dir)
-    if task.browser_session_id:
-        files_in_browser_session = await app.STORAGE.list_downloading_files_in_browser_session(
-            organization_id=task.organization_id, browser_session_id=task.browser_session_id
-        )
-        downloading_files = downloading_files + files_in_browser_session
-
-    if len(downloading_files) == 0:
-        return [ActionSuccess(download_triggered=True)]
-
-    LOG.info(
-        "File downloading hasn't completed, wait for a while",
-        downloading_files=downloading_files,
-        workflow_run_id=task.workflow_run_id,
-    )
-    try:
-        await wait_for_download_finished(
-            downloading_files=downloading_files, timeout=task.download_timeout or BROWSER_DOWNLOAD_TIMEOUT
-        )
-    except DownloadFileMaxWaitingTime as e:
-        LOG.warning(
-            "There're several long-time downloading files, these files might be broken",
-            downloading_files=e.downloading_files,
-            workflow_run_id=task.workflow_run_id,
-        )
-
-    return [ActionSuccess(download_triggered=True)]
+    return [ActionSuccess()]
 
 
 # TOTP timing constants
diff --git a/skyvern/webeye/actions/parse_actions.py b/skyvern/webeye/actions/parse_actions.py
index 70b23a7e..0d55799e 100644
--- a/skyvern/webeye/actions/parse_actions.py
+++ b/skyvern/webeye/actions/parse_actions.py
@@ -154,6 +154,8 @@ def parse_action(
                 index=index,
             ),
             input_or_select_context=input_or_select_context,
+            # a null "download" from the LLM is treated as False
+            download=action.get("download") or False,
         )
 
     if action_type == ActionType.CHECKBOX: