support download by select action (#4009)

2025-11-17 14:46:32 +08:00
parent 84bfba3384
commit abcdf6a033
6 changed files with 154 additions and 130 deletions
--- a/skyvern/forge/agent.py
+++ b/skyvern/forge/agent.py
@@ -1234,7 +1234,13 @@ class ForgeAgent:
                        "is_retry": step.retry_index > 0,
                    }
-                results = await ActionHandler.handle_action(scraped_page, task, step, current_page, action)
+                results = await ActionHandler.handle_action(
                    scraped_page=scraped_page,
                    task=task,
                    step=step,
                    page=current_page,
                    action=action,
                )
                await app.AGENT_FUNCTION.post_action_execution(action)
                detailed_agent_step_output.actions_and_results[action_idx] = (
                    action,
--- a/skyvern/forge/prompts/skyvern/extract-action-static.j2
+++ b/skyvern/forge/prompts/skyvern/extract-action-static.j2
@@ -22,7 +22,7 @@ Reply in JSON format with the following keys:
        "id": str, // The id of the element to take action on. The id has to be one from the elements list
        "text": str, // Text for INPUT_TEXT action only
        "file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
-        "download": bool, // Can only be true for CLICK actions. If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download.
+        "download": bool, // Can only be true for CLICK or SELECT_OPTION actions. If true, the browser is expected to trigger a download by clicking the element (CLICK) or by selecting the option (SELECT_OPTION). If false, the action is performed on the element without triggering a download.
        "option": {  // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
            "label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
            "index": int, // the index corresponding to the option index under the select element.
--- a/skyvern/forge/prompts/skyvern/extract-action.j2
+++ b/skyvern/forge/prompts/skyvern/extract-action.j2
@@ -22,7 +22,7 @@ Reply in JSON format with the following keys:
        "id": str, // The id of the element to take action on. The id has to be one from the elements list
        "text": str, // Text for INPUT_TEXT action only
        "file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
-        "download": bool, // Can only be true for CLICK actions. If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download.
+        "download": bool, // Can only be true for CLICK or SELECT_OPTION actions. If true, the browser is expected to trigger a download by clicking the element (CLICK) or by selecting the option (SELECT_OPTION). If false, the action is performed on the element without triggering a download.
        "option": {  // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
            "label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
            "index": int, // the index corresponding to the option index under the select element.
--- a/skyvern/webeye/actions/actions.py
+++ b/skyvern/webeye/actions/actions.py
@@ -258,9 +258,10 @@ class SolveCaptchaAction(Action):
 class SelectOptionAction(WebAction):
    action_type: ActionType = ActionType.SELECT_OPTION
    option: SelectOption
    download: bool = False
    def __repr__(self) -> str:
-        return f"SelectOptionAction(element_id={self.element_id}, option={self.option}, context={self.input_or_select_context})"
+        return f"SelectOptionAction(element_id={self.element_id}, option={self.option}, context={self.input_or_select_context}, download={self.download})"
 ###
--- a/skyvern/webeye/actions/handler.py
+++ b/skyvern/webeye/actions/handler.py
@@ -394,6 +394,146 @@ class ActionHandler:
        step: Step,
        page: Page,
        action: Action,
    ) -> list[ActionResult]:
        browser_state = app.BROWSER_MANAGER.get_for_task(task.task_id, workflow_run_id=task.workflow_run_id)
        # TODO: maybe support all action types in the future(?)
        trigger_download_action = isinstance(action, (SelectOptionAction, ClickAction)) and action.download
        if not trigger_download_action:
            results = await ActionHandler._handle_action(
                scraped_page=scraped_page,
                task=task,
                step=step,
                page=page,
                action=action,
            )
            await app.DATABASE.create_action(action=action)
            return results
        context = skyvern_context.current()
        download_dir = Path(
            get_download_dir(
                run_id=context.run_id if context and context.run_id else task.workflow_run_id or task.task_id
            )
        )
        initial_page_count = 0
        # get the initial page count
        if browser_state:
            initial_page_count = len(await browser_state.list_valid_pages())
        list_files_before = list_files_in_directory(download_dir)
        if task.browser_session_id:
            files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
                organization_id=task.organization_id, browser_session_id=task.browser_session_id
            )
            list_files_before = list_files_before + files_in_browser_session
        LOG.info(
            "Number of files in download directory before action",
            num_downloaded_files_before=len(list_files_before),
            download_dir=download_dir,
        )
        download_triggered = False
        try:
            results = await ActionHandler._handle_action(
                scraped_page=scraped_page,
                task=task,
                step=step,
                page=page,
                action=action,
            )
            if not results:
                return results
            try:
                LOG.info(
                    "Checking if there is any new files after click",
                    download_dir=download_dir,
                )
                async with asyncio.timeout(task.download_timeout or BROWSER_DOWNLOAD_MAX_WAIT_TIME):
                    while True:
                        list_files_after = list_files_in_directory(download_dir)
                        if task.browser_session_id:
                            files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
                                organization_id=task.organization_id, browser_session_id=task.browser_session_id
                            )
                            list_files_after = list_files_after + files_in_browser_session
                        if len(list_files_after) > len(list_files_before):
                            LOG.info(
                                "Found new files in download directory after action",
                                num_downloaded_files_after=len(list_files_after),
                                download_dir=download_dir,
                                workflow_run_id=task.workflow_run_id,
                            )
                            download_triggered = True
                            break
                        await asyncio.sleep(1)
            except asyncio.TimeoutError:
                LOG.warning(
                    "No file to download after action",
                    workflow_run_id=task.workflow_run_id,
                )
            if not download_triggered:
                return results
            results[-1].download_triggered = True
            # check if there's any file is still downloading
            downloading_files = list_downloading_files_in_directory(download_dir)
            if task.browser_session_id:
                files_in_browser_session = await app.STORAGE.list_downloading_files_in_browser_session(
                    organization_id=task.organization_id, browser_session_id=task.browser_session_id
                )
                downloading_files = downloading_files + files_in_browser_session
            if len(downloading_files) == 0:
                return results
            LOG.info(
                "File downloading hasn't completed, wait for a while",
                downloading_files=downloading_files,
                workflow_run_id=task.workflow_run_id,
            )
            try:
                await wait_for_download_finished(
                    downloading_files=downloading_files, timeout=task.download_timeout or BROWSER_DOWNLOAD_TIMEOUT
                )
            except DownloadFileMaxWaitingTime as e:
                LOG.warning(
                    "There're several long-time downloading files, these files might be broken",
                    downloading_files=e.downloading_files,
                    workflow_run_id=task.workflow_run_id,
                )
            return results
        finally:
            if browser_state is not None and download_triggered:
                # get the page count after download
                pages_after_download = await browser_state.list_valid_pages()
                page_count_after_download = len(pages_after_download)
                LOG.info(
                    "Page count after download file action",
                    initial_page_count=initial_page_count,
                    page_count_after_download=page_count_after_download,
                )
                if page_count_after_download > initial_page_count:
                    LOG.info(
                        "Download triggered, closing the extra page",
                    )
                    if page == pages_after_download[-1]:
                        LOG.warning("The extra page is the current page, closing it")
                    # close the extra page
                    await pages_after_download[-1].close()
            await app.DATABASE.create_action(action=action)
    @staticmethod
    async def _handle_action(
        scraped_page: ScrapedPage,
        task: Task,
        step: Step,
        page: Page,
        action: Action,
    ) -> list[ActionResult]:
        LOG.info("Handling action", action=action)
        actions_result: list[ActionResult] = []
@@ -468,8 +608,6 @@ class ActionHandler:
                    LOG.warning("Action failed to execute, setting status to failed", action=action)
                action.status = ActionStatus.failed
            await app.DATABASE.create_action(action=action)
            if llm_caller and action.tool_call_id:
                tool_call_result = {
                    "type": "tool_result",
@@ -586,54 +724,8 @@ async def handle_click_action(
        return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]
    if action.download:
-        # get the initial page count
+        results = await handle_click_to_download_file_action(action, page, scraped_page, task, step)
        browser_state = app.BROWSER_MANAGER.get_for_task(task.task_id, workflow_run_id=task.workflow_run_id)
        initial_page_count = 0
        if browser_state is not None:
            initial_page_count = len(browser_state.browser_context.pages if browser_state.browser_context else [])
        LOG.info(
            "Page count before download file action",
            initial_page_count=initial_page_count,
            workflow_run_id=task.workflow_run_id,
        )
        results: list[ActionResult] = []
        try:
            results = await handle_click_to_download_file_action(action, page, scraped_page, task, step)
        except Exception:
            raise
        finally:
            # get the page count after download
            page_count_after_download = 0
            if browser_state is not None:
                page_count_after_download = len(
                    browser_state.browser_context.pages if browser_state.browser_context else []
                )
            LOG.info(
                "Page count after download file action",
                initial_page_count=initial_page_count,
                page_count_after_download=page_count_after_download,
                workflow_run_id=task.workflow_run_id,
            )
            if page_count_after_download > initial_page_count and browser_state and browser_state.browser_context:
                if results and results[-1].download_triggered:
                    LOG.info(
                        "Download triggered, closing the extra page",
                        workflow_run_id=task.workflow_run_id,
                    )
                    if page == browser_state.browser_context.pages[-1]:
                        LOG.warning(
                            "The extra page is the current page, closing it",
                            workflow_run_id=task.workflow_run_id,
                        )
                    # close the extra page
                    await browser_state.browser_context.pages[-1].close()
                else:
                    LOG.info(
                        "No download triggered, not closing the extra page",
                        workflow_run_id=task.workflow_run_id,
                    )
    elif action.file_url:
        upload_file_action = UploadFileAction(
            reasoning=action.reasoning,
@@ -828,24 +920,6 @@ async def handle_click_to_download_file_action(
    skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
    locator = skyvern_element.locator
    context = skyvern_context.current()
    download_dir = Path(
        get_download_dir(run_id=context.run_id if context and context.run_id else task.workflow_run_id or task.task_id)
    )
    list_files_before = list_files_in_directory(download_dir)
    if task.browser_session_id:
        files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
            organization_id=task.organization_id, browser_session_id=task.browser_session_id
        )
        list_files_before = list_files_before + files_in_browser_session
    LOG.info(
        "Number of files in download directory before click",
        num_downloaded_files_before=len(list_files_before),
        download_dir=download_dir,
        workflow_run_id=task.workflow_run_id,
    )
    try:
        if not await skyvern_element.navigate_to_a_href(page=page):
            await locator.click(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
@@ -859,65 +933,7 @@ async def handle_click_to_download_file_action(
        )
        return [ActionFailure(e, download_triggered=False)]
-    try:
+    return [ActionSuccess()]
        LOG.info(
            "Checking if there is any new files after click",
            download_dir=download_dir,
        )
        async with asyncio.timeout(task.download_timeout or BROWSER_DOWNLOAD_MAX_WAIT_TIME):
            while True:
                list_files_after = list_files_in_directory(download_dir)
                if task.browser_session_id:
                    files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
                        organization_id=task.organization_id, browser_session_id=task.browser_session_id
                    )
                    list_files_after = list_files_after + files_in_browser_session
                if len(list_files_after) > len(list_files_before):
                    LOG.info(
                        "Found new files in download directory after click",
                        num_downloaded_files_after=len(list_files_after),
                        download_dir=download_dir,
                        workflow_run_id=task.workflow_run_id,
                    )
                    break
                await asyncio.sleep(1)
    except asyncio.TimeoutError:
        LOG.warning(
            "No file to download after click",
            workflow_run_id=task.workflow_run_id,
        )
        return [ActionSuccess(download_triggered=False)]
    # check if there's any file is still downloading
    downloading_files = list_downloading_files_in_directory(download_dir)
    if task.browser_session_id:
        files_in_browser_session = await app.STORAGE.list_downloading_files_in_browser_session(
            organization_id=task.organization_id, browser_session_id=task.browser_session_id
        )
        downloading_files = downloading_files + files_in_browser_session
    if len(downloading_files) == 0:
        return [ActionSuccess(download_triggered=True)]
    LOG.info(
        "File downloading hasn't completed, wait for a while",
        downloading_files=downloading_files,
        workflow_run_id=task.workflow_run_id,
    )
    try:
        await wait_for_download_finished(
            downloading_files=downloading_files, timeout=task.download_timeout or BROWSER_DOWNLOAD_TIMEOUT
        )
    except DownloadFileMaxWaitingTime as e:
        LOG.warning(
            "There're several long-time downloading files, these files might be broken",
            downloading_files=e.downloading_files,
            workflow_run_id=task.workflow_run_id,
        )
    return [ActionSuccess(download_triggered=True)]
 # TOTP timing constants
--- a/skyvern/webeye/actions/parse_actions.py
+++ b/skyvern/webeye/actions/parse_actions.py
@@ -154,6 +154,7 @@ def parse_action(
                index=index,
            ),
            input_or_select_context=input_or_select_context,
            download=action.get("download", False),
        )
    if action_type == ActionType.CHECKBOX: