Support downloads triggered by select-option actions (#4009)

This commit is contained in:
LawyZheng
2025-11-17 14:46:32 +08:00
committed by GitHub
parent 84bfba3384
commit abcdf6a033
6 changed files with 154 additions and 130 deletions

View File

@@ -1234,7 +1234,13 @@ class ForgeAgent:
"is_retry": step.retry_index > 0, "is_retry": step.retry_index > 0,
} }
results = await ActionHandler.handle_action(scraped_page, task, step, current_page, action) results = await ActionHandler.handle_action(
scraped_page=scraped_page,
task=task,
step=step,
page=current_page,
action=action,
)
await app.AGENT_FUNCTION.post_action_execution(action) await app.AGENT_FUNCTION.post_action_execution(action)
detailed_agent_step_output.actions_and_results[action_idx] = ( detailed_agent_step_output.actions_and_results[action_idx] = (
action, action,

View File

@@ -22,7 +22,7 @@ Reply in JSON format with the following keys:
"id": str, // The id of the element to take action on. The id has to be one from the elements list "id": str, // The id of the element to take action on. The id has to be one from the elements list
"text": str, // Text for INPUT_TEXT action only "text": str, // Text for INPUT_TEXT action only
"file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise. "file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
"download": bool, // Can only be true for CLICK actions. If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download. "download": bool, // Can only be true for CLICK or SELECT_OPTION actions. If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download.
"option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action "option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE "label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
"index": int, // the index corresponding to the option index under the select element. "index": int, // the index corresponding to the option index under the select element.

View File

@@ -22,7 +22,7 @@ Reply in JSON format with the following keys:
"id": str, // The id of the element to take action on. The id has to be one from the elements list "id": str, // The id of the element to take action on. The id has to be one from the elements list
"text": str, // Text for INPUT_TEXT action only "text": str, // Text for INPUT_TEXT action only
"file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise. "file_url": str, // The url of the file to upload if applicable. This field must be present for UPLOAD_FILE but can also be present for CLICK only if the click is to upload the file. It should be null otherwise.
"download": bool, // Can only be true for CLICK actions. If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download. "download": bool, // Can only be true for CLICK or SELECT_OPTION actions. If true, the browser will trigger a download by clicking the element. If false, the browser will click the element without triggering a download.
"option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action "option": { // The option to select for SELECT_OPTION action only. null if not SELECT_OPTION action
"label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE "label": str, // the label of the option if any. MAKE SURE YOU USE THIS LABEL TO SELECT THE OPTION. DO NOT PUT ANYTHING OTHER THAN A VALID OPTION LABEL HERE
"index": int, // the index corresponding to the option index under the select element. "index": int, // the index corresponding to the option index under the select element.

View File

@@ -258,9 +258,10 @@ class SolveCaptchaAction(Action):
class SelectOptionAction(WebAction): class SelectOptionAction(WebAction):
action_type: ActionType = ActionType.SELECT_OPTION action_type: ActionType = ActionType.SELECT_OPTION
option: SelectOption option: SelectOption
download: bool = False
def __repr__(self) -> str: def __repr__(self) -> str:
return f"SelectOptionAction(element_id={self.element_id}, option={self.option}, context={self.input_or_select_context})" return f"SelectOptionAction(element_id={self.element_id}, option={self.option}, context={self.input_or_select_context}, download={self.download})"
### ###

View File

@@ -394,6 +394,146 @@ class ActionHandler:
step: Step, step: Step,
page: Page, page: Page,
action: Action, action: Action,
) -> list[ActionResult]:
browser_state = app.BROWSER_MANAGER.get_for_task(task.task_id, workflow_run_id=task.workflow_run_id)
# TODO: maybe support all action types in the future(?)
trigger_download_action = isinstance(action, (SelectOptionAction, ClickAction)) and action.download
if not trigger_download_action:
results = await ActionHandler._handle_action(
scraped_page=scraped_page,
task=task,
step=step,
page=page,
action=action,
)
await app.DATABASE.create_action(action=action)
return results
context = skyvern_context.current()
download_dir = Path(
get_download_dir(
run_id=context.run_id if context and context.run_id else task.workflow_run_id or task.task_id
)
)
initial_page_count = 0
# get the initial page count
if browser_state:
initial_page_count = len(await browser_state.list_valid_pages())
list_files_before = list_files_in_directory(download_dir)
if task.browser_session_id:
files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
organization_id=task.organization_id, browser_session_id=task.browser_session_id
)
list_files_before = list_files_before + files_in_browser_session
LOG.info(
"Number of files in download directory before action",
num_downloaded_files_before=len(list_files_before),
download_dir=download_dir,
)
download_triggered = False
try:
results = await ActionHandler._handle_action(
scraped_page=scraped_page,
task=task,
step=step,
page=page,
action=action,
)
if not results:
return results
try:
LOG.info(
"Checking if there is any new files after click",
download_dir=download_dir,
)
async with asyncio.timeout(task.download_timeout or BROWSER_DOWNLOAD_MAX_WAIT_TIME):
while True:
list_files_after = list_files_in_directory(download_dir)
if task.browser_session_id:
files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
organization_id=task.organization_id, browser_session_id=task.browser_session_id
)
list_files_after = list_files_after + files_in_browser_session
if len(list_files_after) > len(list_files_before):
LOG.info(
"Found new files in download directory after action",
num_downloaded_files_after=len(list_files_after),
download_dir=download_dir,
workflow_run_id=task.workflow_run_id,
)
download_triggered = True
break
await asyncio.sleep(1)
except asyncio.TimeoutError:
LOG.warning(
"No file to download after action",
workflow_run_id=task.workflow_run_id,
)
if not download_triggered:
return results
results[-1].download_triggered = True
# check whether any file is still downloading
downloading_files = list_downloading_files_in_directory(download_dir)
if task.browser_session_id:
files_in_browser_session = await app.STORAGE.list_downloading_files_in_browser_session(
organization_id=task.organization_id, browser_session_id=task.browser_session_id
)
downloading_files = downloading_files + files_in_browser_session
if len(downloading_files) == 0:
return results
LOG.info(
"File downloading hasn't completed, wait for a while",
downloading_files=downloading_files,
workflow_run_id=task.workflow_run_id,
)
try:
await wait_for_download_finished(
downloading_files=downloading_files, timeout=task.download_timeout or BROWSER_DOWNLOAD_TIMEOUT
)
except DownloadFileMaxWaitingTime as e:
LOG.warning(
"There're several long-time downloading files, these files might be broken",
downloading_files=e.downloading_files,
workflow_run_id=task.workflow_run_id,
)
return results
finally:
if browser_state is not None and download_triggered:
# get the page count after download
pages_after_download = await browser_state.list_valid_pages()
page_count_after_download = len(pages_after_download)
LOG.info(
"Page count after download file action",
initial_page_count=initial_page_count,
page_count_after_download=page_count_after_download,
)
if page_count_after_download > initial_page_count:
LOG.info(
"Download triggered, closing the extra page",
)
if page == pages_after_download[-1]:
LOG.warning("The extra page is the current page, closing it")
# close the extra page
await pages_after_download[-1].close()
await app.DATABASE.create_action(action=action)
@staticmethod
async def _handle_action(
scraped_page: ScrapedPage,
task: Task,
step: Step,
page: Page,
action: Action,
) -> list[ActionResult]: ) -> list[ActionResult]:
LOG.info("Handling action", action=action) LOG.info("Handling action", action=action)
actions_result: list[ActionResult] = [] actions_result: list[ActionResult] = []
@@ -468,8 +608,6 @@ class ActionHandler:
LOG.warning("Action failed to execute, setting status to failed", action=action) LOG.warning("Action failed to execute, setting status to failed", action=action)
action.status = ActionStatus.failed action.status = ActionStatus.failed
await app.DATABASE.create_action(action=action)
if llm_caller and action.tool_call_id: if llm_caller and action.tool_call_id:
tool_call_result = { tool_call_result = {
"type": "tool_result", "type": "tool_result",
@@ -586,54 +724,8 @@ async def handle_click_action(
return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))] return [ActionFailure(InteractWithDisabledElement(skyvern_element.get_id()))]
if action.download: if action.download:
# get the initial page count results = await handle_click_to_download_file_action(action, page, scraped_page, task, step)
browser_state = app.BROWSER_MANAGER.get_for_task(task.task_id, workflow_run_id=task.workflow_run_id)
initial_page_count = 0
if browser_state is not None:
initial_page_count = len(browser_state.browser_context.pages if browser_state.browser_context else [])
LOG.info(
"Page count before download file action",
initial_page_count=initial_page_count,
workflow_run_id=task.workflow_run_id,
)
results: list[ActionResult] = []
try:
results = await handle_click_to_download_file_action(action, page, scraped_page, task, step)
except Exception:
raise
finally:
# get the page count after download
page_count_after_download = 0
if browser_state is not None:
page_count_after_download = len(
browser_state.browser_context.pages if browser_state.browser_context else []
)
LOG.info(
"Page count after download file action",
initial_page_count=initial_page_count,
page_count_after_download=page_count_after_download,
workflow_run_id=task.workflow_run_id,
)
if page_count_after_download > initial_page_count and browser_state and browser_state.browser_context:
if results and results[-1].download_triggered:
LOG.info(
"Download triggered, closing the extra page",
workflow_run_id=task.workflow_run_id,
)
if page == browser_state.browser_context.pages[-1]:
LOG.warning(
"The extra page is the current page, closing it",
workflow_run_id=task.workflow_run_id,
)
# close the extra page
await browser_state.browser_context.pages[-1].close()
else:
LOG.info(
"No download triggered, not closing the extra page",
workflow_run_id=task.workflow_run_id,
)
elif action.file_url: elif action.file_url:
upload_file_action = UploadFileAction( upload_file_action = UploadFileAction(
reasoning=action.reasoning, reasoning=action.reasoning,
@@ -828,24 +920,6 @@ async def handle_click_to_download_file_action(
skyvern_element = await dom.get_skyvern_element_by_id(action.element_id) skyvern_element = await dom.get_skyvern_element_by_id(action.element_id)
locator = skyvern_element.locator locator = skyvern_element.locator
context = skyvern_context.current()
download_dir = Path(
get_download_dir(run_id=context.run_id if context and context.run_id else task.workflow_run_id or task.task_id)
)
list_files_before = list_files_in_directory(download_dir)
if task.browser_session_id:
files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
organization_id=task.organization_id, browser_session_id=task.browser_session_id
)
list_files_before = list_files_before + files_in_browser_session
LOG.info(
"Number of files in download directory before click",
num_downloaded_files_before=len(list_files_before),
download_dir=download_dir,
workflow_run_id=task.workflow_run_id,
)
try: try:
if not await skyvern_element.navigate_to_a_href(page=page): if not await skyvern_element.navigate_to_a_href(page=page):
await locator.click(timeout=settings.BROWSER_ACTION_TIMEOUT_MS) await locator.click(timeout=settings.BROWSER_ACTION_TIMEOUT_MS)
@@ -859,65 +933,7 @@ async def handle_click_to_download_file_action(
) )
return [ActionFailure(e, download_triggered=False)] return [ActionFailure(e, download_triggered=False)]
try: return [ActionSuccess()]
LOG.info(
"Checking if there is any new files after click",
download_dir=download_dir,
)
async with asyncio.timeout(task.download_timeout or BROWSER_DOWNLOAD_MAX_WAIT_TIME):
while True:
list_files_after = list_files_in_directory(download_dir)
if task.browser_session_id:
files_in_browser_session = await app.STORAGE.list_downloaded_files_in_browser_session(
organization_id=task.organization_id, browser_session_id=task.browser_session_id
)
list_files_after = list_files_after + files_in_browser_session
if len(list_files_after) > len(list_files_before):
LOG.info(
"Found new files in download directory after click",
num_downloaded_files_after=len(list_files_after),
download_dir=download_dir,
workflow_run_id=task.workflow_run_id,
)
break
await asyncio.sleep(1)
except asyncio.TimeoutError:
LOG.warning(
"No file to download after click",
workflow_run_id=task.workflow_run_id,
)
return [ActionSuccess(download_triggered=False)]
# check if there's any file is still downloading
downloading_files = list_downloading_files_in_directory(download_dir)
if task.browser_session_id:
files_in_browser_session = await app.STORAGE.list_downloading_files_in_browser_session(
organization_id=task.organization_id, browser_session_id=task.browser_session_id
)
downloading_files = downloading_files + files_in_browser_session
if len(downloading_files) == 0:
return [ActionSuccess(download_triggered=True)]
LOG.info(
"File downloading hasn't completed, wait for a while",
downloading_files=downloading_files,
workflow_run_id=task.workflow_run_id,
)
try:
await wait_for_download_finished(
downloading_files=downloading_files, timeout=task.download_timeout or BROWSER_DOWNLOAD_TIMEOUT
)
except DownloadFileMaxWaitingTime as e:
LOG.warning(
"There're several long-time downloading files, these files might be broken",
downloading_files=e.downloading_files,
workflow_run_id=task.workflow_run_id,
)
return [ActionSuccess(download_triggered=True)]
# TOTP timing constants # TOTP timing constants

View File

@@ -154,6 +154,7 @@ def parse_action(
index=index, index=index,
), ),
input_or_select_context=input_or_select_context, input_or_select_context=input_or_select_context,
download=action.get("download", False),
) )
if action_type == ActionType.CHECKBOX: if action_type == ActionType.CHECKBOX: