From a758b038619f2b8fdf46a1406054a50b4305138b Mon Sep 17 00:00:00 2001 From: Jonathan Dobson Date: Mon, 6 Oct 2025 11:09:20 -0400 Subject: [PATCH] make browser download timeout configurable for blocks and tasks (#3619) --- ...0e4_add_download_timeout_to_tasks_table.py | 31 +++++++++++++++++++ skyvern/forge/agent.py | 8 ++++- skyvern/forge/sdk/db/client.py | 2 ++ skyvern/forge/sdk/db/models.py | 1 + skyvern/forge/sdk/db/utils.py | 1 + skyvern/forge/sdk/schemas/tasks.py | 5 +++ skyvern/forge/sdk/workflow/models/block.py | 2 ++ skyvern/forge/sdk/workflow/service.py | 1 + skyvern/schemas/workflows.py | 1 + skyvern/webeye/actions/handler.py | 7 +++-- skyvern/webeye/browser_factory.py | 6 ++-- 11 files changed, 60 insertions(+), 5 deletions(-) create mode 100644 alembic/versions/2025_10_06_1504-1ab477ef80e4_add_download_timeout_to_tasks_table.py diff --git a/alembic/versions/2025_10_06_1504-1ab477ef80e4_add_download_timeout_to_tasks_table.py b/alembic/versions/2025_10_06_1504-1ab477ef80e4_add_download_timeout_to_tasks_table.py new file mode 100644 index 00000000..af67b6e3 --- /dev/null +++ b/alembic/versions/2025_10_06_1504-1ab477ef80e4_add_download_timeout_to_tasks_table.py @@ -0,0 +1,31 @@ +"""add download_timeout to tasks table + +Revision ID: 1ab477ef80e4 +Revises: cce87185dbb5 +Create Date: 2025-10-06 15:04:12.103789+00:00 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "1ab477ef80e4" +down_revision: Union[str, None] = "cce87185dbb5" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("tasks", sa.Column("download_timeout", sa.Numeric(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! 
### + op.drop_column("tasks", "download_timeout") + # ### end Alembic commands ### diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index 0a14268d..5221084e 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -18,6 +18,7 @@ from playwright.async_api import Page from skyvern import analytics from skyvern.config import settings from skyvern.constants import ( + BROWSER_DOWNLOAD_TIMEOUT, BROWSER_DOWNLOADING_SUFFIX, DEFAULT_MAX_SCREENSHOT_SCROLLS, GET_DOWNLOADED_FILES_TIMEOUT, @@ -199,6 +200,7 @@ class ForgeAgent: extra_http_headers=workflow_run.extra_http_headers, browser_address=workflow_run.browser_address, browser_session_id=workflow_run.browser_session_id, + download_timeout=task_block.download_timeout, ) LOG.info( "Created a new task for workflow run", @@ -217,6 +219,7 @@ class ForgeAgent: organization_id=task.organization_id, status=TaskStatus.running, ) + step = await app.DATABASE.create_step( task.task_id, order=0, @@ -500,7 +503,10 @@ class ForgeAgent: step_id=step.step_id, ) try: - await wait_for_download_finished(downloading_files=downloading_files) + await wait_for_download_finished( + downloading_files=downloading_files, + timeout=task_block.download_timeout or BROWSER_DOWNLOAD_TIMEOUT, + ) except DownloadFileMaxWaitingTime as e: LOG.warning( "There're several long-time downloading files, these files might be broken", diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index f49f5b0d..b46d4e3c 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -172,6 +172,7 @@ class AgentDB: extra_http_headers: dict[str, str] | None = None, browser_session_id: str | None = None, browser_address: str | None = None, + download_timeout: float | None = None, ) -> Task: try: async with self.Session() as session: @@ -203,6 +204,7 @@ class AgentDB: extra_http_headers=extra_http_headers, browser_session_id=browser_session_id, browser_address=browser_address, + download_timeout=download_timeout, 
) session.add(new_task) await session.commit() diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py index 3ea31ce0..85c3051a 100644 --- a/skyvern/forge/sdk/db/models.py +++ b/skyvern/forge/sdk/db/models.py @@ -109,6 +109,7 @@ class TaskModel(Base): ) model = Column(JSON, nullable=True) browser_address = Column(String, nullable=True) + download_timeout = Column(Numeric, nullable=True) class StepModel(Base): diff --git a/skyvern/forge/sdk/db/utils.py b/skyvern/forge/sdk/db/utils.py index d4d31c97..2039df5f 100644 --- a/skyvern/forge/sdk/db/utils.py +++ b/skyvern/forge/sdk/db/utils.py @@ -158,6 +158,7 @@ def convert_to_task(task_obj: TaskModel, debug_enabled: bool = False, workflow_p max_screenshot_scrolls=task_obj.max_screenshot_scrolling_times, browser_session_id=task_obj.browser_session_id, browser_address=task_obj.browser_address, + download_timeout=task_obj.download_timeout, ) return task diff --git a/skyvern/forge/sdk/schemas/tasks.py b/skyvern/forge/sdk/schemas/tasks.py index f13f3bf2..810f45d5 100644 --- a/skyvern/forge/sdk/schemas/tasks.py +++ b/skyvern/forge/sdk/schemas/tasks.py @@ -113,6 +113,11 @@ class TaskBase(BaseModel): description="The CDP address for the task.", examples=["http://127.0.0.1:9222", "ws://127.0.0.1:9222/devtools/browser/1234567890"], ) + download_timeout: float | None = Field( + default=None, + description="The maximum time to wait for downloads to complete, in seconds.
If not set, defaults to BROWSER_DOWNLOAD_TIMEOUT seconds.", + examples=[15.0], + ) class TaskRequest(TaskBase): diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py index 8529867e..2c19066d 100644 --- a/skyvern/forge/sdk/workflow/models/block.py +++ b/skyvern/forge/sdk/workflow/models/block.py @@ -422,6 +422,7 @@ class BaseTaskBlock(Block): cache_actions: bool = False complete_verification: bool = True include_action_history_in_verification: bool = False + download_timeout: float | None = None # seconds def get_all_parameters( self, @@ -631,6 +632,7 @@ class BaseTaskBlock(Block): failure_reason=str(e), ) raise e + try: # add screenshot artifact for the first task screenshot = await browser_state.take_fullpage_screenshot( diff --git a/skyvern/forge/sdk/workflow/service.py b/skyvern/forge/sdk/workflow/service.py index f94c24b9..60d936db 100644 --- a/skyvern/forge/sdk/workflow/service.py +++ b/skyvern/forge/sdk/workflow/service.py @@ -2490,6 +2490,7 @@ class WorkflowService: cache_actions=block_yaml.cache_actions, complete_on_download=True, complete_verification=True, + download_timeout=block_yaml.download_timeout, ) elif block_yaml.block_type == BlockType.TaskV2: return TaskV2Block( diff --git a/skyvern/schemas/workflows.py b/skyvern/schemas/workflows.py index 4c17787f..8d95c6f0 100644 --- a/skyvern/schemas/workflows.py +++ b/skyvern/schemas/workflows.py @@ -433,6 +433,7 @@ class FileDownloadBlockYAML(BlockYAML): totp_verification_url: str | None = None totp_identifier: str | None = None cache_actions: bool = False + download_timeout: float | None = None class UrlBlockYAML(BlockYAML): diff --git a/skyvern/webeye/actions/handler.py b/skyvern/webeye/actions/handler.py index 3a1b1270..8d99aa8e 100644 --- a/skyvern/webeye/actions/handler.py +++ b/skyvern/webeye/actions/handler.py @@ -17,6 +17,7 @@ from skyvern.config import settings from skyvern.constants import ( AUTO_COMPLETION_POTENTIAL_VALUES_COUNT,
BROWSER_DOWNLOAD_MAX_WAIT_TIME, + BROWSER_DOWNLOAD_TIMEOUT, DROPDOWN_MENU_MAX_DISTANCE, REPO_ROOT_DIR, SKYVERN_ID_ATTR, @@ -864,7 +865,7 @@ async def handle_click_to_download_file_action( "Checking if there is any new files after click", download_dir=download_dir, ) - async with asyncio.timeout(BROWSER_DOWNLOAD_MAX_WAIT_TIME): + async with asyncio.timeout(task.download_timeout or BROWSER_DOWNLOAD_MAX_WAIT_TIME): while True: list_files_after = list_files_in_directory(download_dir) if task.browser_session_id: @@ -913,7 +914,9 @@ async def handle_click_to_download_file_action( workflow_run_id=task.workflow_run_id, ) try: - await wait_for_download_finished(downloading_files=downloading_files) + await wait_for_download_finished( + downloading_files=downloading_files, timeout=task.download_timeout or BROWSER_DOWNLOAD_TIMEOUT + ) except DownloadFileMaxWaitingTime as e: LOG.warning( "There're several long-time downloading files, these files might be broken", diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index 44d96f52..c94d89e8 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -70,12 +70,14 @@ def set_browser_console_log(browser_context: BrowserContext, browser_artifacts: browser_context.on("console", browser_console_log) -def set_download_file_listener(browser_context: BrowserContext, **kwargs: Any) -> None: +def set_download_file_listener( + browser_context: BrowserContext, download_timeout: float | None = None, **kwargs: Any +) -> None: async def listen_to_download(download: Download) -> None: workflow_run_id = kwargs.get("workflow_run_id") task_id = kwargs.get("task_id") try: - async with asyncio.timeout(BROWSER_DOWNLOAD_TIMEOUT): + async with asyncio.timeout(download_timeout or BROWSER_DOWNLOAD_TIMEOUT): file_path = await download.path() if file_path.suffix: return