diff --git a/alembic/versions/2025_08_21_0313-afdfd14ac46d_add_cdp_address_in_task_and_workflow.py b/alembic/versions/2025_08_21_0313-afdfd14ac46d_add_cdp_address_in_task_and_workflow.py new file mode 100644 index 00000000..b6645bc3 --- /dev/null +++ b/alembic/versions/2025_08_21_0313-afdfd14ac46d_add_cdp_address_in_task_and_workflow.py @@ -0,0 +1,35 @@ +"""add_cdp_address_in_task_and_workflow + +Revision ID: afdfd14ac46d +Revises: 20e960cf015d +Create Date: 2025-08-21 03:13:24.973540+00:00 + +""" + +from typing import Sequence, Union + +import sqlalchemy as sa + +from alembic import op + +# revision identifiers, used by Alembic. +revision: str = "afdfd14ac46d" +down_revision: Union[str, None] = "20e960cf015d" +branch_labels: Union[str, Sequence[str], None] = None +depends_on: Union[str, Sequence[str], None] = None + + +def upgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.add_column("observer_cruises", sa.Column("browser_address", sa.String(), nullable=True)) + op.add_column("tasks", sa.Column("browser_address", sa.String(), nullable=True)) + op.add_column("workflow_runs", sa.Column("browser_address", sa.String(), nullable=True)) + # ### end Alembic commands ### + + +def downgrade() -> None: + # ### commands auto generated by Alembic - please adjust! ### + op.drop_column("workflow_runs", "browser_address") + op.drop_column("tasks", "browser_address") + op.drop_column("observer_cruises", "browser_address") + # ### end Alembic commands ### diff --git a/skyvern/forge/agent.py b/skyvern/forge/agent.py index e0f90074..0cc044c7 100644 --- a/skyvern/forge/agent.py +++ b/skyvern/forge/agent.py @@ -187,6 +187,7 @@ class ForgeAgent: model=task_block.model, max_screenshot_scrolling_times=workflow_run.max_screenshot_scrolls, extra_http_headers=workflow_run.extra_http_headers, + browser_address=workflow_run.browser_address, ) LOG.info( "Created a new task for workflow run", @@ -255,6 +256,7 @@ class ForgeAgent: max_screenshot_scrolling_times=task_request.max_screenshot_scrolls, extra_http_headers=task_request.extra_http_headers, browser_session_id=task_request.browser_session_id, + browser_address=task_request.browser_address, ) LOG.info( "Created new task", @@ -292,6 +294,10 @@ class ForgeAgent: if engine in CUA_ENGINES: complete_verification = False + close_browser_on_completion = ( + close_browser_on_completion and browser_session_id is None and not task.browser_address + ) + workflow_run: WorkflowRun | None = None if task.workflow_run_id: workflow_run = await app.DATABASE.get_workflow_run( @@ -352,7 +358,7 @@ class ForgeAgent: api_key=api_key, need_call_webhook=True, browser_session_id=browser_session_id, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, ) return step, None, None @@ -410,7 +416,7 @@ class ForgeAgent: last_step=step, api_key=api_key, need_call_webhook=True, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return step, detailed_output, None @@ -517,7 +523,7 @@ class ForgeAgent: task=completed_task, last_step=last_step, api_key=api_key, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return last_step, detailed_output, None @@ -534,7 +540,7 @@ class ForgeAgent: task=task, last_step=step, api_key=api_key, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return step, detailed_output, None @@ -557,7 +563,7 @@ class ForgeAgent: task=task, last_step=last_step, api_key=api_key, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return last_step, detailed_output, None @@ -642,7 +648,7 @@ class ForgeAgent: task=task, last_step=step, api_key=api_key, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return step, detailed_output, None @@ -659,7 +665,7 @@ class ForgeAgent: task=task, last_step=step, api_key=api_key, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) else: @@ -694,7 +700,7 @@ class ForgeAgent: task=task, last_step=step, api_key=api_key, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, need_final_screenshot=False, browser_session_id=browser_session_id, ) @@ -716,7 +722,7 @@ class ForgeAgent: api_key=api_key, need_call_webhook=False, browser_session_id=browser_session_id, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, ) return step, detailed_output, None except InvalidTaskStatusTransition: @@ -732,7 +738,7 @@ class ForgeAgent: api_key=api_key, need_call_webhook=False, browser_session_id=browser_session_id, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, ) return step, detailed_output, None except (UnsupportedActionType, UnsupportedTaskType, FailedToParseActionInstruction) as e: @@ -750,7 +756,7 @@ class ForgeAgent: api_key=api_key, need_call_webhook=False, browser_session_id=browser_session_id, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, ) return step, detailed_output, None except ScrapingFailed as sfe: @@ -771,7 +777,7 @@ class ForgeAgent: task=task, last_step=step, api_key=api_key, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return step, detailed_output, None @@ -792,7 +798,7 @@ class ForgeAgent: task=task, last_step=step, api_key=api_key, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) else: diff --git a/skyvern/forge/sdk/db/client.py b/skyvern/forge/sdk/db/client.py index 6faa6f1a..9fc5e8e9 100644 --- a/skyvern/forge/sdk/db/client.py +++ b/skyvern/forge/sdk/db/client.py @@ -163,6 +163,7 @@ class AgentDB: max_screenshot_scrolling_times: int | None = None, extra_http_headers: dict[str, str] | None = None, browser_session_id: str | None = None, + browser_address: str | None = None, ) -> Task: try: async with self.Session() as session: @@ -193,6 +194,7 @@ class AgentDB: max_screenshot_scrolling_times=max_screenshot_scrolling_times, extra_http_headers=extra_http_headers, browser_session_id=browser_session_id, + browser_address=browser_address, ) session.add(new_task) await session.commit() @@ -1632,6 +1634,7 @@ class AgentDB: parent_workflow_run_id: str | None = None, max_screenshot_scrolling_times: int | None = None, extra_http_headers: dict[str, str] | None = None, + browser_address: str | None = None, ) -> WorkflowRun: try: async with self.Session() as session: @@ -1648,6 +1651,7 @@ class AgentDB: parent_workflow_run_id=parent_workflow_run_id, max_screenshot_scrolling_times=max_screenshot_scrolling_times, extra_http_headers=extra_http_headers, + browser_address=browser_address, ) session.add(workflow_run) await session.commit() @@ -2607,6 +2611,7 @@ class AgentDB: model: dict[str, Any] | None = None, max_screenshot_scrolling_times: int | None = None, extra_http_headers: dict[str, str] | None = None, + browser_address: str | None = None, ) -> TaskV2: async with self.Session() as session: new_task_v2 = TaskV2Model( @@ -2625,6 +2630,7 @@ class AgentDB: model=model, max_screenshot_scrolling_times=max_screenshot_scrolling_times, extra_http_headers=extra_http_headers, + browser_address=browser_address, ) session.add(new_task_v2) await session.commit() diff --git a/skyvern/forge/sdk/db/models.py b/skyvern/forge/sdk/db/models.py index 11d9c5cb..267568ed 100644 --- a/skyvern/forge/sdk/db/models.py +++ b/skyvern/forge/sdk/db/models.py @@ -106,6 +106,7 @@ class TaskModel(Base): index=True, ) model = Column(JSON, nullable=True) + browser_address = Column(String, nullable=True) class StepModel(Base): @@ -278,6 +279,7 @@ class WorkflowRunModel(Base): totp_identifier = Column(String) max_screenshot_scrolling_times = Column(Integer, nullable=True) extra_http_headers = Column(JSON, nullable=True) + browser_address = Column(String, nullable=True) queued_at = Column(DateTime, nullable=True) started_at = Column(DateTime, nullable=True) @@ -659,6 +661,7 @@ class TaskV2Model(Base): max_steps = Column(Integer, nullable=True) max_screenshot_scrolling_times = Column(Integer, nullable=True) extra_http_headers = Column(JSON, nullable=True) + browser_address = Column(String, nullable=True) queued_at = Column(DateTime, nullable=True) started_at = Column(DateTime, nullable=True) diff --git a/skyvern/forge/sdk/db/utils.py b/skyvern/forge/sdk/db/utils.py index cfd2c818..02d36434 100644 --- a/skyvern/forge/sdk/db/utils.py +++ b/skyvern/forge/sdk/db/utils.py @@ -152,6 +152,7 @@ def convert_to_task(task_obj: TaskModel, debug_enabled: bool = False, workflow_p finished_at=task_obj.finished_at, max_screenshot_scrolls=task_obj.max_screenshot_scrolling_times, browser_session_id=task_obj.browser_session_id, + browser_address=task_obj.browser_address, ) return task @@ -300,6 +301,7 @@ def convert_to_workflow_run( workflow_title=workflow_title, max_screenshot_scrolls=workflow_run_model.max_screenshot_scrolling_times, extra_http_headers=workflow_run_model.extra_http_headers, + browser_address=workflow_run_model.browser_address, ) diff --git a/skyvern/forge/sdk/executor/async_executor.py b/skyvern/forge/sdk/executor/async_executor.py index 86a3a52f..3745fdba 100644 --- a/skyvern/forge/sdk/executor/async_executor.py +++ b/skyvern/forge/sdk/executor/async_executor.py @@ -90,9 +90,6 @@ class BackgroundTaskExecutor(AsyncExecutor): **kwargs: dict, ) -> None: LOG.info("Executing task using background task executor", task_id=task_id) - - close_browser_on_completion = browser_session_id is None - organization = await app.DATABASE.get_organization(organization_id) if organization is None: raise OrganizationNotFound(organization_id) @@ -109,6 +106,9 @@ class BackgroundTaskExecutor(AsyncExecutor): status=TaskStatus.running, organization_id=organization_id, ) + + close_browser_on_completion = browser_session_id is None and not task.browser_address + run_obj = await app.DATABASE.get_run(run_id=task_id, organization_id=organization_id) engine = RunEngine.skyvern_v1 if run_obj and run_obj.task_run_type == RunType.openai_cua: diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py index 2340a4d2..3261c2d1 100644 --- a/skyvern/forge/sdk/routes/agent_protocol.py +++ b/skyvern/forge/sdk/routes/agent_protocol.py @@ -173,6 +173,7 @@ async def run_task( model=run_request.model, max_screenshot_scrolls=run_request.max_screenshot_scrolls, extra_http_headers=run_request.extra_http_headers, + browser_address=run_request.browser_address, ) task_v1_response = await task_v1_service.run_task( task=task_v1_request, @@ -231,6 +232,7 @@ async def run_task( model=run_request.model, max_screenshot_scrolling_times=run_request.max_screenshot_scrolls, extra_http_headers=run_request.extra_http_headers, + browser_address=run_request.browser_address, ) except MissingBrowserAddressError as e: raise HTTPException(status_code=400, detail=str(e)) from e @@ -330,6 +332,7 @@ async def run_workflow( browser_session_id=workflow_run_request.browser_session_id, max_screenshot_scrolls=workflow_run_request.max_screenshot_scrolls, extra_http_headers=workflow_run_request.extra_http_headers, + browser_address=workflow_run_request.browser_address, ) try: @@ -1914,6 +1917,7 @@ async def run_task_v2( max_screenshot_scrolling_times=data.max_screenshot_scrolls, browser_session_id=data.browser_session_id, extra_http_headers=data.extra_http_headers, + browser_address=data.browser_address, ) except MissingBrowserAddressError as e: raise HTTPException(status_code=400, detail=str(e)) from e diff --git a/skyvern/forge/sdk/schemas/task_v2.py b/skyvern/forge/sdk/schemas/task_v2.py index c83a7351..55e7a483 100644 --- a/skyvern/forge/sdk/schemas/task_v2.py +++ b/skyvern/forge/sdk/schemas/task_v2.py @@ -51,6 +51,7 @@ class TaskV2(BaseModel): finished_at: datetime | None = None max_screenshot_scrolls: int | None = Field(default=None, alias="max_screenshot_scrolling_times") extra_http_headers: dict[str, str] | None = None + browser_address: str | None = None created_at: datetime modified_at: datetime @@ -153,6 +154,7 @@ class TaskV2Request(BaseModel): error_code_mapping: dict[str, str] | None = None max_screenshot_scrolls: int | None = None extra_http_headers: dict[str, str] | None = None + browser_address: str | None = None @field_validator("url", "webhook_callback_url", "totp_verification_url") @classmethod diff --git a/skyvern/forge/sdk/schemas/tasks.py b/skyvern/forge/sdk/schemas/tasks.py index 5b0f6557..f13f3bf2 100644 --- a/skyvern/forge/sdk/schemas/tasks.py +++ b/skyvern/forge/sdk/schemas/tasks.py @@ -108,6 +108,11 @@ class TaskBase(BaseModel): description="The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot.", examples=[10], ) + browser_address: str | None = Field( + default=None, + description="The CDP address for the task.", + examples=["http://127.0.0.1:9222", "ws://127.0.0.1:9222/devtools/browser/1234567890"], + ) class TaskRequest(TaskBase): diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py index db043c54..28e711b4 100644 --- a/skyvern/forge/sdk/workflow/models/block.py +++ b/skyvern/forge/sdk/workflow/models/block.py @@ -640,13 +640,14 @@ class BaseTaskBlock(Block): try: current_context = skyvern_context.ensure_context() current_context.task_id = task.task_id + close_browser_on_completion = browser_session_id is None and not workflow_run.browser_address await app.agent.execute_step( organization=organization, task=task, step=step, task_block=self, browser_session_id=browser_session_id, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, complete_verification=self.complete_verification, engine=self.engine, ) @@ -800,7 +801,9 @@ class BaseTaskBlock(Block): class TaskBlock(BaseTaskBlock): - block_type: Literal[BlockType.TASK] = BlockType.TASK + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.TASK] = BlockType.TASK # type: ignore class LoopBlockExecutedResult(BaseModel): @@ -844,7 +847,9 @@ class LoopBlockExecutedResult(BaseModel): class ForLoopBlock(Block): - block_type: Literal[BlockType.FOR_LOOP] = BlockType.FOR_LOOP + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.FOR_LOOP] = BlockType.FOR_LOOP # type: ignore loop_blocks: list[BlockTypeVar] loop_over: PARAMETER_TYPE | None = None @@ -1361,7 +1366,9 @@ class ForLoopBlock(Block): class CodeBlock(Block): - block_type: Literal[BlockType.CODE] = BlockType.CODE + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.CODE] = BlockType.CODE # type: ignore code: str parameters: list[PARAMETER_TYPE] = [] @@ -1534,7 +1541,9 @@ DEFAULT_TEXT_PROMPT_LLM_KEY = settings.PROMPT_BLOCK_LLM_KEY or settings.LLM_KEY class TextPromptBlock(Block): - block_type: Literal[BlockType.TEXT_PROMPT] = BlockType.TEXT_PROMPT + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.TEXT_PROMPT] = BlockType.TEXT_PROMPT # type: ignore llm_key: str = DEFAULT_TEXT_PROMPT_LLM_KEY prompt: str @@ -1640,7 +1649,9 @@ class TextPromptBlock(Block): class DownloadToS3Block(Block): - block_type: Literal[BlockType.DOWNLOAD_TO_S3] = BlockType.DOWNLOAD_TO_S3 + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.DOWNLOAD_TO_S3] = BlockType.DOWNLOAD_TO_S3 # type: ignore url: str @@ -1726,7 +1737,9 @@ class DownloadToS3Block(Block): class UploadToS3Block(Block): - block_type: Literal[BlockType.UPLOAD_TO_S3] = BlockType.UPLOAD_TO_S3 + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.UPLOAD_TO_S3] = BlockType.UPLOAD_TO_S3 # type: ignore # TODO (kerem): A directory upload is supported but we should also support a list of files path: str | None = None @@ -1835,7 +1848,9 @@ class UploadToS3Block(Block): class FileUploadBlock(Block): - block_type: Literal[BlockType.FILE_UPLOAD] = BlockType.FILE_UPLOAD + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.FILE_UPLOAD] = BlockType.FILE_UPLOAD # type: ignore storage_type: FileStorageType = FileStorageType.S3 s3_bucket: str | None = None @@ -2069,7 +2084,9 @@ class FileUploadBlock(Block): class SendEmailBlock(Block): - block_type: Literal[BlockType.SEND_EMAIL] = BlockType.SEND_EMAIL + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.SEND_EMAIL] = BlockType.SEND_EMAIL # type: ignore smtp_host: AWSSecretParameter smtp_port: AWSSecretParameter @@ -2397,7 +2414,9 @@ class SendEmailBlock(Block): class FileParserBlock(Block): - block_type: Literal[BlockType.FILE_URL_PARSER] = BlockType.FILE_URL_PARSER + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.FILE_URL_PARSER] = BlockType.FILE_URL_PARSER # type: ignore file_url: str file_type: FileType @@ -2672,7 +2691,9 @@ class PDFParserBlock(Block): This block will be removed in a future version. """ - block_type: Literal[BlockType.PDF_PARSER] = BlockType.PDF_PARSER + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.PDF_PARSER] = BlockType.PDF_PARSER # type: ignore file_url: str json_schema: dict[str, Any] | None = None @@ -2778,7 +2799,9 @@ class PDFParserBlock(Block): class WaitBlock(Block): - block_type: Literal[BlockType.WAIT] = BlockType.WAIT + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.WAIT] = BlockType.WAIT # type: ignore wait_sec: int parameters: list[PARAMETER_TYPE] = [] @@ -2823,7 +2846,9 @@ class WaitBlock(Block): class ValidationBlock(BaseTaskBlock): - block_type: Literal[BlockType.VALIDATION] = BlockType.VALIDATION + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.VALIDATION] = BlockType.VALIDATION # type: ignore def get_all_parameters( self, @@ -2860,36 +2885,50 @@ class ValidationBlock(BaseTaskBlock): class ActionBlock(BaseTaskBlock): - block_type: Literal[BlockType.ACTION] = BlockType.ACTION + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.ACTION] = BlockType.ACTION # type: ignore class NavigationBlock(BaseTaskBlock): - block_type: Literal[BlockType.NAVIGATION] = BlockType.NAVIGATION + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.NAVIGATION] = BlockType.NAVIGATION # type: ignore navigation_goal: str class ExtractionBlock(BaseTaskBlock): - block_type: Literal[BlockType.EXTRACTION] = BlockType.EXTRACTION + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.EXTRACTION] = BlockType.EXTRACTION # type: ignore data_extraction_goal: str class LoginBlock(BaseTaskBlock): - block_type: Literal[BlockType.LOGIN] = BlockType.LOGIN + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.LOGIN] = BlockType.LOGIN # type: ignore class FileDownloadBlock(BaseTaskBlock): - block_type: Literal[BlockType.FILE_DOWNLOAD] = BlockType.FILE_DOWNLOAD + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.FILE_DOWNLOAD] = BlockType.FILE_DOWNLOAD # type: ignore class UrlBlock(BaseTaskBlock): - block_type: Literal[BlockType.GOTO_URL] = BlockType.GOTO_URL + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.GOTO_URL] = BlockType.GOTO_URL # type: ignore url: str class TaskV2Block(Block): - block_type: Literal[BlockType.TaskV2] = BlockType.TaskV2 + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.TaskV2] = BlockType.TaskV2 # type: ignore prompt: str url: str | None = None totp_verification_url: str | None = None @@ -3047,7 +3086,9 @@ class TaskV2Block(Block): class HttpRequestBlock(Block): - block_type: Literal[BlockType.HTTP_REQUEST] = BlockType.HTTP_REQUEST + # There is a mypy bug with Literal. Without the type: ignore, mypy will raise an error: + # Parameter 1 of Literal[...] cannot be of type "Any" + block_type: Literal[BlockType.HTTP_REQUEST] = BlockType.HTTP_REQUEST # type: ignore # Individual HTTP parameters method: str = "GET" diff --git a/skyvern/forge/sdk/workflow/models/workflow.py b/skyvern/forge/sdk/workflow/models/workflow.py index c41655d7..9fe41851 100644 --- a/skyvern/forge/sdk/workflow/models/workflow.py +++ b/skyvern/forge/sdk/workflow/models/workflow.py @@ -25,6 +25,7 @@ class WorkflowRequestBody(BaseModel): browser_session_id: str | None = None max_screenshot_scrolls: int | None = None extra_http_headers: dict[str, str] | None = None + browser_address: str | None = None @field_validator("webhook_callback_url", "totp_verification_url") @classmethod @@ -120,6 +121,7 @@ class WorkflowRun(BaseModel): parent_workflow_run_id: str | None = None workflow_title: str | None = None max_screenshot_scrolls: int | None = None + browser_address: str | None = None queued_at: datetime | None = None started_at: datetime | None = None @@ -170,3 +172,4 @@ class WorkflowRunResponseBase(BaseModel): workflow_title: str | None = None browser_session_id: str | None = None max_screenshot_scrolls: int | None = None + browser_address: str | None = None diff --git a/skyvern/forge/sdk/workflow/service.py b/skyvern/forge/sdk/workflow/service.py index 0cc139e4..55f57081 100644 --- a/skyvern/forge/sdk/workflow/service.py +++ b/skyvern/forge/sdk/workflow/service.py @@ -276,6 +276,7 @@ class WorkflowService: ) workflow_run = await self.get_workflow_run(workflow_run_id=workflow_run_id, organization_id=organization_id) workflow = await self.get_workflow_by_permanent_id(workflow_permanent_id=workflow_run.workflow_permanent_id) + close_browser_on_completion = browser_session_id is None and not workflow_run.browser_address # Check if there's a related workflow script that should be used instead workflow_script = await self._get_workflow_script(workflow, workflow_run) @@ -352,7 +353,7 @@ class WorkflowService: workflow_run=workflow_run, api_key=api_key, browser_session_id=browser_session_id, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, ) return workflow_run @@ -404,7 +405,7 @@ class WorkflowService: workflow_run=workflow_run, api_key=api_key, need_call_webhook=True, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return workflow_run @@ -422,7 +423,7 @@ class WorkflowService: workflow_run=workflow_run, api_key=api_key, need_call_webhook=True, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return workflow_run @@ -464,7 +465,7 @@ class WorkflowService: workflow_run=workflow_run, api_key=api_key, need_call_webhook=False, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return workflow_run @@ -489,7 +490,7 @@ class WorkflowService: workflow=workflow, workflow_run=workflow_run, api_key=api_key, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return workflow_run @@ -525,7 +526,7 @@ class WorkflowService: workflow=workflow, workflow_run=workflow_run, api_key=api_key, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return workflow_run @@ -561,7 +562,7 @@ class WorkflowService: workflow=workflow, workflow_run=workflow_run, api_key=api_key, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, ) return workflow_run @@ -599,7 +600,7 @@ class WorkflowService: workflow_run=workflow_run, api_key=api_key, browser_session_id=browser_session_id, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, ) return workflow_run @@ -626,7 +627,7 @@ class WorkflowService: workflow_run=workflow_run, api_key=api_key, browser_session_id=browser_session_id, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, ) # Track workflow run duration when completed @@ -869,6 +870,7 @@ class WorkflowService: parent_workflow_run_id=parent_workflow_run_id, max_screenshot_scrolling_times=workflow_request.max_screenshot_scrolls, extra_http_headers=workflow_request.extra_http_headers, + browser_address=workflow_request.browser_address, ) async def mark_workflow_run_as_completed(self, workflow_run_id: str) -> WorkflowRun: @@ -1299,6 +1301,7 @@ class WorkflowService: browser_session_id=workflow_run.browser_session_id, max_screenshot_scrolls=workflow_run.max_screenshot_scrolls, task_v2=task_v2, + browser_address=workflow_run.browser_address, ) async def clean_up_workflow( @@ -1313,10 +1316,13 @@ class WorkflowService: analytics.capture("skyvern-oss-agent-workflow-status", {"status": workflow_run.status}) tasks = await self.get_tasks_by_workflow_run_id(workflow_run.workflow_run_id) all_workflow_task_ids = [task.task_id for task in tasks] + close_browser_on_completion = ( + close_browser_on_completion and browser_session_id is None and not workflow_run.browser_address + ) browser_state = await app.BROWSER_MANAGER.cleanup_for_workflow_run( workflow_run.workflow_run_id, all_workflow_task_ids, - close_browser_on_completion=close_browser_on_completion and browser_session_id is None, + close_browser_on_completion=close_browser_on_completion, browser_session_id=browser_session_id, organization_id=workflow_run.organization_id, ) diff --git a/skyvern/schemas/runs.py b/skyvern/schemas/runs.py index 963fe79d..b8be0fec 100644 --- a/skyvern/schemas/runs.py +++ b/skyvern/schemas/runs.py @@ -293,6 +293,11 @@ class TaskRunRequest(BaseModel): default=None, description="The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot.", ) + browser_address: str | None = Field( + default=None, + description="The CDP address for the task.", + examples=["http://127.0.0.1:9222", "ws://127.0.0.1:9222/devtools/browser/1234567890"], + ) @field_validator("url", "webhook_url", "totp_url") @classmethod @@ -348,6 +353,11 @@ class WorkflowRunRequest(BaseModel): default=None, description="The extra HTTP headers for the requests in browser.", ) + browser_address: str | None = Field( + default=None, + description="The CDP address for the workflow run.", + examples=["http://127.0.0.1:9222", "ws://127.0.0.1:9222/devtools/browser/1234567890"], + ) @field_validator("webhook_url", "totp_url") @classmethod diff --git a/skyvern/services/task_v2_service.py b/skyvern/services/task_v2_service.py index b4257b67..d714edef 100644 --- a/skyvern/services/task_v2_service.py +++ b/skyvern/services/task_v2_service.py @@ -164,6 +164,7 @@ async def initialize_task_v2( max_screenshot_scrolling_times: int | None = None, browser_session_id: str | None = None, extra_http_headers: dict[str, str] | None = None, + browser_address: str | None = None, ) -> TaskV2: task_v2 = await app.DATABASE.create_task_v2( prompt=user_prompt, @@ -177,6 +178,7 @@ async def initialize_task_v2( model=model, max_screenshot_scrolling_times=max_screenshot_scrolling_times, extra_http_headers=extra_http_headers, + browser_address=browser_address, ) # set task_v2_id in context context = skyvern_context.current() @@ -230,6 +232,7 @@ async def initialize_task_v2( max_screenshot_scrolls=max_screenshot_scrolling_times, browser_session_id=browser_session_id, extra_http_headers=extra_http_headers, + browser_address=browser_address, ), workflow_permanent_id=new_workflow.workflow_permanent_id, organization=organization, @@ -368,7 +371,7 @@ async def run_task_v2( workflow=workflow, workflow_run=workflow_run, browser_session_id=browser_session_id, - close_browser_on_completion=browser_session_id is None, + close_browser_on_completion=browser_session_id is None and not workflow_run.browser_address, need_call_webhook=False, ) else: diff --git a/skyvern/services/workflow_service.py b/skyvern/services/workflow_service.py index c79d0d31..807faac7 100644 --- a/skyvern/services/workflow_service.py +++ b/skyvern/services/workflow_service.py @@ -130,6 +130,7 @@ async def get_workflow_run_response( totp_url=workflow_run.totp_verification_url or None, totp_identifier=workflow_run.totp_identifier, max_screenshot_scrolls=workflow_run.max_screenshot_scrolls, + browser_address=workflow_run.browser_address, # TODO: add browser session id ), ) diff --git a/skyvern/webeye/browser_factory.py b/skyvern/webeye/browser_factory.py index 00478d86..10f8be0e 100644 --- a/skyvern/webeye/browser_factory.py +++ b/skyvern/webeye/browser_factory.py @@ -631,6 +631,7 @@ class BrowserState: script_id: str | None = None, organization_id: str | None = None, extra_http_headers: dict[str, str] | None = None, + browser_address: str | None = None, ) -> None: if self.browser_context is None: LOG.info("creating browser context") @@ -647,6 +648,7 @@ class BrowserState: script_id=script_id, organization_id=organization_id, extra_http_headers=extra_http_headers, + browser_address=browser_address, ) self.browser_context = browser_context self.browser_artifacts = browser_artifacts @@ -654,9 +656,13 @@ class BrowserState: LOG.info("browser context is created") if await self.get_working_page() is None: - page = await self.browser_context.new_page() - await self.set_working_page(page, 0) - await self._close_all_other_pages() + if browser_address and len(self.browser_context.pages) > 0: + page = self.browser_context.pages[0] + await self.set_working_page(page, 0) + else: + page = await self.browser_context.new_page() + await self.set_working_page(page, 0) + await self._close_all_other_pages() if url: await self.navigate_to_url(page=page, url=url) @@ -779,6 +785,7 @@ class BrowserState: script_id: str | None = None, organization_id: str | None = None, extra_http_headers: dict[str, str] | None = None, + browser_address: str | None = None, ) -> Page: page = await self.get_working_page() if page is not None: @@ -793,6 +800,7 @@ class BrowserState: script_id=script_id, organization_id=organization_id, extra_http_headers=extra_http_headers, + browser_address=browser_address, ) except Exception as e: error_message = str(e) @@ -809,6 +817,7 @@ class BrowserState: script_id=script_id, organization_id=organization_id, extra_http_headers=extra_http_headers, + browser_address=browser_address, ) page = await self.__assert_page() @@ -824,6 +833,7 @@ class BrowserState: script_id=script_id, organization_id=organization_id, extra_http_headers=extra_http_headers, + browser_address=browser_address, ) page = await self.__assert_page() return page diff --git a/skyvern/webeye/browser_manager.py b/skyvern/webeye/browser_manager.py index eff7c703..bae08c5b 100644 --- a/skyvern/webeye/browser_manager.py +++ b/skyvern/webeye/browser_manager.py @@ -33,6 +33,7 @@ class BrowserManager: script_id: str | None = None, organization_id: str | None = None, extra_http_headers: dict[str, str] | None = None, + browser_address: str | None = None, ) -> BrowserState: pw = await async_playwright().start() ( @@ -48,6 +49,7 @@ class BrowserManager: script_id=script_id, organization_id=organization_id, extra_http_headers=extra_http_headers, + browser_address=browser_address, ) return BrowserState( pw=pw, @@ -113,6 +115,7 @@ class BrowserManager: task_id=task.task_id, organization_id=task.organization_id, extra_http_headers=task.extra_http_headers, + browser_address=task.browser_address, ) if browser_session_id: @@ -133,6 +136,7 @@ class BrowserManager: task_id=task.task_id, organization_id=task.organization_id, extra_http_headers=task.extra_http_headers, + browser_address=task.browser_address, ) return browser_state @@ -187,6 +191,7 @@ class BrowserManager: workflow_run_id=workflow_run.workflow_run_id, organization_id=workflow_run.organization_id, extra_http_headers=workflow_run.extra_http_headers, + browser_address=workflow_run.browser_address, ) if browser_session_id: @@ -207,6 +212,7 @@ class BrowserManager: workflow_run_id=workflow_run.workflow_run_id, organization_id=workflow_run.organization_id, extra_http_headers=workflow_run.extra_http_headers, + browser_address=workflow_run.browser_address, ) return browser_state