From fb2464121286bf6deaab515c073ff5a5c8e85d39 Mon Sep 17 00:00:00 2001 From: Stanislav Novosad Date: Fri, 17 Oct 2025 13:15:24 -0600 Subject: [PATCH] Skyvern SDK Prototype (#3624) --- skyvern/client/client.py | 15 ++ skyvern/client/environment.py | 2 +- skyvern/forge/sdk/routes/run_blocks.py | 1 + skyvern/library/constants.py | 1 + skyvern/library/skyvern_browser.py | 77 ++++++ skyvern/library/skyvern_browser_page.py | 296 ++++++++++++++++++++++++ skyvern/library/skyvern_sdk.py | 222 ++++++++++++++++++ skyvern/schemas/run_blocks.py | 5 + skyvern/webeye/browser_factory.py | 32 ++- 9 files changed, 648 insertions(+), 3 deletions(-) create mode 100644 skyvern/library/skyvern_browser.py create mode 100644 skyvern/library/skyvern_browser_page.py create mode 100644 skyvern/library/skyvern_sdk.py diff --git a/skyvern/client/client.py b/skyvern/client/client.py index 7f618363..3bfc49de 100644 --- a/skyvern/client/client.py +++ b/skyvern/client/client.py @@ -1959,6 +1959,7 @@ class AsyncSkyvern: totp_identifier: typing.Optional[str] = OMIT, totp_url: typing.Optional[str] = OMIT, browser_session_id: typing.Optional[str] = OMIT, + browser_address: typing.Optional[str] = OMIT, model: typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] = OMIT, extra_http_headers: typing.Optional[typing.Dict[str, typing.Optional[str]]] = OMIT, publish_workflow: typing.Optional[bool] = OMIT, @@ -2041,6 +2042,9 @@ class AsyncSkyvern: Run the task or workflow in the specific Skyvern browser session. Having a browser session can persist the real-time state of the browser, so that the next run can continue from where the previous run left off. + browser_address : typing.Optional[str] + The CDP address for the task + model : typing.Optional[typing.Dict[str, typing.Optional[typing.Any]]] Optional model configuration. @@ -2103,6 +2107,7 @@ class AsyncSkyvern: "totp_identifier": totp_identifier, "totp_url": totp_url, "browser_session_id": browser_session_id, + "browser_address": browser_address, "model": model, "extra_http_headers": extra_http_headers, "publish_workflow": publish_workflow, @@ -2163,6 +2168,7 @@ class AsyncSkyvern: totp_url: typing.Optional[str] = OMIT, totp_identifier: typing.Optional[str] = OMIT, browser_session_id: typing.Optional[str] = OMIT, + browser_address: typing.Optional[str] = OMIT, max_screenshot_scrolls: typing.Optional[int] = OMIT, extra_http_headers: typing.Optional[typing.Dict[str, typing.Optional[str]]] = OMIT, request_options: typing.Optional[RequestOptions] = None, @@ -2226,6 +2232,9 @@ class AsyncSkyvern: browser_session_id : typing.Optional[str] ID of a Skyvern browser session to reuse, having it continue from the current screen state + browser_address : typing.Optional[str] + The CDP address for the workflow + max_screenshot_scrolls : typing.Optional[int] The maximum number of scrolls for the post action screenshot. When it's None or 0, it takes the current viewpoint screenshot. @@ -2275,6 +2284,7 @@ class AsyncSkyvern: "totp_url": totp_url, "totp_identifier": totp_identifier, "browser_session_id": browser_session_id, + "browser_address": browser_address, "max_screenshot_scrolls": max_screenshot_scrolls, "extra_http_headers": extra_http_headers, }, @@ -3757,6 +3767,7 @@ class AsyncSkyvern: totp_identifier: typing.Optional[str] = OMIT, totp_url: typing.Optional[str] = OMIT, browser_session_id: typing.Optional[str] = OMIT, + browser_address: typing.Optional[str] = OMIT, extra_http_headers: typing.Optional[typing.Dict[str, typing.Optional[str]]] = OMIT, max_screenshot_scrolling_times: typing.Optional[int] = OMIT, credential_id: typing.Optional[str] = OMIT, @@ -3795,6 +3806,9 @@ class AsyncSkyvern: browser_session_id : typing.Optional[str] ID of the browser session to use, which is prefixed by `pbs_` e.g. `pbs_123456` + browser_address : typing.Optional[str] + The CDP address for the task + extra_http_headers : typing.Optional[typing.Dict[str, typing.Optional[str]]] Additional HTTP headers to include in requests @@ -3856,6 +3870,7 @@ class AsyncSkyvern: "totp_identifier": totp_identifier, "totp_url": totp_url, "browser_session_id": browser_session_id, + "browser_address": browser_address, "extra_http_headers": extra_http_headers, "max_screenshot_scrolling_times": max_screenshot_scrolling_times, "credential_id": credential_id, diff --git a/skyvern/client/environment.py b/skyvern/client/environment.py index a2001184..15a2b421 100644 --- a/skyvern/client/environment.py +++ b/skyvern/client/environment.py @@ -6,4 +6,4 @@ import enum class SkyvernEnvironment(enum.Enum): PRODUCTION = "https://api.skyvern.com" STAGING = "https://api-staging.skyvern.com" - DEVELOPMENT = "http://localhost:8000" + LOCAL = "http://localhost:8000" diff --git a/skyvern/forge/sdk/routes/run_blocks.py b/skyvern/forge/sdk/routes/run_blocks.py index 7b68b0a4..652863de 100644 --- a/skyvern/forge/sdk/routes/run_blocks.py +++ b/skyvern/forge/sdk/routes/run_blocks.py @@ -194,6 +194,7 @@ async def login( totp_identifier=login_request.totp_identifier, totp_verification_url=login_request.totp_url, browser_session_id=login_request.browser_session_id, + browser_address=login_request.browser_address, max_screenshot_scrolls=login_request.max_screenshot_scrolling_times, extra_http_headers=login_request.extra_http_headers, ) diff --git a/skyvern/library/constants.py b/skyvern/library/constants.py index 7a0e6ab3..c4b69708 100644 --- a/skyvern/library/constants.py +++ b/skyvern/library/constants.py @@ -1,2 +1,3 @@ DEFAULT_AGENT_TIMEOUT = 1800 # 30 minutes DEFAULT_AGENT_HEARTBEAT_INTERVAL = 10 # 10 seconds +DEFAULT_CDP_PORT = 9222 diff --git a/skyvern/library/skyvern_browser.py b/skyvern/library/skyvern_browser.py new file mode 100644 index 00000000..8c763b5c --- /dev/null +++ b/skyvern/library/skyvern_browser.py @@ -0,0 +1,77 @@ +from playwright.async_api import BrowserContext, Page + +from skyvern.client import AsyncSkyvern +from skyvern.library.skyvern_browser_page import SkyvernBrowserPage, SkyvernPageRun + + +class SkyvernBrowser: + """A browser context wrapper that creates Skyvern-enabled pages. + + This class wraps a Playwright BrowserContext and provides methods to create + SkyvernBrowserPage instances that combine traditional browser automation with + AI-powered task execution capabilities. It manages browser session state and + enables persistent browser sessions across multiple pages. + + Example: + ```python + sdk = SkyvernSdk() + browser = await sdk.launch_local_browser() + + # Get or create the working page + page = await browser.get_working_page() + + # Create a new page + new_page = await browser.new_page() + ``` + + Attributes: + _browser_context: The underlying Playwright BrowserContext. + _browser_session_id: Optional session ID for persistent browser sessions. + _browser_address: Optional address for remote browser connections. + _client: The AsyncSkyvern client for API communication. + """ + + def __init__( + self, + browser_context: BrowserContext, + client: AsyncSkyvern, + *, + browser_session_id: str | None = None, + browser_address: str | None = None, + ): + self._browser_context = browser_context + self._browser_session_id = browser_session_id + self._browser_address = browser_address + self._client = client + + async def get_working_page(self) -> SkyvernBrowserPage: + """Get the most recent page or create a new one if none exists. + + This method returns the last page in the browser context, or creates a new page + if the context has no pages. This is useful for continuing work on an existing + page without creating unnecessary new tabs. + + Returns: + SkyvernBrowserPage: The most recent page wrapped with Skyvern capabilities. + """ + if self._browser_context.pages: + page = self._browser_context.pages[-1] + else: + page = await self._browser_context.new_page() + return await self._create_skyvern_page(page) + + async def new_page(self) -> SkyvernBrowserPage: + """Create a new page (tab) in the browser context. + + This method always creates a new page, similar to opening a new tab in a browser. + The new page will have both Playwright's standard API and Skyvern's AI capabilities. + + Returns: + SkyvernBrowserPage: A new page wrapped with Skyvern capabilities. + """ + page = await self._browser_context.new_page() + return await self._create_skyvern_page(page) + + async def _create_skyvern_page(self, page: Page) -> SkyvernBrowserPage: + page_ai = SkyvernPageRun(page, self._browser_session_id, self._browser_address, self._client) + return SkyvernBrowserPage(page, page_ai) diff --git a/skyvern/library/skyvern_browser_page.py b/skyvern/library/skyvern_browser_page.py new file mode 100644 index 00000000..8179e942 --- /dev/null +++ b/skyvern/library/skyvern_browser_page.py @@ -0,0 +1,296 @@ +import asyncio +from typing import Any + +from playwright.async_api import Page + +from skyvern.client import AsyncSkyvern, GetRunResponse +from skyvern.client.types.workflow_run_response import WorkflowRunResponse +from skyvern.library.constants import DEFAULT_AGENT_HEARTBEAT_INTERVAL, DEFAULT_AGENT_TIMEOUT +from skyvern.schemas.run_blocks import CredentialType +from skyvern.schemas.runs import RunEngine, RunStatus, TaskRunResponse + + +class SkyvernPageRun: + """Provides methods to run Skyvern tasks and workflows in the context of a browser page. + + This class enables executing AI-powered browser automation tasks while sharing the + context of an existing browser page. It supports running custom tasks, login workflows, + and pre-defined workflows with automatic waiting for completion. + """ + + def __init__( + self, page: Page, browser_session_id: str | None, browser_address: str | None, client: AsyncSkyvern + ) -> None: + self._page = page + self._browser_session_id = browser_session_id + self._browser_address = browser_address + self._client = client + + async def run_task( + self, + prompt: str, + engine: RunEngine = RunEngine.skyvern_v2, + model: dict[str, Any] | None = None, + url: str | None = None, + webhook_url: str | None = None, + totp_identifier: str | None = None, + totp_url: str | None = None, + title: str | None = None, + error_code_mapping: dict[str, str] | None = None, + data_extraction_schema: dict[str, Any] | str | None = None, + max_steps: int | None = None, + timeout: float = DEFAULT_AGENT_TIMEOUT, + user_agent: str | None = None, + ) -> TaskRunResponse: + """Run a task in the context of this page and wait for it to finish. + + Args: + prompt: Natural language description of the task to perform. + engine: The execution engine to use. Defaults to skyvern_v2. + model: LLM model configuration options. + url: URL to navigate to. If not provided, uses the current page URL. + webhook_url: URL to receive webhook notifications about task progress. + totp_identifier: Identifier for TOTP (Time-based One-Time Password) authentication. + totp_url: URL to fetch TOTP codes from. + title: Human-readable title for this task run. + error_code_mapping: Mapping of error codes to custom error messages. + data_extraction_schema: Schema defining what data to extract from the page. + max_steps: Maximum number of steps the agent can take. + timeout: Maximum time in seconds to wait for task completion. + user_agent: Custom user agent string to use. + + Returns: + TaskRunResponse containing the task execution results. + """ + + task_run = await self._client.run_task( + prompt=prompt, + engine=engine, + model=model, + url=url or self._get_page_url(), + webhook_url=webhook_url, + totp_identifier=totp_identifier, + totp_url=totp_url, + title=title, + error_code_mapping=error_code_mapping, + data_extraction_schema=data_extraction_schema, + max_steps=max_steps, + browser_session_id=self._browser_session_id, + browser_address=self._browser_address, + user_agent=user_agent, + ) + + task_run = await self._wait_for_run_completion(task_run.run_id, timeout) + return TaskRunResponse.model_validate(task_run.model_dump()) + + async def login( + self, + credential_type: CredentialType, + *, + url: str | None = None, + credential_id: str | None = None, + bitwarden_collection_id: str | None = None, + bitwarden_item_id: str | None = None, + onepassword_vault_id: str | None = None, + onepassword_item_id: str | None = None, + prompt: str | None = None, + webhook_url: str | None = None, + totp_identifier: str | None = None, + totp_url: str | None = None, + extra_http_headers: dict[str, str] | None = None, + timeout: float = DEFAULT_AGENT_TIMEOUT, + ) -> WorkflowRunResponse: + """Run a login task in the context of this page and wait for it to finish. + + Args: + credential_type: Type of credential store to use (e.g., bitwarden, onepassword). + url: URL to navigate to for login. If not provided, uses the current page URL. + credential_id: ID of the credential to use. + bitwarden_collection_id: Bitwarden collection ID containing the credentials. + bitwarden_item_id: Bitwarden item ID for the credentials. + onepassword_vault_id: 1Password vault ID containing the credentials. + onepassword_item_id: 1Password item ID for the credentials. + prompt: Additional instructions for the login process. + webhook_url: URL to receive webhook notifications about login progress. + totp_identifier: Identifier for TOTP authentication. + totp_url: URL to fetch TOTP codes from. + extra_http_headers: Additional HTTP headers to include in requests. + timeout: Maximum time in seconds to wait for login completion. + + Returns: + WorkflowRunResponse containing the login workflow execution results. + """ + + workflow_run = await self._client.login( + credential_type=credential_type, + url=url or self._get_page_url(), + credential_id=credential_id, + bitwarden_collection_id=bitwarden_collection_id, + bitwarden_item_id=bitwarden_item_id, + onepassword_vault_id=onepassword_vault_id, + onepassword_item_id=onepassword_item_id, + prompt=prompt, + webhook_url=webhook_url, + totp_identifier=totp_identifier, + totp_url=totp_url, + browser_session_id=self._browser_session_id, + browser_address=self._browser_address, + extra_http_headers=extra_http_headers, + ) + + workflow_run = await self._wait_for_run_completion(workflow_run.run_id, timeout) + return WorkflowRunResponse.model_validate(workflow_run.model_dump()) + + async def run_workflow( + self, + workflow_id: str, + parameters: dict[str, Any] | None = None, + template: bool | None = None, + title: str | None = None, + webhook_url: str | None = None, + totp_url: str | None = None, + totp_identifier: str | None = None, + timeout: float = DEFAULT_AGENT_TIMEOUT, + ) -> WorkflowRunResponse: + """Run a workflow in the context of this page and wait for it to finish. + + Args: + workflow_id: ID of the workflow to execute. + parameters: Dictionary of parameters to pass to the workflow. + template: Whether this is a workflow template. + title: Human-readable title for this workflow run. + webhook_url: URL to receive webhook notifications about workflow progress. + totp_url: URL to fetch TOTP codes from. + totp_identifier: Identifier for TOTP authentication. + timeout: Maximum time in seconds to wait for workflow completion. + + Returns: + WorkflowRunResponse containing the workflow execution results. + """ + workflow_run = await self._client.run_workflow( + workflow_id=workflow_id, + parameters=parameters, + template=template, + title=title, + webhook_url=webhook_url, + totp_url=totp_url, + totp_identifier=totp_identifier, + browser_session_id=self._browser_session_id, + browser_address=self._browser_address, + ) + + workflow_run = await self._wait_for_run_completion(workflow_run.run_id, timeout) + return WorkflowRunResponse.model_validate(workflow_run.model_dump()) + + async def _wait_for_run_completion(self, run_id: str, timeout: float) -> GetRunResponse: + async with asyncio.timeout(timeout): + while True: + task_run = await self._client.get_run(run_id) + if RunStatus(task_run.status).is_final(): + break + await asyncio.sleep(DEFAULT_AGENT_HEARTBEAT_INTERVAL) + return task_run + + def _get_page_url(self) -> str | None: + url = self._page.url + if url == "about:blank": + return None + return url + + +class SkyvernBrowserPage: + """A browser page wrapper that combines Playwright's page API with Skyvern's AI capabilities. + + This class provides a unified interface for both traditional browser automation (via Playwright) + and AI-powered task execution (via Skyvern). It exposes standard page methods like click, fill, + goto, etc., while also providing access to Skyvern's task and workflow execution through the + `run` attribute. + + Example: + ```python + # Use standard Playwright methods + await page.goto("https://example.com") + await page.fill("#username", "user@example.com") + await page.click("#login-button") + + # Or use Skyvern's AI capabilities + await page.run.run_task("Fill out the contact form and submit it") + ``` + + Attributes: + run: SkyvernPageRun instance for executing AI-powered tasks and workflows. + """ + + def __init__(self, page: Page, run: SkyvernPageRun): + self.run = run + self._playwright_page = page + + async def click(self, selector: str, **kwargs: Any) -> None: + """Click an element matching the selector. + + Args: + selector: A selector to search for an element to click. + **kwargs: Additional options like timeout, force, position, etc. + """ + await self._playwright_page.click(selector, **kwargs) + + async def fill(self, selector: str, value: str, **kwargs: Any) -> None: + """Fill an input field with the given value. + + Args: + selector: A selector to search for an element to fill. + value: Value to fill for the input field. + **kwargs: Additional options like timeout, force, no_wait_after, etc. + """ + await self._playwright_page.fill(selector, value, **kwargs) + + async def goto(self, url: str, **kwargs: Any) -> None: + """Navigate to the given URL. + + Args: + url: URL to navigate page to. + **kwargs: Additional options like timeout, wait_until, referer, etc. + """ + await self._playwright_page.goto(url, **kwargs) + + async def type(self, selector: str, text: str, **kwargs: Any) -> None: + """Type text into an element character by character. + + Args: + selector: A selector to search for an element to type into. + text: Text to type into the element. + **kwargs: Additional options like delay, timeout, no_wait_after, etc. + """ + await self._playwright_page.type(selector, text, **kwargs) + + async def select_option(self, selector: str, value: Any = None, **kwargs: Any) -> list[str]: + """Select option(s) in a