Files
Dorod-Sky/skyvern/library/skyvern.py
2025-12-05 13:21:40 -07:00

551 lines
23 KiB
Python

import asyncio
import os
import pathlib
import tempfile
from typing import Any
import httpx
import structlog
from dotenv import load_dotenv
from playwright.async_api import Playwright, async_playwright
from skyvern.client import AsyncSkyvern, BrowserSessionResponse, SkyvernEnvironment
from skyvern.client.core import RequestOptions
from skyvern.client.types.task_run_response import TaskRunResponse
from skyvern.client.types.workflow_run_response import WorkflowRunResponse
from skyvern.forge.sdk.api.llm.models import LLMConfig, LLMRouterConfig
from skyvern.library.constants import DEFAULT_AGENT_HEARTBEAT_INTERVAL, DEFAULT_AGENT_TIMEOUT, DEFAULT_CDP_PORT
from skyvern.library.skyvern_browser import SkyvernBrowser
from skyvern.schemas.run_blocks import CredentialType
from skyvern.schemas.runs import ProxyLocation, RunEngine, RunStatus
LOG = structlog.get_logger()
class Skyvern(AsyncSkyvern):
    """Main entry point for the Skyvern SDK.

    This class provides methods to launch and connect to browsers (both local and cloud-hosted),
    and access the Skyvern API client for task and workflow management. It combines browser
    automation capabilities with AI-powered task execution.

    Playwright is started on demand by the browser helpers; call `aclose()` when you are
    done to stop it.

    Example:
        ```python
        # Remote mode: Connect to Skyvern Cloud (API key required)
        skyvern = Skyvern(api_key="your-api-key")

        # Local/embedded mode (run `skyvern quickstart` first):
        skyvern = Skyvern.local()

        # Launch a local browser (works only in local environment)
        browser = await skyvern.launch_local_browser(headless=False)
        page = await browser.get_working_page()

        # Or use a cloud browser (works only in cloud environment)
        browser = await skyvern.use_cloud_browser()
        page = await browser.get_working_page()

        # Execute AI-powered tasks
        await page.run.run_task("Fill out the form and submit it")
        ```

    You can also mix AI-powered tasks with direct browser control in the same session:
        ```python
        # Create credentials via API
        credential = await skyvern.create_credential(
            name="my_user",
            credential_type="password",
            credential=NonEmptyPasswordCredential(username="user@example.com", password="my_password"),
        )

        # Get a browser page
        browser = await skyvern.launch_cloud_browser()
        page = await browser.get_working_page()

        # Navigate manually
        await page.goto("https://example.com")

        # Use AI to handle login
        await page.run.login(
            credential_type=CredentialType.skyvern,
            credential_id=credential.credential_id,
        )

        # Continue with manual browser control
        await page.click("#invoices-button")
        await page.fill("#search", "my invoice")
        await page.screenshot(path="screenshot.png", full_page=True)
        ```
    """
def __init__(
    self,
    *,
    api_key: str,
    environment: SkyvernEnvironment = SkyvernEnvironment.CLOUD,
    base_url: str | None = None,
    timeout: float | None = None,
    follow_redirects: bool | None = True,
    httpx_client: httpx.AsyncClient | None = None,
) -> None:
    """Remote mode: connect to Skyvern Cloud or another remotely reachable instance.

    Args:
        api_key: API key for authenticating with Skyvern.
            Can be found on the settings page: https://app.skyvern.com/settings
        environment: The Skyvern environment to connect to. Defaults to
            SkyvernEnvironment.CLOUD. The value is also remembered on the instance and
            later decides whether the cloud-browser helpers may be used.
        base_url: Override the base URL for the Skyvern API (for example, to target a
            self-hosted instance). If not provided, the default URL for the specified
            environment is used.
        timeout: Timeout in seconds for API requests. If not provided, uses the default timeout.
        follow_redirects: Whether to automatically follow HTTP redirects. Defaults to True.
        httpx_client: Custom httpx AsyncClient for making API requests.
            If not provided, a default client will be created.
    """
    # Everything is forwarded verbatim to the generated API client.
    client_options: dict[str, Any] = {
        "base_url": base_url,
        "environment": environment,
        "api_key": api_key,
        "timeout": timeout,
        "follow_redirects": follow_redirects,
        "httpx_client": httpx_client,
    }
    super().__init__(**client_options)

    # State used by the browser helpers: the environment gates cloud-only features,
    # the API key is sent when attaching to cloud browsers, and Playwright is
    # started lazily on first use.
    self._environment = environment
    self._api_key: str | None = api_key
    self._playwright: Playwright | None = None
@classmethod
def local(
    cls,
    *,
    llm_config: LLMRouterConfig | LLMConfig | None = None,
    settings: dict[str, Any] | None = None,
) -> "Skyvern":
    """Local/embedded mode: Run Skyvern locally in-process.

    Prerequisites:
        Run `skyvern quickstart` first to set up your local environment and create a .env file

    Args:
        llm_config: Optional custom LLM configuration (LLMConfig or LLMRouterConfig).
            If provided, this will be registered as "CUSTOM_LLM" and used as the primary LLM,
            overriding the LLM_KEY setting from your .env file.

            If not provided, uses the LLM configured via LLM_KEY in your .env file.

            Example 1 - Using .env configuration (simplest, recommended):
            ```python
            from skyvern import Skyvern

            # Uses LLM_KEY and other settings from your .env file
            # Created by running `skyvern quickstart`
            skyvern = Skyvern.local()
            ```

            Example 2 - Custom LLM with environment variables:
            ```python
            from skyvern import Skyvern
            from skyvern.forge.sdk.api.llm.models import LLMConfig

            # Assumes OPENAI_API_KEY is set in your environment
            skyvern = Skyvern.local(
                llm_config=LLMConfig(
                    model_name="gpt-4o",
                    required_env_vars=["OPENAI_API_KEY"],
                    supports_vision=True,
                    add_assistant_prefix=False,
                )
            )
            ```

            Example 3 - Explicitly providing credentials:
            ```python
            from skyvern import Skyvern
            from skyvern.forge.sdk.api.llm.models import LLMConfig, LiteLLMParams

            skyvern = Skyvern.local(
                llm_config=LLMConfig(
                    model_name="gpt-4o",
                    required_env_vars=[],  # No env vars required
                    supports_vision=True,
                    add_assistant_prefix=False,
                    litellm_params=LiteLLMParams(
                        api_base="https://api.openai.com/v1",
                        api_key="sk-...",  # Your API key
                    ),
                )
            )
            ```
        settings: Optional dictionary of Skyvern settings to override.
            These override the corresponding settings from your .env file.
            Example: {"MAX_STEPS_PER_RUN": 100, "BROWSER_TYPE": "chromium-headful"}

    Returns:
        Skyvern: A Skyvern instance running in local/embedded mode.

    Raises:
        ValueError: If there is no `.env` file in the current working directory, or if
            SKYVERN_API_KEY is unset or empty after that file has been loaded.
    """
    # Imported lazily so the embedded server is only loaded when local mode is requested.
    from skyvern.library.embedded_server_factory import create_embedded_server  # noqa: PLC0415

    if not os.path.exists(".env"):
        raise ValueError("Please run `skyvern quickstart` to set up your local Skyvern environment")

    load_dotenv(".env")

    # The key is only checked for presence; it is not handed to the client below.
    # NOTE(review): presumably the embedded server reads it from the environment - confirm.
    api_key = os.getenv("SKYVERN_API_KEY")
    if not api_key:
        # This method has no api_key parameter, so the message points only at
        # remedies that are actually available to the caller.
        raise ValueError(
            "SKYVERN_API_KEY is not set. Run `skyvern quickstart` or set SKYVERN_API_KEY in the .env file."
        )

    # Bypass Skyvern.__init__ (which requires an api_key) and initialise the base
    # client directly with the in-process server's httpx client.
    obj = cls.__new__(cls)
    AsyncSkyvern.__init__(
        obj,
        environment=SkyvernEnvironment.LOCAL,
        httpx_client=create_embedded_server(
            llm_config=llm_config,
            settings_overrides=settings,
        ),
    )
    obj._environment = SkyvernEnvironment.LOCAL
    obj._api_key = None
    obj._playwright = None
    return obj
async def run_task(
    self,
    prompt: str,
    engine: RunEngine = RunEngine.skyvern_v2,
    model: dict[str, Any] | None = None,
    url: str | None = None,
    webhook_url: str | None = None,
    totp_identifier: str | None = None,
    totp_url: str | None = None,
    title: str | None = None,
    error_code_mapping: dict[str, str] | None = None,
    data_extraction_schema: dict[str, Any] | str | None = None,
    proxy_location: ProxyLocation | None = None,
    max_steps: int | None = None,
    wait_for_completion: bool = False,
    timeout: float = DEFAULT_AGENT_TIMEOUT,
    browser_session_id: str | None = None,
    user_agent: str | None = None,
    extra_http_headers: dict[str, str] | None = None,
    publish_workflow: bool = False,
    include_action_history_in_verification: bool | None = None,
    max_screenshot_scrolls: int | None = None,
    browser_address: str | None = None,
    request_options: RequestOptions | None = None,
) -> TaskRunResponse:
    """Start a task run and optionally poll until it reaches a final status.

    Every argument except `wait_for_completion` and `timeout` is forwarded unchanged
    to `AsyncSkyvern.run_task`.

    Args:
        wait_for_completion: When True, poll the run until its status is final before
            returning. When False, return right after the run has been submitted.
        timeout: Maximum number of seconds to spend polling. Only used when
            `wait_for_completion` is True.

    Returns:
        TaskRunResponse: Built from the most recent run payload (the submit response,
        or the last polled state when waiting).

    Raises:
        TimeoutError: If the run is not final within `timeout` seconds of polling.
    """
    submit_kwargs: dict[str, Any] = {
        "prompt": prompt,
        "engine": engine,
        "model": model,
        "url": url,
        "webhook_url": webhook_url,
        "totp_identifier": totp_identifier,
        "totp_url": totp_url,
        "title": title,
        "error_code_mapping": error_code_mapping,
        "data_extraction_schema": data_extraction_schema,
        "proxy_location": proxy_location,
        "max_steps": max_steps,
        "browser_session_id": browser_session_id,
        "user_agent": user_agent,
        "extra_http_headers": extra_http_headers,
        "publish_workflow": publish_workflow,
        "include_action_history_in_verification": include_action_history_in_verification,
        "max_screenshot_scrolls": max_screenshot_scrolls,
        "browser_address": browser_address,
        "request_options": request_options,
    }
    task_run = await super().run_task(**submit_kwargs)

    if wait_for_completion:
        # The timeout bounds the whole polling phase, not each individual request.
        async with asyncio.timeout(timeout):
            task_run = await super().get_run(task_run.run_id)
            while not RunStatus(task_run.status).is_final():
                await asyncio.sleep(DEFAULT_AGENT_HEARTBEAT_INTERVAL)
                task_run = await super().get_run(task_run.run_id)

    # Polling returns a different response model, so normalise to TaskRunResponse.
    return TaskRunResponse.model_validate(task_run.model_dump())
async def run_workflow(
    self,
    workflow_id: str,
    parameters: dict[str, Any] | None = None,
    template: bool | None = None,
    title: str | None = None,
    proxy_location: ProxyLocation | None = None,
    webhook_url: str | None = None,
    totp_url: str | None = None,
    totp_identifier: str | None = None,
    browser_session_id: str | None = None,
    max_steps_override: int | None = None,
    user_agent: str | None = None,
    browser_profile_id: str | None = None,
    max_screenshot_scrolls: int | None = None,
    extra_http_headers: dict[str, str] | None = None,
    browser_address: str | None = None,
    ai_fallback: bool | None = None,
    run_with: str | None = None,
    wait_for_completion: bool = False,
    timeout: float = DEFAULT_AGENT_TIMEOUT,
    request_options: RequestOptions | None = None,
) -> WorkflowRunResponse:
    """Start a workflow run and optionally poll until it reaches a final status.

    Every argument except `wait_for_completion` and `timeout` is forwarded unchanged
    to `AsyncSkyvern.run_workflow`.

    Args:
        wait_for_completion: When True, poll the run until its status is final before
            returning. When False, return right after the run has been submitted.
        timeout: Maximum number of seconds to spend polling. Only used when
            `wait_for_completion` is True.

    Returns:
        WorkflowRunResponse: Built from the most recent run payload (the submit
        response, or the last polled state when waiting).

    Raises:
        TimeoutError: If the run is not final within `timeout` seconds of polling.
    """
    submit_kwargs: dict[str, Any] = {
        "workflow_id": workflow_id,
        "parameters": parameters,
        "template": template,
        "title": title,
        "proxy_location": proxy_location,
        "webhook_url": webhook_url,
        "totp_url": totp_url,
        "totp_identifier": totp_identifier,
        "browser_session_id": browser_session_id,
        "max_steps_override": max_steps_override,
        "user_agent": user_agent,
        "browser_profile_id": browser_profile_id,
        "max_screenshot_scrolls": max_screenshot_scrolls,
        "extra_http_headers": extra_http_headers,
        "browser_address": browser_address,
        "ai_fallback": ai_fallback,
        "run_with": run_with,
        "request_options": request_options,
    }
    workflow_run = await super().run_workflow(**submit_kwargs)

    if wait_for_completion:
        # The timeout bounds the whole polling phase, not each individual request.
        async with asyncio.timeout(timeout):
            workflow_run = await super().get_run(workflow_run.run_id)
            while not RunStatus(workflow_run.status).is_final():
                await asyncio.sleep(DEFAULT_AGENT_HEARTBEAT_INTERVAL)
                workflow_run = await super().get_run(workflow_run.run_id)

    # Polling returns a different response model, so normalise to WorkflowRunResponse.
    return WorkflowRunResponse.model_validate(workflow_run.model_dump())
async def login(
    self,
    credential_type: CredentialType,
    *,
    url: str | None = None,
    credential_id: str | None = None,
    bitwarden_collection_id: str | None = None,
    bitwarden_item_id: str | None = None,
    onepassword_vault_id: str | None = None,
    onepassword_item_id: str | None = None,
    prompt: str | None = None,
    webhook_url: str | None = None,
    proxy_location: ProxyLocation | None = None,
    totp_identifier: str | None = None,
    totp_url: str | None = None,
    browser_session_id: str | None = None,
    browser_address: str | None = None,
    extra_http_headers: dict[str, str] | None = None,
    max_screenshot_scrolling_times: int | None = None,
    azure_vault_name: str | None = None,
    azure_vault_username_key: str | None = None,
    azure_vault_password_key: str | None = None,
    azure_vault_totp_secret_key: str | None = None,
    wait_for_completion: bool = False,
    timeout: float = DEFAULT_AGENT_TIMEOUT,
    request_options: RequestOptions | None = None,
) -> WorkflowRunResponse:
    """Start a login run and optionally poll until it reaches a final status.

    Every argument except `wait_for_completion` and `timeout` is forwarded unchanged
    to `AsyncSkyvern.login`.

    Args:
        wait_for_completion: When True, poll the run until its status is final before
            returning. When False, return right after the run has been submitted.
        timeout: Maximum number of seconds to spend polling. Only used when
            `wait_for_completion` is True.

    Returns:
        WorkflowRunResponse: Built from the most recent run payload (the submit
        response, or the last polled state when waiting).

    Raises:
        TimeoutError: If the run is not final within `timeout` seconds of polling.
    """
    submit_kwargs: dict[str, Any] = {
        "credential_type": credential_type,
        "url": url,
        "credential_id": credential_id,
        "bitwarden_collection_id": bitwarden_collection_id,
        "bitwarden_item_id": bitwarden_item_id,
        "onepassword_vault_id": onepassword_vault_id,
        "onepassword_item_id": onepassword_item_id,
        "prompt": prompt,
        "webhook_url": webhook_url,
        "proxy_location": proxy_location,
        "totp_identifier": totp_identifier,
        "totp_url": totp_url,
        "browser_session_id": browser_session_id,
        "browser_address": browser_address,
        "extra_http_headers": extra_http_headers,
        "max_screenshot_scrolling_times": max_screenshot_scrolling_times,
        "azure_vault_name": azure_vault_name,
        "azure_vault_username_key": azure_vault_username_key,
        "azure_vault_password_key": azure_vault_password_key,
        "azure_vault_totp_secret_key": azure_vault_totp_secret_key,
        "request_options": request_options,
    }
    workflow_run = await super().login(**submit_kwargs)

    if wait_for_completion:
        # The timeout bounds the whole polling phase, not each individual request.
        async with asyncio.timeout(timeout):
            workflow_run = await super().get_run(workflow_run.run_id)
            while not RunStatus(workflow_run.status).is_final():
                await asyncio.sleep(DEFAULT_AGENT_HEARTBEAT_INTERVAL)
                workflow_run = await super().get_run(workflow_run.run_id)

    # Polling returns a different response model, so normalise to WorkflowRunResponse.
    return WorkflowRunResponse.model_validate(workflow_run.model_dump())
async def launch_local_browser(
    self,
    *,
    headless: bool = False,
    port: int = DEFAULT_CDP_PORT,
    args: list[str] | None = None,
    user_data_dir: str | None = None,
) -> SkyvernBrowser:
    """Launch a local Chromium browser with Chrome DevTools Protocol (CDP) enabled.

    The browser is started through Playwright as a persistent context with
    `--remote-debugging-port` set, so it can also be reached over CDP at
    `http://localhost:<port>`.

    Args:
        headless: Whether to run the browser in headless mode. Defaults to False.
        port: The port number for the CDP endpoint. Defaults to DEFAULT_CDP_PORT.
        args: Additional command-line arguments to pass to Chromium, appended after
            the remote-debugging flag. Defaults to None.
            Example: ["--disable-blink-features=AutomationControlled", "--window-size=1920,1080"]
        user_data_dir: Directory holding the browser profile. When omitted or empty,
            a `skyvern-browser` directory inside the system temp directory is used.

    Returns:
        SkyvernBrowser: A browser instance with Skyvern capabilities.
    """
    pw = await self._get_playwright()

    profile_dir = (
        pathlib.Path(user_data_dir) if user_data_dir else pathlib.Path(tempfile.gettempdir()) / "skyvern-browser"
    )
    # The debugging port flag always comes first; caller-supplied flags follow it.
    chromium_args = [f"--remote-debugging-port={port}", *(args or [])]

    context = await pw.chromium.launch_persistent_context(
        user_data_dir=str(profile_dir),
        headless=headless,
        args=chromium_args,
    )
    return SkyvernBrowser(self, context, browser_address=f"http://localhost:{port}")
async def connect_to_browser_over_cdp(self, cdp_url: str) -> SkyvernBrowser:
    """Attach to an already running browser via Chrome DevTools Protocol (CDP).

    Use this for a browser that was started with CDP enabled, whether it runs
    locally or on another machine.

    Args:
        cdp_url: The CDP endpoint URL of the running browser
            (e.g., "http://localhost:9222").

    Returns:
        SkyvernBrowser: A browser instance connected to the existing browser. It wraps
        the browser's first context, or a newly created one if it has none.
    """
    pw = await self._get_playwright()
    remote_browser = await pw.chromium.connect_over_cdp(cdp_url)

    # Prefer the context the browser already has so existing pages stay reachable.
    if remote_browser.contexts:
        context = remote_browser.contexts[0]
    else:
        context = await remote_browser.new_context()

    return SkyvernBrowser(self, context, browser_address=cdp_url)
async def connect_to_cloud_browser_session(self, browser_session_id: str) -> SkyvernBrowser:
    """Attach to an existing cloud-hosted browser session identified by its ID.

    Args:
        browser_session_id: The ID of the cloud browser session to connect to.

    Returns:
        SkyvernBrowser: A browser instance connected to the cloud session.

    Raises:
        ValueError: If this client is not configured for a cloud environment, or the
            session has no browser address.
    """
    self._ensure_cloud_environment()

    session = await self.get_browser_session(browser_session_id)
    # Log the ID reported by the API rather than the caller-supplied one.
    LOG.info(
        "Connecting to existing cloud browser session",
        browser_session_id=session.browser_session_id,
    )
    connected = await self._connect_to_cloud_browser_session(session)
    return connected
async def launch_cloud_browser(
    self,
    *,
    timeout: int | None = None,
    proxy_location: ProxyLocation | None = None,
) -> SkyvernBrowser:
    """Create a new cloud-hosted browser session and connect to it.

    Args:
        timeout: Timeout in minutes for the session. Timeout is applied after the session is started.
            Must be between 5 and 1440. Defaults to 60.
        proxy_location: Geographic proxy location to route the browser traffic through.
            This is only available in Skyvern Cloud.

    Returns:
        SkyvernBrowser: A browser instance connected to the new cloud session.

    Raises:
        ValueError: If this client is not configured for a cloud environment, or the
            new session has no browser address.
    """
    self._ensure_cloud_environment()

    session_options = {"timeout": timeout, "proxy_location": proxy_location}
    new_session = await self.create_browser_session(**session_options)
    LOG.info(
        "Launched new cloud browser session",
        browser_session_id=new_session.browser_session_id,
    )
    connected = await self._connect_to_cloud_browser_session(new_session)
    return connected
async def use_cloud_browser(
    self,
    *,
    timeout: int | None = None,
    proxy_location: ProxyLocation | None = None,
) -> SkyvernBrowser:
    """Get or create a cloud browser session.

    This method attempts to reuse the most recently started cloud browser session whose
    `runnable_id` is None. If no such session exists, it creates a new one. This is
    useful for cost efficiency and session persistence.

    Args:
        timeout: Timeout in minutes for the session. Timeout is applied after the session is started.
            Must be between 5 and 1440. Defaults to 60. Only used when creating a new session.
        proxy_location: Geographic proxy location to route the browser traffic through.
            This is only available in Skyvern Cloud. Only used when creating a new session.

    Returns:
        SkyvernBrowser: A browser instance connected to an existing or new cloud session.

    Raises:
        ValueError: If this client is not configured for a cloud environment, or the
            chosen session has no browser address.
    """
    self._ensure_cloud_environment()

    def _recency(session: BrowserSessionResponse) -> tuple[Any, ...]:
        # NOTE(review): assumes started_at can be None for a session that has not
        # started yet - confirm against BrowserSessionResponse. Comparing None with
        # a datetime raises TypeError, so sessions without a start time get a key
        # that sorts below every started session and equal to each other.
        started_at = session.started_at
        return (0,) if started_at is None else (1, started_at)

    browser_sessions = await self.get_browser_sessions()
    # Only sessions with no runnable_id are candidates for reuse
    # (presumably those not currently attached to a run).
    reusable_sessions = [s for s in browser_sessions if s.runnable_id is None]
    browser_session = max(reusable_sessions, key=_recency, default=None)

    if browser_session is None:
        LOG.info("No existing cloud browser session found, launching a new session")
        browser_session = await self.create_browser_session(
            timeout=timeout,
            proxy_location=proxy_location,
        )
        LOG.info("Launched new cloud browser session", browser_session_id=browser_session.browser_session_id)
    else:
        LOG.info("Reusing existing cloud browser session", browser_session_id=browser_session.browser_session_id)

    return await self._connect_to_cloud_browser_session(browser_session)
def _ensure_cloud_environment(self) -> None:
    """Raise ValueError unless the client targets the CLOUD or STAGING environment."""
    cloud_environments = (SkyvernEnvironment.CLOUD, SkyvernEnvironment.STAGING)
    if self._environment in cloud_environments:
        return
    raise ValueError("Cloud browser sessions are supported only in the cloud environment")
async def _connect_to_cloud_browser_session(self, browser_session: BrowserSessionResponse) -> SkyvernBrowser:
    """Attach Playwright to a cloud browser session over CDP.

    Args:
        browser_session: Session description returned by the API; its
            `browser_address` is used as the CDP endpoint.

    Returns:
        SkyvernBrowser: Wraps the session's first browser context, or a newly
        created one if the remote browser has none.

    Raises:
        ValueError: If the session has no browser address.
    """
    session_id = browser_session.browser_session_id
    cdp_address = browser_session.browser_address
    if cdp_address is None:
        raise ValueError(f"Browser address is missing for session {session_id}")

    pw = await self._get_playwright()
    # The API key is sent as a header on the CDP connection request.
    remote_browser = await pw.chromium.connect_over_cdp(cdp_address, headers={"x-api-key": self._api_key})

    if remote_browser.contexts:
        context = remote_browser.contexts[0]
    else:
        context = await remote_browser.new_context()

    return SkyvernBrowser(self, context, browser_session_id=session_id)
async def _get_playwright(self) -> Playwright:
    """Return the cached Playwright instance, starting it on first use."""
    pw = self._playwright
    if pw is None:
        # First use: start Playwright and remember it for later calls and for aclose().
        pw = await async_playwright().start()
        self._playwright = pw
    return pw
async def aclose(self) -> None:
    """Stop Playwright, if it was started, and forget the cached instance.

    Calling this when Playwright was never started (or was already closed) is a no-op.
    """
    pw = self._playwright
    if pw is None:
        return
    try:
        await pw.stop()
    finally:
        # Drop the reference even if stop() fails.
        self._playwright = None