SKY-7801/7802: MCP Foundation + Hybrid Browser Tools (selector + AI intent) (#4660)
This commit is contained in:
43
skyvern/cli/core/__init__.py
Normal file
43
skyvern/cli/core/__init__.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""Shared core layer for Skyvern CLI and MCP tools.
|
||||
|
||||
This package provides reusable primitives that both MCP tools and CLI commands
|
||||
import from, preventing logic duplication across interfaces.
|
||||
"""
|
||||
|
||||
from .artifacts import get_artifact_dir, save_artifact
|
||||
from .client import get_skyvern
|
||||
from .result import Artifact, BrowserContext, ErrorCode, Timer, make_error, make_result
|
||||
from .session_manager import (
|
||||
BrowserNotAvailableError,
|
||||
SessionState,
|
||||
browser_session,
|
||||
get_current_session,
|
||||
get_page,
|
||||
no_browser_error,
|
||||
resolve_browser,
|
||||
set_current_session,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
# client.py
|
||||
"get_skyvern",
|
||||
# result.py
|
||||
"Artifact",
|
||||
"BrowserContext",
|
||||
"ErrorCode",
|
||||
"Timer",
|
||||
"make_error",
|
||||
"make_result",
|
||||
# artifacts.py
|
||||
"get_artifact_dir",
|
||||
"save_artifact",
|
||||
# session_manager.py
|
||||
"BrowserNotAvailableError",
|
||||
"SessionState",
|
||||
"browser_session",
|
||||
"get_current_session",
|
||||
"get_page",
|
||||
"no_browser_error",
|
||||
"resolve_browser",
|
||||
"set_current_session",
|
||||
]
|
||||
29
skyvern/cli/core/artifacts.py
Normal file
29
skyvern/cli/core/artifacts.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from datetime import datetime, timezone
|
||||
from pathlib import Path
|
||||
|
||||
from .result import Artifact
|
||||
|
||||
|
||||
def get_artifact_dir(session_id: str | None = None, run_id: str | None = None) -> Path:
    """Return the directory in which artifacts for a session or run are stored.

    The layout is ``~/.skyvern/artifacts/<UTC date>/<leaf>``, where ``<leaf>``
    is the first non-empty value among ``session_id``, ``run_id`` and the
    literal ``"anonymous"``. The directory is only computed, not created.
    """
    # The date bucket is taken in UTC at call time.
    day_bucket = datetime.now(timezone.utc).strftime("%Y-%m-%d")
    leaf = session_id or run_id or "anonymous"
    return Path.home() / ".skyvern" / "artifacts" / day_bucket / leaf
|
||||
|
||||
|
||||
def save_artifact(
    content: bytes,
    kind: str,
    filename: str,
    mime: str,
    session_id: str | None = None,
) -> Artifact:
    """Write ``content`` into the session's artifact directory and describe it.

    The target directory comes from ``get_artifact_dir(session_id)`` and is
    created on demand. The returned ``Artifact`` records the kind, the path of
    the written file as a string, the MIME type and the payload size in bytes.
    """
    # NOTE(review): ``filename`` is joined to the directory as-is; presumably
    # callers only pass plain file names - confirm before exposing to user input.
    target_dir = get_artifact_dir(session_id)
    target_dir.mkdir(parents=True, exist_ok=True)

    target_file = target_dir / filename
    target_file.write_bytes(content)

    return Artifact(
        kind=kind,
        path=str(target_file),
        mime=mime,
        bytes=len(content),
    )
|
||||
32
skyvern/cli/core/client.py
Normal file
32
skyvern/cli/core/client.py
Normal file
@@ -0,0 +1,32 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
from contextvars import ContextVar
|
||||
|
||||
from skyvern.client import SkyvernEnvironment
|
||||
from skyvern.config import settings
|
||||
from skyvern.library.skyvern import Skyvern
|
||||
|
||||
_skyvern_instance: ContextVar[Skyvern | None] = ContextVar("skyvern_instance", default=None)
|
||||
|
||||
|
||||
def get_skyvern() -> Skyvern:
    """Return the Skyvern client for the current context, creating it if needed.

    A client already stored in the ``_skyvern_instance`` context variable is
    returned as-is. Otherwise one is built: a cloud client when an API key is
    found in ``settings`` or the ``SKYVERN_API_KEY`` environment variable, a
    local client when no key is available. The new client is stored in the
    context variable before being returned.
    """
    cached = _skyvern_instance.get()
    if cached is not None:
        return cached

    # Settings take precedence; the environment is only a fallback.
    api_key = settings.SKYVERN_API_KEY or os.environ.get("SKYVERN_API_KEY")
    base_url = settings.SKYVERN_BASE_URL or os.environ.get("SKYVERN_BASE_URL")

    if not api_key:
        client = Skyvern.local()
    else:
        client = Skyvern(
            api_key=api_key,
            environment=SkyvernEnvironment.CLOUD,
            base_url=base_url,
        )

    _skyvern_instance.set(client)
    return client
|
||||
106
skyvern/cli/core/result.py
Normal file
106
skyvern/cli/core/result.py
Normal file
@@ -0,0 +1,106 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import time
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timezone
|
||||
from typing import Any
|
||||
|
||||
|
||||
class ErrorCode:
    """String constants used as the ``code`` field of structured tool errors.

    Each constant's value equals its own name, so the codes read the same in
    Python and in serialized results.
    """

    # Browser/session resolution
    NO_ACTIVE_BROWSER = "NO_ACTIVE_BROWSER"
    BROWSER_NOT_FOUND = "BROWSER_NOT_FOUND"

    # Element targeting and action execution
    SELECTOR_NOT_FOUND = "SELECTOR_NOT_FOUND"
    ACTION_FAILED = "ACTION_FAILED"
    AI_FALLBACK_FAILED = "AI_FALLBACK_FAILED"

    # Generic failures
    SDK_ERROR = "SDK_ERROR"
    TIMEOUT = "TIMEOUT"
    INVALID_INPUT = "INVALID_INPUT"
|
||||
|
||||
|
||||
def _artifact_timestamp() -> str:
    """Return the current UTC time as an ISO-8601 string."""
    return datetime.now(timezone.utc).isoformat()


@dataclass
class Artifact:
    """Description of a file produced by a tool call."""

    kind: str
    path: str
    mime: str
    bytes: int  # payload size; the field name is part of the serialized schema
    created_at: str = field(default_factory=_artifact_timestamp)

    def to_dict(self) -> dict[str, Any]:
        """Return the artifact as a plain dict, keys in field order."""
        keys = ("kind", "path", "mime", "bytes", "created_at")
        return {key: getattr(self, key) for key in keys}
|
||||
|
||||
|
||||
@dataclass
class BrowserContext:
    """How a tool call reached its browser: the mode plus optional identifiers."""

    mode: str
    session_id: str | None = None
    cdp_url: str | None = None

    def to_dict(self) -> dict[str, Any]:
        """Return the context as a plain dict, keys in field order."""
        keys = ("mode", "session_id", "cdp_url")
        return {key: getattr(self, key) for key in keys}
|
||||
|
||||
|
||||
def make_result(
    action: str,
    *,
    ok: bool = True,
    browser_context: BrowserContext | None = None,
    data: dict[str, Any] | None = None,
    artifacts: list[Artifact] | None = None,
    timing_ms: dict[str, int] | None = None,
    warnings: list[str] | None = None,
    error: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Assemble the result envelope returned by every tool.

    The envelope always carries the same eight keys. A missing browser context
    is replaced by one with mode ``"none"``; missing artifacts, timings and
    warnings become empty containers; ``data`` and ``error`` pass through
    unchanged, including ``None``.
    """
    context = browser_context if browser_context else BrowserContext(mode="none")

    serialized_artifacts = []
    for artifact in artifacts or ():
        serialized_artifacts.append(artifact.to_dict())

    envelope: dict[str, Any] = {}
    envelope["ok"] = ok
    envelope["action"] = action
    envelope["browser_context"] = context.to_dict()
    envelope["data"] = data
    envelope["artifacts"] = serialized_artifacts
    envelope["timing_ms"] = timing_ms if timing_ms else {}
    envelope["warnings"] = warnings if warnings else []
    envelope["error"] = error
    return envelope
|
||||
|
||||
|
||||
def make_error(
    code: str,
    message: str,
    hint: str,
    details: dict[str, Any] | None = None,
) -> dict[str, Any]:
    """Build the structured error payload placed in a result's ``error`` field.

    ``details`` defaults to an empty dict when omitted or empty.
    """
    payload: dict[str, Any] = dict(code=code, message=message, hint=hint)
    payload["details"] = details if details else {}
    return payload
|
||||
|
||||
|
||||
class Timer:
    """Collect named elapsed-time marks, in whole milliseconds.

    Intended use is as a context manager: entering restarts the clock and
    leaving records a ``"total"`` mark. ``mark(name)`` records the time elapsed
    so far under ``name``. ``timing_ms`` returns a copy of the marks recorded
    up to that point, so reading it inside the ``with`` block does not include
    ``"total"``.
    """

    def __init__(self) -> None:
        # Start the clock at construction so that mark() yields a meaningful
        # duration even if the timer is never entered; previously the start
        # was 0, which made such marks measure from perf_counter's epoch.
        self._start: float = time.perf_counter()
        self._marks: dict[str, int] = {}

    def __enter__(self) -> Timer:
        # Entering the context restarts the measurement.
        self._start = time.perf_counter()
        return self

    def __exit__(self, *args: Any) -> None:
        # Returns None, so exceptions raised inside the block propagate.
        self._marks["total"] = self._elapsed_ms()

    def _elapsed_ms(self) -> int:
        """Return whole milliseconds elapsed since the clock was (re)started."""
        return int((time.perf_counter() - self._start) * 1000)

    def mark(self, name: str) -> None:
        """Record the elapsed time so far under ``name`` (overwrites a prior mark)."""
        self._marks[name] = self._elapsed_ms()

    @property
    def timing_ms(self) -> dict[str, int]:
        """A copy of all marks recorded so far, keyed by name."""
        return self._marks.copy()
|
||||
153
skyvern/cli/core/session_manager.py
Normal file
153
skyvern/cli/core/session_manager.py
Normal file
@@ -0,0 +1,153 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from contextlib import asynccontextmanager
|
||||
from contextvars import ContextVar
|
||||
from dataclasses import dataclass, field
|
||||
from typing import TYPE_CHECKING, Any, AsyncIterator
|
||||
|
||||
from .client import get_skyvern
|
||||
from .result import BrowserContext, ErrorCode, make_error
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from skyvern.library.skyvern_browser import SkyvernBrowser
|
||||
from skyvern.library.skyvern_browser_page import SkyvernBrowserPage
|
||||
|
||||
|
||||
@dataclass
class SessionState:
    """Mutable per-context record of the active browser session.

    A default-constructed state means "no browser attached".
    """

    # Connected browser handle and the context describing how it was reached.
    browser: SkyvernBrowser | None = None
    context: BrowserContext | None = None

    # Each instance gets its own list (default_factory), never a shared one.
    console_messages: list[dict[str, Any]] = field(default_factory=list)

    # Feature flags; both start disabled.
    tracing_active: bool = False
    har_enabled: bool = False
|
||||
|
||||
|
||||
_current_session: ContextVar[SessionState | None] = ContextVar("mcp_session", default=None)
|
||||
|
||||
|
||||
def get_current_session() -> SessionState:
    """Return the session state for the current context.

    If the context variable holds no state yet, an empty ``SessionState`` is
    created, stored and returned, so callers never receive ``None``.
    """
    existing = _current_session.get()
    if existing is not None:
        return existing

    fresh = SessionState()
    _current_session.set(fresh)
    return fresh
|
||||
|
||||
|
||||
def set_current_session(state: SessionState) -> None:
    """Replace the current context's session state with ``state``."""
    _current_session.set(state)
|
||||
|
||||
|
||||
async def resolve_browser(
    session_id: str | None = None,
    cdp_url: str | None = None,
    local: bool = False,
    create_session: bool = False,
    timeout: int | None = None,
    headless: bool = False,
) -> tuple[SkyvernBrowser, BrowserContext]:
    """Resolve browser from parameters or current session.

    Resolution order (first match wins):

    1. ``session_id`` - connect to that cloud browser session.
    2. ``cdp_url`` - connect to a browser over CDP.
    3. ``local`` - launch a local browser (``headless`` applies here only).
    4. ``create_session`` - launch a new cloud browser (``timeout`` applies here only).
    5. Otherwise fall back to the browser stored in the current session.

    If the requested ``session_id`` (or, without one, ``cdp_url``) is the one
    the current session is already attached to, the existing browser is
    returned instead of opening another connection.

    Note: For MCP tools, sessions are stored in ContextVar and persist across tool calls.
    Cleanup is done via explicit skyvern_session_close() call. For scripts that need
    guaranteed cleanup, use the browser_session() context manager instead.

    Returns:
        The browser together with the ``BrowserContext`` describing it. A newly
        connected or launched browser also becomes the current session.

    Raises:
        BrowserNotAvailableError: No target was requested and the current
            session has no browser.
        Exception: Any error raised while connecting or launching is re-raised
            after the current session has been reset to an empty state.
    """
    skyvern = get_skyvern()
    current = get_current_session()

    # Reuse the connection we already hold when the caller names the same
    # target; tools pass session_id/cdp_url on every call, and reconnecting
    # each time would open a new connection and drop the old handle unclosed.
    if current.browser is not None and current.context is not None:
        if session_id:
            if current.context.session_id == session_id:
                return current.browser, current.context
        elif cdp_url and current.context.cdp_url == cdp_url:
            return current.browser, current.context

    browser: SkyvernBrowser | None = None
    try:
        if session_id:
            browser = await skyvern.connect_to_cloud_browser_session(session_id)
            ctx = BrowserContext(mode="cloud_session", session_id=session_id)
            set_current_session(SessionState(browser=browser, context=ctx))
            return browser, ctx

        if cdp_url:
            browser = await skyvern.connect_to_browser_over_cdp(cdp_url)
            ctx = BrowserContext(mode="cdp", cdp_url=cdp_url)
            set_current_session(SessionState(browser=browser, context=ctx))
            return browser, ctx

        if local:
            browser = await skyvern.launch_local_browser(headless=headless)
            ctx = BrowserContext(mode="local")
            set_current_session(SessionState(browser=browser, context=ctx))
            return browser, ctx

        if create_session:
            browser = await skyvern.launch_cloud_browser(timeout=timeout)
            ctx = BrowserContext(mode="cloud_session", session_id=browser.browser_session_id)
            set_current_session(SessionState(browser=browser, context=ctx))
            return browser, ctx
    except Exception:
        # A browser obtained just before the failure must not leak. Closing is
        # best effort so the original error is the one that propagates.
        if browser is not None:
            try:
                await browser.close()
            except Exception:
                pass
        set_current_session(SessionState())
        raise

    # No explicit target requested: fall back to whatever is already attached.
    if current.browser is not None and current.context is not None:
        return current.browser, current.context

    raise BrowserNotAvailableError()
|
||||
|
||||
|
||||
async def get_page(
    session_id: str | None = None,
    cdp_url: str | None = None,
) -> tuple[SkyvernBrowserPage, BrowserContext]:
    """Return the working page and browser context for a tool call.

    The browser is resolved through ``resolve_browser`` using only
    ``session_id`` and ``cdp_url``; errors from that call, including
    ``BrowserNotAvailableError``, propagate to the caller.
    """
    resolved = await resolve_browser(session_id=session_id, cdp_url=cdp_url)
    active_browser, browser_ctx = resolved
    working_page = await active_browser.get_working_page()
    return working_page, browser_ctx
|
||||
|
||||
|
||||
@asynccontextmanager
async def browser_session(
    session_id: str | None = None,
    cdp_url: str | None = None,
    local: bool = False,
    timeout: int | None = None,
    headless: bool = False,
) -> AsyncIterator[tuple[SkyvernBrowser, BrowserContext]]:
    """Async context manager that yields a browser and always cleans it up.

    Intended for scripts that need the browser closed even when the body
    raises. MCP tools call resolve_browser() directly because their sessions
    outlive a single call. When none of ``session_id``, ``cdp_url`` or
    ``local`` is given, a new cloud session is requested.

    Example:
        async with browser_session(local=True) as (browser, ctx):
            page = await browser.get_working_page()
            await page.goto("https://example.com")
        # Browser is automatically closed on exit or exception
    """
    # Only ask for a fresh cloud session when no other target was named.
    wants_new_session = not session_id and not cdp_url and not local

    acquired = await resolve_browser(
        session_id=session_id,
        cdp_url=cdp_url,
        local=local,
        create_session=wants_new_session,
        timeout=timeout,
        headless=headless,
    )
    active_browser, browser_ctx = acquired

    try:
        yield active_browser, browser_ctx
    finally:
        try:
            await active_browser.close()
        except Exception:
            # Best effort cleanup: a failing close must not mask the body's error.
            pass
        # Leave the context with an empty session, whether or not close succeeded.
        set_current_session(SessionState())
|
||||
|
||||
|
||||
class BrowserNotAvailableError(Exception):
    """Signals that no browser could be resolved for the current call.

    Raised when no target was requested and the current session holds no
    browser; tools translate it into a ``NO_ACTIVE_BROWSER`` error result.
    """
|
||||
|
||||
|
||||
def no_browser_error() -> dict[str, Any]:
    """Return the standard error payload for "no browser session available"."""
    hint = "Create a session with skyvern_session_create, provide session_id, or cdp_url"
    return make_error(ErrorCode.NO_ACTIVE_BROWSER, "No browser session available", hint)
|
||||
201
skyvern/cli/mcp_tools/__init__.py
Normal file
201
skyvern/cli/mcp_tools/__init__.py
Normal file
@@ -0,0 +1,201 @@
|
||||
"""Skyvern MCP Tools.
|
||||
|
||||
This module provides MCP (Model Context Protocol) tools for browser automation.
|
||||
Tools are registered with FastMCP and can be used by AI assistants like Claude.
|
||||
"""
|
||||
|
||||
from fastmcp import FastMCP
|
||||
|
||||
from .browser import (
|
||||
skyvern_act,
|
||||
skyvern_click,
|
||||
skyvern_evaluate,
|
||||
skyvern_extract,
|
||||
skyvern_navigate,
|
||||
skyvern_press_key,
|
||||
skyvern_run_task,
|
||||
skyvern_screenshot,
|
||||
skyvern_scroll,
|
||||
skyvern_select_option,
|
||||
skyvern_type,
|
||||
skyvern_validate,
|
||||
skyvern_wait,
|
||||
)
|
||||
from .session import (
|
||||
skyvern_session_close,
|
||||
skyvern_session_connect,
|
||||
skyvern_session_create,
|
||||
skyvern_session_get,
|
||||
skyvern_session_list,
|
||||
)
|
||||
|
||||
mcp = FastMCP(
|
||||
"Skyvern",
|
||||
instructions="""Use Skyvern tools whenever the task involves visiting, browsing, or interacting with ANY website or web application.
|
||||
|
||||
## When to Use These Tools
|
||||
Reach for Skyvern tools when the user asks you to:
|
||||
- Visit, browse, or interact with ANY website or web application
|
||||
- Extract data from web pages (prices, listings, articles, tables, search results, etc.)
|
||||
- Fill out forms, log in, sign up, or complete web-based workflows
|
||||
- Check the current state of a web page or verify something on a site
|
||||
- Do anything you would otherwise attempt with requests, beautifulsoup, selenium, or playwright
|
||||
- Access website data where you are unsure whether an API endpoint exists
|
||||
|
||||
DO NOT try to scrape websites by guessing API endpoints or writing HTTP requests.
|
||||
Instead, use skyvern_navigate + skyvern_extract to get real data from actual pages.
|
||||
These tools give you a real browser — use them instead of writing scraping code.
|
||||
|
||||
## Examples
|
||||
| User says | Use |
|
||||
|-----------|-----|
|
||||
| "Go to amazon.com" | skyvern_navigate |
|
||||
| "What's on this page?" | skyvern_screenshot |
|
||||
| "Get all product prices" | skyvern_extract |
|
||||
| "Click the login button" | skyvern_act or skyvern_click |
|
||||
| "Fill out this form" | skyvern_act |
|
||||
| "Log in and buy the first item" | skyvern_run_task |
|
||||
| "Is checkout complete?" | skyvern_validate |
|
||||
| "Write a script to do this" | Skyvern SDK (see below) |
|
||||
|
||||
## Writing Scripts and Code
|
||||
When asked to write an automation script, use the Skyvern Python SDK with the **hybrid xpath+prompt
|
||||
pattern** for production-quality scripts. The hybrid form tries the xpath/selector first (fast,
|
||||
deterministic) and falls back to AI if the selector breaks — this is the recommended pattern.
|
||||
|
||||
from skyvern import Skyvern
|
||||
skyvern = Skyvern(api_key="YOUR_API_KEY")
|
||||
browser = await skyvern.launch_cloud_browser()
|
||||
page = await browser.get_working_page()
|
||||
await page.goto("https://example.com")
|
||||
|
||||
# BEST: hybrid selector+prompt — fast deterministic selector with AI fallback
|
||||
await page.click("xpath=//button[@id='submit']", prompt="the Submit button")
|
||||
await page.fill("xpath=//input[@name='email']", "user@example.com", prompt="email input field")
|
||||
|
||||
# OK for exploration, but prefer hybrid for production scripts:
|
||||
await page.click(prompt="the Submit button")
|
||||
|
||||
data = await page.extract("Get all product names and prices")
|
||||
|
||||
To get xpaths for hybrid calls, use precision tools (skyvern_click, skyvern_type) during exploration.
|
||||
The `resolved_selector` field in responses gives you the xpath the AI resolved to. Use it in scripts:
|
||||
explore: skyvern_click(intent="Submit button") → response includes resolved_selector="xpath=//button[@id='submit']"
|
||||
script: await page.click("xpath=//button[@id='submit']", prompt="Submit button")
|
||||
|
||||
IMPORTANT: NEVER import from skyvern.cli.mcp_tools — those are internal server modules.
|
||||
The public SDK is: from skyvern import Skyvern
|
||||
|
||||
## Recommended Workflow
|
||||
1. **Connect** — Create or connect to a browser session
|
||||
2. **Explore** — Navigate pages, take screenshots, extract data with AI
|
||||
3. **Build** — Capture selectors and data schemas to construct deterministic workflows
|
||||
4. **Test** — Validate workflows via skyvern_run_task
|
||||
|
||||
## Primary Tools (use these first)
|
||||
These are the tools you should reach for by default:
|
||||
|
||||
- **skyvern_act** — Execute actions from natural language: "log in with test@example.com", "add the first item to cart". Best for exploration and testing flows.
|
||||
- **skyvern_extract** — Pull structured data from any page with natural language + optional JSON Schema. THE differentiator over raw Playwright.
|
||||
- **skyvern_validate** — Assert page conditions with AI: "is the user logged in?", "does the cart have 3 items?"
|
||||
- **skyvern_run_task** — Delegate a full multi-step task to an autonomous AI agent with observability. Use for end-to-end task execution.
|
||||
- **skyvern_navigate** — Go to a URL. Always the first step after connecting.
|
||||
- **skyvern_screenshot** — See what's on the page. Essential for understanding page state.
|
||||
- **skyvern_evaluate** — Run JavaScript to read DOM state, get URLs, or check values.
|
||||
|
||||
## Precision Tools (for debugging and exact control)
|
||||
Use these when the primary tools aren't specific enough, or when you need deterministic
|
||||
selector-based actions (e.g., replaying a known flow):
|
||||
|
||||
- **skyvern_click** — Click a specific element by selector or AI intent
|
||||
- **skyvern_type** — Type into a specific input field by selector or AI intent
|
||||
- **skyvern_scroll** — Scroll the page or an element into view
|
||||
- **skyvern_select_option** — Select a dropdown option by selector or AI intent
|
||||
- **skyvern_press_key** — Press a keyboard key (Enter, Tab, Escape, etc.)
|
||||
- **skyvern_wait** — Wait for a condition, element, or time delay
|
||||
|
||||
## Tool Modes (precision tools)
|
||||
Precision tools support three modes. When unsure, use `intent`.
|
||||
|
||||
1. **Intent mode** — AI-powered element finding:
|
||||
`skyvern_click(intent="the blue Submit button")`
|
||||
|
||||
2. **Hybrid mode** — tries selector first, AI fallback:
|
||||
`skyvern_click(selector="#submit-btn", intent="the Submit button")`
|
||||
|
||||
3. **Selector mode** — deterministic CSS/XPath targeting:
|
||||
`skyvern_click(selector="#submit-btn")`
|
||||
|
||||
## Replay Story: From Exploration to Production Scripts
|
||||
When you use precision tools (skyvern_click, skyvern_type, etc.) with intent mode, the response
|
||||
includes `resolved_selector` — the xpath/CSS the AI found. Capture these to build hybrid scripts.
|
||||
|
||||
**The hybrid pattern** is the recommended default for SDK scripts:
|
||||
await page.click("xpath=//button[@id='submit']", prompt="the Submit button")
|
||||
It tries the selector first (fast, no AI cost), then falls back to AI if the selector breaks.
|
||||
|
||||
**Workflow for generating scripts:**
|
||||
1. Explore: Use skyvern_click(intent="Submit button") during interactive exploration
|
||||
2. Capture: Note the `resolved_selector` from the response (e.g., "xpath=//button[@id='submit']")
|
||||
3. Script: Write `page.click("xpath=//button[@id='submit']", prompt="Submit button")`
|
||||
|
||||
The `sdk_equivalent` field in each tool response shows the correct hybrid call to use in scripts.
|
||||
Always prefer hybrid xpath+prompt over prompt-only in generated scripts.
|
||||
|
||||
Note: Currently only skyvern_click returns resolved_selector. Support for skyvern_type and
|
||||
skyvern_select_option is planned (SKY-7905). For those tools, use the selector you provided
|
||||
as input, or fall back to prompt-only until SKY-7905 ships.
|
||||
|
||||
## Getting Started
|
||||
Create a session with skyvern_session_create, then use browser tools to interact with pages.
|
||||
""",
|
||||
)
|
||||
|
||||
# -- Session management --
|
||||
mcp.tool()(skyvern_session_create)
|
||||
mcp.tool()(skyvern_session_close)
|
||||
mcp.tool()(skyvern_session_list)
|
||||
mcp.tool()(skyvern_session_get)
|
||||
mcp.tool()(skyvern_session_connect)
|
||||
|
||||
# -- Primary tools (AI-powered exploration + observation) --
|
||||
mcp.tool()(skyvern_act)
|
||||
mcp.tool()(skyvern_extract)
|
||||
mcp.tool()(skyvern_validate)
|
||||
mcp.tool()(skyvern_run_task)
|
||||
mcp.tool()(skyvern_navigate)
|
||||
mcp.tool()(skyvern_screenshot)
|
||||
mcp.tool()(skyvern_evaluate)
|
||||
|
||||
# -- Precision tools (selector/intent-based browser primitives) --
|
||||
mcp.tool()(skyvern_click)
|
||||
mcp.tool()(skyvern_type)
|
||||
mcp.tool()(skyvern_scroll)
|
||||
mcp.tool()(skyvern_select_option)
|
||||
mcp.tool()(skyvern_press_key)
|
||||
mcp.tool()(skyvern_wait)
|
||||
|
||||
__all__ = [
|
||||
"mcp",
|
||||
# Session
|
||||
"skyvern_session_create",
|
||||
"skyvern_session_close",
|
||||
"skyvern_session_list",
|
||||
"skyvern_session_get",
|
||||
"skyvern_session_connect",
|
||||
# Primary (AI-powered)
|
||||
"skyvern_act",
|
||||
"skyvern_extract",
|
||||
"skyvern_validate",
|
||||
"skyvern_run_task",
|
||||
"skyvern_navigate",
|
||||
"skyvern_screenshot",
|
||||
"skyvern_evaluate",
|
||||
# Precision (selector/intent browser primitives)
|
||||
"skyvern_click",
|
||||
"skyvern_type",
|
||||
"skyvern_scroll",
|
||||
"skyvern_select_option",
|
||||
"skyvern_press_key",
|
||||
"skyvern_wait",
|
||||
]
|
||||
11
skyvern/cli/mcp_tools/_client.py
Normal file
11
skyvern/cli/mcp_tools/_client.py
Normal file
@@ -0,0 +1,11 @@
|
||||
"""Skyvern HTTP API client accessor.
|
||||
|
||||
Workflow tools import from here to get the API client without pulling in
|
||||
browser/Playwright dependencies.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from skyvern.cli.core.client import get_skyvern
|
||||
|
||||
__all__ = ["get_skyvern"]
|
||||
20
skyvern/cli/mcp_tools/_common.py
Normal file
20
skyvern/cli/mcp_tools/_common.py
Normal file
@@ -0,0 +1,20 @@
|
||||
"""Backward-compatible re-exports from skyvern.cli.core.
|
||||
|
||||
MCP tools import from here; the canonical implementations live in core/.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from skyvern.cli.core.artifacts import get_artifact_dir, save_artifact
|
||||
from skyvern.cli.core.result import Artifact, BrowserContext, ErrorCode, Timer, make_error, make_result
|
||||
|
||||
__all__ = [
|
||||
"Artifact",
|
||||
"BrowserContext",
|
||||
"ErrorCode",
|
||||
"Timer",
|
||||
"get_artifact_dir",
|
||||
"make_error",
|
||||
"make_result",
|
||||
"save_artifact",
|
||||
]
|
||||
30
skyvern/cli/mcp_tools/_session.py
Normal file
30
skyvern/cli/mcp_tools/_session.py
Normal file
@@ -0,0 +1,30 @@
|
||||
"""Backward-compatible re-exports from skyvern.cli.core.
|
||||
|
||||
MCP tools import from here; the canonical implementations live in core/.
|
||||
"""
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
from skyvern.cli.core.client import get_skyvern
|
||||
from skyvern.cli.core.session_manager import (
|
||||
BrowserNotAvailableError,
|
||||
SessionState,
|
||||
browser_session,
|
||||
get_current_session,
|
||||
get_page,
|
||||
no_browser_error,
|
||||
resolve_browser,
|
||||
set_current_session,
|
||||
)
|
||||
|
||||
__all__ = [
|
||||
"BrowserNotAvailableError",
|
||||
"SessionState",
|
||||
"browser_session",
|
||||
"get_current_session",
|
||||
"get_page",
|
||||
"get_skyvern",
|
||||
"no_browser_error",
|
||||
"resolve_browser",
|
||||
"set_current_session",
|
||||
]
|
||||
939
skyvern/cli/mcp_tools/browser.py
Normal file
939
skyvern/cli/mcp_tools/browser.py
Normal file
@@ -0,0 +1,939 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import asyncio
|
||||
import base64
|
||||
import json
|
||||
from datetime import datetime, timezone
|
||||
from typing import Annotated, Any
|
||||
|
||||
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
|
||||
from pydantic import Field
|
||||
|
||||
from ._common import (
|
||||
ErrorCode,
|
||||
Timer,
|
||||
make_error,
|
||||
make_result,
|
||||
save_artifact,
|
||||
)
|
||||
from ._session import BrowserNotAvailableError, get_page, no_browser_error
|
||||
|
||||
|
||||
def _resolve_ai_mode(
|
||||
selector: str | None,
|
||||
intent: str | None,
|
||||
) -> tuple[str | None, str | None]:
|
||||
"""Determine AI mode from selector/intent combination.
|
||||
|
||||
Returns (ai_mode, error_code) — if error_code is set, the call should fail.
|
||||
"""
|
||||
if intent and not selector:
|
||||
return "proactive", None
|
||||
if intent and selector:
|
||||
return "fallback", None
|
||||
if selector and not intent:
|
||||
return None, None
|
||||
return None, "INVALID_INPUT"
|
||||
|
||||
|
||||
async def skyvern_navigate(
    url: Annotated[str, "The URL to navigate to"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=120000)] = 30000,
    wait_until: Annotated[str | None, Field(description="Wait condition: load, domcontentloaded, networkidle")] = None,
) -> dict[str, Any]:
    """Open a website in the browser. Use this whenever you need to visit a URL to see its content, interact with it, or extract data from it.

    Returns the final URL (after redirects) and page title.
    """
    # NOTE(review): the docstring above is presumably surfaced as the MCP tool
    # description, so it is kept verbatim; implementation notes live in comments.

    # Validate before touching the browser so bad input fails cheaply.
    if wait_until is not None and wait_until not in ("load", "domcontentloaded", "networkidle", "commit"):
        return make_result(
            "skyvern_navigate",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid wait_until: {wait_until}",
                "Use load, domcontentloaded, networkidle, or commit",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_navigate", ok=False, error=no_browser_error())

    # The failure is captured and reported after the timer block closes, so
    # error results carry the "total" timing just like successful ones.
    failure: Exception | None = None
    final_url: str | None = None
    title: str | None = None
    with Timer() as timer:
        try:
            await page.goto(url, timeout=timeout, wait_until=wait_until)
            timer.mark("sdk")
            final_url = page.url
            title = await page.title()
        except Exception as e:
            failure = e

    if failure is not None:
        return make_result(
            "skyvern_navigate",
            ok=False,
            browser_context=ctx,
            timing_ms=timer.timing_ms,
            error=make_error(ErrorCode.ACTION_FAILED, str(failure), "Check that the URL is valid and accessible"),
        )

    # json.dumps emits a double-quoted, escaped literal, so the generated
    # snippet stays valid Python even if the URL contains quotes or backslashes.
    url_literal = json.dumps(url, ensure_ascii=False)
    return make_result(
        "skyvern_navigate",
        browser_context=ctx,
        data={"url": final_url, "title": title, "sdk_equivalent": f"await page.goto({url_literal})"},
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
def _click_sdk_equivalent(
    selector: str | None,
    intent: str | None,
    ai_mode: str | None,
    resolved: str | None,
) -> str | None:
    """Render the SDK ``page.click`` call matching a completed click, or None.

    Prefers the hybrid selector+prompt form. Arguments are emitted with
    ``json.dumps`` so selectors or intents containing quotes or backslashes
    (e.g. ``button[name="x"]``) still produce a valid Python snippet.
    """

    def literal(value: str | None) -> str:
        return json.dumps(value, ensure_ascii=False)

    # resolved already contains the "xpath=" prefix (e.g. "xpath=//button[@id='x']"),
    # so it is passed directly as the selector positional arg.
    effective_selector = resolved if resolved and resolved != selector else selector
    if effective_selector and intent:
        return f"await page.click({literal(effective_selector)}, prompt={literal(intent)})"
    if ai_mode:
        return f"await page.click(prompt={literal(intent)})"
    if selector:
        return f"await page.click({literal(selector)})"
    return None


async def skyvern_click(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
        str | None, Field(description="Natural language description of the element to click (uses AI)")
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector or XPath for the element to click")] = None,
    timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=60000)] = 30000,
    button: Annotated[str | None, Field(description="Mouse button: left, right, middle")] = None,
    click_count: Annotated[int | None, Field(description="Number of clicks (2 for double-click)")] = None,
) -> dict[str, Any]:
    """Click an element on the page. Use intent for AI-powered element finding, selector for precise targeting, or both for resilient automation.

    Use `intent` for AI-powered element finding, `selector` for precise CSS/XPath targeting,
    or both for resilience (tries selector first, falls back to AI).
    """
    # NOTE(review): the docstring above is presumably surfaced as the MCP tool
    # description, so it is kept verbatim; implementation notes live in comments.

    # Validate inputs before resolving a browser so bad calls fail cheaply.
    if button is not None and button not in ("left", "right", "middle"):
        return make_result(
            "skyvern_click",
            ok=False,
            error=make_error(ErrorCode.INVALID_INPUT, f"Invalid button: {button}", "Use left, right, or middle"),
        )

    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
        return make_result(
            "skyvern_click",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or both",
                "Use intent='describe what to click' for AI-powered clicking, or selector='#css-selector' for precise targeting",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_click", ok=False, error=no_browser_error())

    # Only forward optional click arguments the caller actually supplied.
    kwargs: dict[str, Any] = {"timeout": timeout}
    if button:
        kwargs["button"] = button
    if click_count is not None:
        kwargs["click_count"] = click_count

    # The error payload is built inside the timer block but returned after it
    # closes, so failed clicks report the "total" timing like successful ones.
    error: dict[str, Any] | None = None
    resolved: str | None = None
    with Timer() as timer:
        try:
            if ai_mode is not None:
                resolved = await page.click(selector=selector, prompt=intent, ai=ai_mode, **kwargs)  # type: ignore[arg-type]
            else:
                # Type narrowing only: _resolve_ai_mode returns ai_mode=None
                # without an error solely when a selector was provided.
                assert selector is not None
                resolved = await page.click(selector=selector, **kwargs)
            timer.mark("sdk")
        except PlaywrightTimeoutError as e:
            error = make_error(
                ErrorCode.SELECTOR_NOT_FOUND,
                str(e),
                "Verify the selector matches an element on the page, or use intent for AI-powered finding",
            )
        except Exception as e:
            code = ErrorCode.AI_FALLBACK_FAILED if ai_mode else ErrorCode.ACTION_FAILED
            error = make_error(
                code,
                str(e),
                "The element may be hidden, disabled, or intercepted by another element",
            )

    if error is not None:
        return make_result(
            "skyvern_click",
            ok=False,
            browser_context=ctx,
            timing_ms=timer.timing_ms,
            error=error,
        )

    data: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode}
    # Report the selector that was actually used only when it differs from the input.
    if resolved and resolved != selector:
        data["resolved_selector"] = resolved

    sdk_equivalent = _click_sdk_equivalent(selector, intent, ai_mode, resolved)
    if sdk_equivalent is not None:
        data["sdk_equivalent"] = sdk_equivalent

    return make_result(
        "skyvern_click",
        browser_context=ctx,
        data=data,
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_type(
    text: Annotated[str, "Text to type into the element"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
        str | None, Field(description="Natural language description of the input field (uses AI)")
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector or XPath for the input element")] = None,
    timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=60000)] = 30000,
    clear: Annotated[bool, Field(description="Clear existing content before typing")] = True,
    delay: Annotated[int | None, Field(description="Delay between keystrokes in ms")] = None,
) -> dict[str, Any]:
    """Type text into an input field. Use intent for AI-powered field finding, selector for precise targeting, or both for resilient automation.

    Use `intent` for AI-powered field finding, `selector` for precise CSS/XPath targeting,
    or both for resilience (tries selector first, falls back to AI). Clears existing content by default.
    """
    # Returns a make_result(...) payload for both success and failure; no exception
    # raised inside the action block propagates to the caller.

    def _lit(value: str | None) -> str:
        # Render a value as a double-quoted literal with quotes/backslashes/control
        # characters escaped, so the generated snippet stays syntactically valid.
        return json.dumps(value, ensure_ascii=False)

    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
        return make_result(
            "skyvern_type",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or both",
                "Use intent='describe the input field' for AI-powered targeting, or selector='#css-selector' for precise targeting",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_type", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            if clear:
                # fill() path: `delay` is not consulted here.
                if ai_mode is not None:
                    await page.fill(selector=selector, value=text, prompt=intent, ai=ai_mode, timeout=timeout)  # type: ignore[arg-type]
                else:
                    assert selector is not None
                    await page.fill(selector, text, timeout=timeout)
            else:
                # type() path: keystrokes are sent without clearing first.
                kwargs: dict[str, Any] = {"timeout": timeout}
                if delay is not None:
                    kwargs["delay"] = delay
                if ai_mode is not None:
                    loc = page.locator(selector=selector, prompt=intent, ai=ai_mode)  # type: ignore[arg-type]
                    await loc.type(text, **kwargs)
                else:
                    assert selector is not None
                    await page.type(selector, text, **kwargs)
            timer.mark("sdk")
        except PlaywrightTimeoutError as e:
            return make_result(
                "skyvern_type",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.SELECTOR_NOT_FOUND,
                    str(e),
                    "Verify the selector matches an editable element, or use intent for AI-powered finding",
                ),
            )
        except Exception as e:
            code = ErrorCode.AI_FALLBACK_FAILED if ai_mode else ErrorCode.ACTION_FAILED
            return make_result(
                "skyvern_type",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    code,
                    str(e),
                    "The element may not be editable or may be hidden",
                ),
            )

    # NOTE: The SDK fill() returns the typed value, not a resolved selector.
    # Unlike click(), we cannot return resolved_selector here. SKY-7905 will
    # update the SDK to return element metadata from all action methods.
    data: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode, "text_length": len(text)}

    # Build sdk_equivalent: prefer hybrid selector+prompt for production scripts.
    # The snippet mirrors the call that was actually made (fill when clearing,
    # type otherwise) instead of always reporting fill().
    text_lit = _lit(text)
    if clear:
        if selector and intent:
            data["sdk_equivalent"] = f"await page.fill({_lit(selector)}, {text_lit}, prompt={_lit(intent)})"
        elif ai_mode:
            data["sdk_equivalent"] = f"await page.fill(prompt={_lit(intent)}, value={text_lit})"
        elif selector:
            data["sdk_equivalent"] = f"await page.fill({_lit(selector)}, {text_lit})"
    else:
        delay_arg = f", delay={delay}" if delay is not None else ""
        if selector and intent:
            data["sdk_equivalent"] = (
                f"await page.locator({_lit(selector)}, prompt={_lit(intent)}).type({text_lit}{delay_arg})"
            )
        elif ai_mode:
            data["sdk_equivalent"] = f"await page.locator(prompt={_lit(intent)}).type({text_lit}{delay_arg})"
        elif selector:
            data["sdk_equivalent"] = f"await page.type({_lit(selector)}, {text_lit}{delay_arg})"

    return make_result(
        "skyvern_type",
        browser_context=ctx,
        data=data,
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_screenshot(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    full_page: Annotated[bool, Field(description="Capture full scrollable page")] = False,
    selector: Annotated[str | None, Field(description="CSS selector to screenshot specific element")] = None,
    inline: Annotated[bool, Field(description="Return base64 data instead of file path")] = False,
) -> dict[str, Any]:
    """See what's currently on the page. Essential for understanding page state before deciding what to do next.

    By default saves to ~/.skyvern/artifacts/ and returns the file path.
    Set inline=true to get base64 data directly (increases token usage).
    """
    tool = "skyvern_screenshot"

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool, ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            # A selector narrows the capture to one element; `full_page` only
            # applies to the whole-page capture.
            if not selector:
                png = await page.screenshot(full_page=full_page)
            else:
                png = await page.locator(selector).screenshot()
            timer.mark("sdk")
        except Exception as e:
            failure = make_error(ErrorCode.ACTION_FAILED, str(e), "Check that the page or element is visible")
            return make_result(
                tool,
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=failure,
            )

    if not inline:
        # File mode: persist the image as an artifact and hand back its path.
        stamp = datetime.now(timezone.utc).strftime("%H%M%S_%f")
        artifact = save_artifact(
            png,
            kind="screenshot",
            filename=f"screenshot_{stamp}.png",
            mime="image/png",
            session_id=ctx.session_id,
        )
        return make_result(
            tool,
            browser_context=ctx,
            data={"path": artifact.path, "sdk_equivalent": "await page.screenshot(path='screenshot.png')"},
            artifacts=[artifact],
            timing_ms=timer.timing_ms,
        )

    # Inline mode: embed the image as base64 text in the result payload.
    payload = {
        "inline": True,
        "data": base64.b64encode(png).decode("utf-8"),
        "mime": "image/png",
        "bytes": len(png),
        "sdk_equivalent": "await page.screenshot()",
    }
    return make_result(
        tool,
        browser_context=ctx,
        data=payload,
        timing_ms=timer.timing_ms,
        warnings=["Inline mode increases token usage"],
    )
|
||||
|
||||
|
||||
async def skyvern_scroll(
    direction: Annotated[str, Field(description="Direction: up, down, left, right")],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    amount: Annotated[int | None, Field(description="Pixels to scroll (default 500)")] = None,
    intent: Annotated[
        str | None, Field(description="Natural language description of element to scroll into view (uses AI)")
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector of scrollable element")] = None,
) -> dict[str, Any]:
    """Scroll the page or use AI to scroll a specific element into view.

    Use `intent` to scroll an AI-located element into view (with or without selector for hybrid fallback).
    Without intent, scrolls the page or a selector-targeted container by pixel amount.
    """

    def _lit(value: str | None) -> str:
        # Escape quotes/backslashes so the generated snippet stays valid code.
        return json.dumps(value, ensure_ascii=False)

    # `direction` is only validated for pixel scrolling; the intent path ignores it.
    valid_directions = ("up", "down", "left", "right")
    if not intent and direction not in valid_directions:
        return make_result(
            "skyvern_scroll",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT, f"Invalid direction: {direction}", "Use up, down, left, or right"
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_scroll", ok=False, error=no_browser_error())

    if intent:
        # With a selector the AI acts as a fallback; otherwise it locates the element itself.
        ai_mode = "fallback" if selector else "proactive"
        with Timer() as timer:
            try:
                loc = page.locator(selector=selector, prompt=intent, ai=ai_mode)
                await loc.scroll_into_view_if_needed()
                timer.mark("sdk")
            except Exception as e:
                code = ErrorCode.AI_FALLBACK_FAILED if ai_mode == "fallback" else ErrorCode.ACTION_FAILED
                return make_result(
                    "skyvern_scroll",
                    ok=False,
                    browser_context=ctx,
                    timing_ms=timer.timing_ms,
                    error=make_error(code, str(e), "Could not find element to scroll into view"),
                )

        if selector:
            into_view_eq = (
                f"await page.locator({_lit(selector)}, prompt={_lit(intent)}).scroll_into_view_if_needed()"
            )
        else:
            into_view_eq = f"await page.locator(prompt={_lit(intent)}).scroll_into_view_if_needed()"
        return make_result(
            "skyvern_scroll",
            browser_context=ctx,
            data={
                "direction": "into_view",
                "intent": intent,
                "ai_mode": ai_mode,
                "sdk_equivalent": into_view_eq,
            },
            timing_ms=timer.timing_ms,
        )

    # A missing (or zero) amount falls back to the documented 500px default.
    pixels = amount or 500
    direction_map = {
        "up": (0, -pixels),
        "down": (0, pixels),
        "left": (-pixels, 0),
        "right": (pixels, 0),
    }
    dx, dy = direction_map[direction]

    with Timer() as timer:
        try:
            if selector:
                await page.locator(selector).evaluate(f"el => el.scrollBy({dx}, {dy})")
            else:
                await page.evaluate(f"window.scrollBy({dx}, {dy})")
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_scroll",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.ACTION_FAILED, str(e), "Scroll action failed"),
            )

    # Report the call that was actually made: a container scroll when a selector
    # was given, a window scroll otherwise.
    if selector:
        pixel_eq = f'await page.locator({_lit(selector)}).evaluate("el => el.scrollBy({dx}, {dy})")'
    else:
        pixel_eq = f'await page.evaluate("window.scrollBy({dx}, {dy})")'

    return make_result(
        "skyvern_scroll",
        browser_context=ctx,
        data={
            "direction": direction,
            "pixels": pixels,
            "sdk_equivalent": pixel_eq,
        },
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_select_option(
    value: Annotated[str, "Value to select"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[str | None, Field(description="Natural language description of the dropdown (uses AI)")] = None,
    selector: Annotated[str | None, Field(description="CSS selector for the select element")] = None,
    timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=60000)] = 30000,
    by_label: Annotated[bool, Field(description="Select by visible label instead of value")] = False,
) -> dict[str, Any]:
    """Select an option from a dropdown menu. Use intent for AI-powered finding, selector for precision.

    Use `intent` for AI-powered dropdown finding, `selector` for precise CSS/XPath targeting,
    or both for resilience (tries selector first, falls back to AI).
    """

    def _lit(text: str | None) -> str:
        # Escape quotes/backslashes so the generated snippet stays valid code.
        return json.dumps(text, ensure_ascii=False)

    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
        return make_result(
            "skyvern_select_option",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or both",
                "Use intent='describe the dropdown' for AI-powered selection, or selector='#css-selector' for precise targeting",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_select_option", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            if ai_mode is not None:
                # AI paths: pass value= directly -- the AI interprets the text
                # regardless of whether it represents a value or label.
                await page.select_option(selector=selector, value=value, prompt=intent, ai=ai_mode, timeout=timeout)  # type: ignore[arg-type]
            else:
                assert selector is not None
                if by_label:
                    # Bypass SkyvernPage to avoid value="" coercion conflicting with label kwarg.
                    await page.page.locator(selector).select_option(label=value, timeout=timeout)
                else:
                    await page.select_option(selector, value=value, timeout=timeout)
            timer.mark("sdk")
        except Exception as e:
            code = ErrorCode.AI_FALLBACK_FAILED if ai_mode else ErrorCode.ACTION_FAILED
            return make_result(
                "skyvern_select_option",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(code, str(e), "Check selector and available options"),
            )

    # NOTE: The SDK select_option() returns the selected value, not a resolved
    # selector. Unlike click(), we cannot return resolved_selector here.
    # SKY-7905 will update the SDK to return element metadata from all action methods.
    data: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode, "value": value}

    # Build sdk_equivalent: prefer hybrid selector+prompt for production scripts
    if selector and intent:
        data["sdk_equivalent"] = (
            f"await page.select_option({_lit(selector)}, value={_lit(value)}, prompt={_lit(intent)})"
        )
    elif ai_mode:
        data["sdk_equivalent"] = f"await page.select_option(prompt={_lit(intent)}, value={_lit(value)})"
    elif selector:
        if by_label:
            # Mirror the label-based call that was actually executed above.
            data["sdk_equivalent"] = f"await page.page.locator({_lit(selector)}).select_option(label={_lit(value)})"
        else:
            data["sdk_equivalent"] = f"await page.select_option({_lit(selector)}, value={_lit(value)})"

    return make_result(
        "skyvern_select_option",
        browser_context=ctx,
        data=data,
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_press_key(
    key: Annotated[str, "Key to press (e.g., Enter, Tab, Escape, ArrowDown)"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
        str | None, Field(description="Natural language description of element to focus first (uses AI)")
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector to focus first")] = None,
) -> dict[str, Any]:
    """Press a keyboard key -- Enter, Tab, Escape, arrow keys, shortcuts, etc.

    Use `intent` or `selector` to focus a specific element before pressing.
    Without either, presses the key on the currently focused element.
    """

    def _lit(text: str | None) -> str:
        # Escape quotes/backslashes so the generated snippet stays valid code.
        return json.dumps(text, ensure_ascii=False)

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_press_key", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            if intent or selector:
                # At least one target is given here, so the validation error
                # from _resolve_ai_mode is deliberately discarded.
                ai_mode, _ = _resolve_ai_mode(selector, intent)
                if ai_mode is not None:
                    loc = page.locator(selector=selector, prompt=intent, ai=ai_mode)  # type: ignore[arg-type]
                else:
                    assert selector is not None
                    loc = page.locator(selector)
                await loc.press(key)
            else:
                await page.keyboard.press(key)
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_press_key",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.ACTION_FAILED, str(e), "Check key name is valid"),
            )

    # Generated snippet mirrors which targeting inputs were supplied.
    if selector and intent:
        sdk_eq = f"await page.locator({_lit(selector)}, prompt={_lit(intent)}).press({_lit(key)})"
    elif intent:
        sdk_eq = f"await page.locator(prompt={_lit(intent)}).press({_lit(key)})"
    elif selector:
        sdk_eq = f"await page.locator({_lit(selector)}).press({_lit(key)})"
    else:
        sdk_eq = f"await page.keyboard.press({_lit(key)})"

    return make_result(
        "skyvern_press_key",
        browser_context=ctx,
        data={
            "key": key,
            "selector": selector,
            "intent": intent,
            "sdk_equivalent": sdk_eq,
        },
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_wait(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    time_ms: Annotated[int | None, Field(description="Time to wait in milliseconds")] = None,
    intent: Annotated[str | None, Field(description="Natural language condition to wait for (uses AI polling)")] = None,
    selector: Annotated[str | None, Field(description="CSS selector to wait for")] = None,
    state: Annotated[str | None, Field(description="Element state: visible, hidden, attached, detached")] = "visible",
    timeout: Annotated[int, Field(description="Max wait time in milliseconds", ge=1000, le=120000)] = 30000,
    poll_interval_ms: Annotated[
        int, Field(description="Polling interval for intent-based waits in ms", ge=500, le=10000)
    ] = 5000,
) -> dict[str, Any]:
    """Wait for a condition, element, or time delay before proceeding. Use intent for AI-powered condition checking.

    Use `intent` to poll with AI validation (e.g., "wait until the loading spinner disappears").
    Use `selector` to wait for an element state. Use `time_ms` for a simple delay.
    """
    # Precedence when several inputs are given: time_ms, then intent, then selector.
    valid_states = ("visible", "hidden", "attached", "detached")
    if state is not None and state not in valid_states:
        return make_result(
            "skyvern_wait",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid state: {state}",
                "Use visible, hidden, attached, or detached",
            ),
        )

    if time_ms is None and not selector and not intent:
        return make_result(
            "skyvern_wait",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or time_ms",
                "Use intent='condition to wait for' for AI-powered waiting, selector='#element' for element visibility, or time_ms=5000 for a delay",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_wait", ok=False, error=no_browser_error())

    waited_for = ""
    with Timer() as timer:
        try:
            if time_ms is not None:
                await page.wait_for_timeout(time_ms)
                waited_for = "time"
            elif intent:
                loop = asyncio.get_running_loop()
                deadline = loop.time() + timeout / 1000
                last_error: Exception | None = None
                while True:
                    try:
                        result = await page.validate(intent)
                        last_error = None
                    except Exception as poll_err:
                        # A failed poll counts as "not yet"; the error is only
                        # surfaced if it is still the latest outcome at the deadline.
                        result = False
                        last_error = poll_err
                    if result:
                        break
                    now = loop.time()
                    if now >= deadline:
                        code = ErrorCode.SDK_ERROR if last_error else ErrorCode.TIMEOUT
                        msg = str(last_error) if last_error else f"Condition not met within {timeout}ms: {intent}"
                        return make_result(
                            "skyvern_wait",
                            ok=False,
                            browser_context=ctx,
                            timing_ms=timer.timing_ms,
                            error=make_error(
                                code,
                                msg,
                                "Increase timeout or check that the condition can be satisfied",
                            ),
                        )
                    # Never sleep past the deadline: cap the pause at the time
                    # remaining so `timeout` is not overshot by a full poll interval.
                    remaining_ms = (deadline - now) * 1000
                    await page.wait_for_timeout(min(poll_interval_ms, remaining_ms))
                waited_for = "intent"
            elif selector:
                await page.wait_for_selector(selector, state=state, timeout=timeout)
                waited_for = "selector"
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_wait",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.TIMEOUT, str(e), "Condition was not met within timeout"),
            )

    sdk_eq = ""
    if waited_for == "time":
        sdk_eq = f"await page.wait_for_timeout({time_ms})"
    elif waited_for == "intent":
        # json.dumps escapes quotes/backslashes so the snippet stays valid code.
        sdk_eq = f"await page.validate({json.dumps(intent, ensure_ascii=False)})"
    elif waited_for == "selector":
        # Only spell out `state` when it differs from the default, keeping the
        # common-case snippet unchanged. `state` was validated above.
        state_arg = f', state="{state}"' if state not in (None, "visible") else ""
        sdk_eq = f"await page.wait_for_selector({json.dumps(selector, ensure_ascii=False)}{state_arg})"
    return make_result(
        "skyvern_wait",
        browser_context=ctx,
        data={"waited_for": waited_for, "sdk_equivalent": sdk_eq},
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_evaluate(
    expression: Annotated[str, "JavaScript expression to evaluate"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Run JavaScript on the page to read DOM state, get URLs, check values, or discover CSS selectors for faster subsequent actions.

    Security: This executes arbitrary JS in the page context. Only use with trusted expressions.
    """
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_evaluate", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            result = await page.evaluate(expression)
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_evaluate",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.ACTION_FAILED, str(e), "Check JavaScript syntax"),
            )

    # The snippet shows at most the first 80 characters of the expression.
    # JavaScript commonly contains double quotes, so the preview is escaped via
    # json.dumps to keep the generated snippet a valid string literal.
    preview = json.dumps(expression[:80], ensure_ascii=False)
    return make_result(
        "skyvern_evaluate",
        browser_context=ctx,
        data={"result": result, "sdk_equivalent": f"await page.evaluate({preview})"},
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AI Differentiator Tools
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def skyvern_extract(
    prompt: Annotated[str, "Natural language description of what data to extract from the page"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    schema: Annotated[
        str | None, Field(description="JSON Schema string defining the expected output structure")
    ] = None,
) -> dict[str, Any]:
    """Get structured data from any website -- prices, listings, articles, tables, contact info, etc. Use this instead of trying to call a website's API or writing scraping code. Describe what you need in natural language.

    Optionally provide a JSON `schema` to enforce the output structure (pass as a JSON string).
    """
    # Validate the schema before touching the browser so malformed input is
    # reported as INVALID_INPUT without needing a session.
    parsed_schema: dict[str, Any] | None = None
    if schema is not None:
        try:
            parsed_schema = json.loads(schema)
        except (json.JSONDecodeError, TypeError) as e:
            return make_result(
                "skyvern_extract",
                ok=False,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    f"Invalid JSON schema: {e}",
                    "Provide schema as a valid JSON string",
                ),
            )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_extract", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            extracted = await page.extract(prompt=prompt, schema=parsed_schema)
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_extract",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Check that the page has loaded and the prompt is clear"),
            )

    # json.dumps escapes quotes/backslashes so the snippet stays valid code.
    prompt_lit = json.dumps(prompt, ensure_ascii=False)
    return make_result(
        "skyvern_extract",
        browser_context=ctx,
        data={"extracted": extracted, "sdk_equivalent": f"await page.extract(prompt={prompt_lit})"},
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_validate(
    prompt: Annotated[str, "Validation condition to check (e.g., 'the login form is visible')"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Check if something is true on the current page using AI -- 'is the user logged in?', 'does the cart have 3 items?', 'is the form submitted?'

    Returns whether the described condition is true or false.
    """
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_validate", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            valid = await page.validate(prompt)
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_validate",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Check that the page has loaded and the prompt is clear"),
            )

    # json.dumps escapes quotes/backslashes so the snippet stays valid code.
    prompt_lit = json.dumps(prompt, ensure_ascii=False)
    return make_result(
        "skyvern_validate",
        browser_context=ctx,
        data={"prompt": prompt, "valid": valid, "sdk_equivalent": f"await page.validate({prompt_lit})"},
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_act(
    prompt: Annotated[str, "Natural language instruction for the action to perform (e.g., 'close the cookie banner')"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Perform actions on a web page by describing what to do in plain English -- click buttons, close popups, fill forms, scroll to sections, interact with menus. Use for any website interaction task.

    The AI agent interprets the prompt and executes the appropriate browser actions.
    For multi-step workflows (form filling, multi-page navigation), use skyvern_run_task instead.
    """
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_act", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            await page.act(prompt)
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_act",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Simplify the prompt or break the task into steps"),
            )

    # `completed` only records that page.act() returned without raising.
    # json.dumps escapes quotes/backslashes so the snippet stays valid code.
    prompt_lit = json.dumps(prompt, ensure_ascii=False)
    return make_result(
        "skyvern_act",
        browser_context=ctx,
        data={"prompt": prompt, "completed": True, "sdk_equivalent": f"await page.act({prompt_lit})"},
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_run_task(
    prompt: Annotated[str, "Natural language description of the task to automate"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    url: Annotated[
        str | None, Field(description="URL to navigate to before running (uses current page if omitted)")
    ] = None,
    data_extraction_schema: Annotated[
        str | None, Field(description="JSON Schema string defining what data to extract")
    ] = None,
    max_steps: Annotated[int | None, Field(description="Maximum number of agent steps")] = None,
    timeout_seconds: Annotated[
        int, Field(description="Timeout in seconds (default 180s = 3 minutes)", ge=10, le=1800)
    ] = 180,
) -> dict[str, Any]:
    """Delegate a complete multi-step web task to an autonomous AI agent. Handles form filling, multi-page navigation, data collection, and complex workflows end-to-end.

    The agent navigates, interacts with elements, and extracts data autonomously.
    For simple single-step actions, use skyvern_act instead.
    """
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_run_task", ok=False, error=no_browser_error())

    # The schema arrives as a JSON string and is decoded before being handed to the agent.
    parsed_schema: dict[str, Any] | str | None = None
    if data_extraction_schema is not None:
        try:
            parsed_schema = json.loads(data_extraction_schema)
        except (json.JSONDecodeError, TypeError) as e:
            return make_result(
                "skyvern_run_task",
                ok=False,
                browser_context=ctx,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    f"Invalid data_extraction_schema JSON: {e}",
                    "Provide schema as a valid JSON string",
                ),
            )

    with Timer() as timer:
        try:
            response = await page.agent.run_task(
                prompt=prompt,
                url=url,
                data_extraction_schema=parsed_schema,
                max_steps=max_steps,
                timeout=timeout_seconds,
            )
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_run_task",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Check the prompt, URL, and timeout settings"),
            )

    # json.dumps escapes quotes/backslashes so the snippet stays valid code.
    prompt_lit = json.dumps(prompt, ensure_ascii=False)
    return make_result(
        "skyvern_run_task",
        browser_context=ctx,
        data={
            "run_id": response.run_id,
            "status": response.status,
            "output": response.output,
            "failure_reason": response.failure_reason,
            "recording_url": response.recording_url,
            "app_url": response.app_url,
            "sdk_equivalent": f"await page.agent.run_task(prompt={prompt_lit})",
        },
        timing_ms=timer.timing_ms,
    )
|
||||
261
skyvern/cli/mcp_tools/session.py
Normal file
261
skyvern/cli/mcp_tools/session.py
Normal file
@@ -0,0 +1,261 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Annotated, Any
|
||||
|
||||
from pydantic import Field
|
||||
|
||||
from skyvern.schemas.runs import ProxyLocation
|
||||
|
||||
from ._common import BrowserContext, ErrorCode, Timer, make_error, make_result
|
||||
from ._session import (
|
||||
SessionState,
|
||||
get_current_session,
|
||||
get_skyvern,
|
||||
resolve_browser,
|
||||
set_current_session,
|
||||
)
|
||||
|
||||
|
||||
async def skyvern_session_create(
    timeout: Annotated[int | None, Field(description="Session timeout in minutes (5-1440)")] = 60,
    proxy_location: Annotated[str | None, Field(description="Proxy location: RESIDENTIAL, US, etc.")] = None,
    local: Annotated[bool, Field(description="Launch local browser instead of cloud")] = False,
    headless: Annotated[bool, Field(description="Run local browser in headless mode")] = False,
) -> dict[str, Any]:
    """Create a new browser session to start interacting with websites. Creates a cloud browser by default.

    Use local=true for a local Chromium instance.
    The session persists across tool calls until explicitly closed.
    """
    # Every outcome (success or failure) is reported through make_result();
    # this function never lets an exception escape to the caller.
    with Timer() as timer:
        try:
            skyvern = get_skyvern()

            if local:
                # Local path: only `headless` is forwarded; `timeout` and
                # `proxy_location` are not used here.
                browser = await skyvern.launch_local_browser(headless=headless)
                ctx = BrowserContext(mode="local")
                set_current_session(SessionState(browser=browser, context=ctx))
                timer.mark("sdk")
                return make_result(
                    "skyvern_session_create",
                    browser_context=ctx,
                    data={"local": True, "headless": headless},
                    timing_ms=timer.timing_ms,
                )

            # Validate the proxy name on its own. Otherwise an unknown value
            # raises ValueError into the generic handler below, whose hint
            # (missing API key) would send the user in the wrong direction.
            try:
                proxy = ProxyLocation(proxy_location) if proxy_location else None
            except ValueError:
                return make_result(
                    "skyvern_session_create",
                    ok=False,
                    timing_ms=timer.timing_ms,
                    error=make_error(
                        ErrorCode.INVALID_INPUT,
                        f"Invalid proxy_location: {proxy_location!r}",
                        "Use a supported proxy location (e.g. RESIDENTIAL) or omit the argument",
                    ),
                )

            browser = await skyvern.launch_cloud_browser(timeout=timeout, proxy_location=proxy)
            ctx = BrowserContext(mode="cloud_session", session_id=browser.browser_session_id)
            # Remember the new browser as the current session for later tool calls.
            set_current_session(SessionState(browser=browser, context=ctx))
            timer.mark("sdk")

        except ValueError as e:
            return make_result(
                "skyvern_session_create",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.SDK_ERROR,
                    str(e),
                    "Cloud sessions require SKYVERN_API_KEY. Check your environment.",
                ),
            )
        except Exception as e:
            return make_result(
                "skyvern_session_create",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Failed to create browser session"),
            )

    # Only the cloud path reaches this point; the local path returned above.
    return make_result(
        "skyvern_session_create",
        browser_context=ctx,
        data={
            "session_id": browser.browser_session_id,
            "timeout_minutes": timeout,
        },
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_session_close(
    session_id: Annotated[str | None, Field(description="Session ID to close (uses current if not specified)")] = None,
) -> dict[str, Any]:
    """Close a browser session when you're done. Frees cloud resources.

    Closes the specified session or the current active session.
    """
    tool_name = "skyvern_session_close"
    active = get_current_session()

    with Timer() as timer:
        try:
            if session_id:
                # Explicit ID: close it through the SDK client, and reset the
                # tracked session only when it is the very one being closed.
                await get_skyvern().close_browser_session(session_id)
                active_ctx = active.context
                if active_ctx and active_ctx.session_id == session_id:
                    set_current_session(SessionState())
                timer.mark("sdk")
                return make_result(
                    tool_name,
                    data={"session_id": session_id, "closed": True},
                    timing_ms=timer.timing_ms,
                )

            # No ID supplied: fall back to the currently tracked browser.
            if active.browser is None:
                no_session = make_error(
                    ErrorCode.NO_ACTIVE_BROWSER,
                    "No active session to close",
                    "Provide a session_id or create a session first",
                )
                return make_result(tool_name, ok=False, error=no_session)

            # Capture the ID before closing so it can be echoed in the result.
            if active.context:
                closed_id = active.context.session_id
            else:
                closed_id = None
            await active.browser.close()
            set_current_session(SessionState())
            timer.mark("sdk")

        except Exception as exc:
            failure = make_error(ErrorCode.SDK_ERROR, str(exc), "Failed to close session")
            return make_result(
                tool_name,
                ok=False,
                timing_ms=timer.timing_ms,
                error=failure,
            )

    return make_result(
        tool_name,
        data={"session_id": closed_id, "closed": True},
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_session_list() -> dict[str, Any]:
    """List all active browser sessions. Use to find available sessions to connect to."""
    tool_name = "skyvern_session_list"

    with Timer() as timer:
        try:
            listed = await get_skyvern().get_browser_sessions()
            timer.mark("sdk")

            session_data = []
            for item in listed:
                session_data.append(
                    {
                        "session_id": item.browser_session_id,
                        "status": item.status,
                        "started_at": item.started_at.isoformat() if item.started_at else None,
                        "timeout": item.timeout,
                        "runnable_id": item.runnable_id,
                        # Reported as available only when nothing is attached
                        # (no runnable_id) and a browser address is present.
                        "available": item.runnable_id is None and item.browser_address is not None,
                    }
                )

        except Exception as exc:
            # ValueError gets an API-key hint; every other failure gets the
            # generic one. Error code and message are the same in both cases.
            if isinstance(exc, ValueError):
                hint = "Listing sessions requires SKYVERN_API_KEY"
            else:
                hint = "Failed to list sessions"
            return make_result(
                tool_name,
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(exc), hint),
            )

    # Include the ID of the currently tracked session (if any) in the result.
    active = get_current_session()
    if active.context:
        current_id = active.context.session_id
    else:
        current_id = None

    payload = {
        "sessions": session_data,
        "count": len(session_data),
        "current_session_id": current_id,
    }
    return make_result(tool_name, data=payload, timing_ms=timer.timing_ms)
|
||||
|
||||
|
||||
async def skyvern_session_get(
    session_id: Annotated[str, Field(description="Browser session ID to get details for")],
) -> dict[str, Any]:
    """Get details about a specific browser session -- status, timeout, availability."""
    with Timer() as timer:
        try:
            skyvern = get_skyvern()
            session = await skyvern.get_browser_session(session_id)
            timer.mark("sdk")
        except Exception as e:
            # Any lookup failure is reported as "not found" with the SDK's message.
            return make_result(
                "skyvern_session_get",
                ok=False,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.BROWSER_NOT_FOUND, str(e), "Check the session ID is correct"),
            )

    current = get_current_session()
    # Force a strict bool: a bare `a and b` would leak None (or the context
    # object's falsy value) into the "is_current" field when no session is tracked.
    is_current = bool(current.context and current.context.session_id == session_id)

    return make_result(
        "skyvern_session_get",
        # Browser context is attached only when the queried session is the tracked one.
        browser_context=BrowserContext(mode="cloud_session", session_id=session_id) if is_current else None,
        data={
            "session_id": session.browser_session_id,
            "status": session.status,
            "started_at": session.started_at.isoformat() if session.started_at else None,
            "completed_at": session.completed_at.isoformat() if session.completed_at else None,
            "timeout": session.timeout,
            "runnable_id": session.runnable_id,
            "is_current": is_current,
        },
        timing_ms=timer.timing_ms,
    )
|
||||
|
||||
|
||||
async def skyvern_session_connect(
    session_id: Annotated[str | None, Field(description="Cloud session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Connect to an existing browser -- a cloud session by ID or any browser via CDP URL.

    Use this to resume work in a previously created session or attach to an external browser.
    """
    tool_name = "skyvern_session_connect"

    # Guard: at least one way of identifying the target browser is required.
    if not (session_id or cdp_url):
        missing_target = make_error(
            ErrorCode.INVALID_INPUT,
            "Must provide session_id or cdp_url",
            "Specify which browser to connect to",
        )
        return make_result(tool_name, ok=False, error=missing_target)

    with Timer() as timer:
        try:
            # Only the returned context is used here; the browser handle is not.
            _browser, ctx = await resolve_browser(session_id=session_id, cdp_url=cdp_url)
            timer.mark("sdk")
        except Exception as exc:
            not_found = make_error(
                ErrorCode.BROWSER_NOT_FOUND,
                str(exc),
                "Check the session ID or CDP URL is valid",
            )
            return make_result(
                tool_name,
                ok=False,
                timing_ms=timer.timing_ms,
                error=not_found,
            )

    return make_result(
        tool_name,
        browser_context=ctx,
        data={"connected": True},
        timing_ms=timer.timing_ms,
    )
|
||||
@@ -10,11 +10,11 @@ import psutil
|
||||
import typer
|
||||
import uvicorn
|
||||
from dotenv import load_dotenv, set_key
|
||||
from mcp.server.fastmcp import FastMCP
|
||||
from rich.panel import Panel
|
||||
from rich.prompt import Confirm
|
||||
|
||||
from skyvern.cli.console import console
|
||||
from skyvern.cli.mcp_tools import mcp # Uses standalone fastmcp (v2.x)
|
||||
from skyvern.cli.utils import start_services
|
||||
from skyvern.client import SkyvernEnvironment
|
||||
from skyvern.config import settings
|
||||
@@ -27,8 +27,6 @@ from skyvern.utils.env_paths import resolve_backend_env_path, resolve_frontend_e
|
||||
|
||||
run_app = typer.Typer(help="Commands to run Skyvern services such as the API server or UI.")
|
||||
|
||||
mcp = FastMCP("Skyvern")
|
||||
|
||||
|
||||
@mcp.tool()
|
||||
async def skyvern_run_task(prompt: str, url: str) -> dict[str, Any]:
|
||||
@@ -53,12 +51,9 @@ async def skyvern_run_task(prompt: str, url: str) -> dict[str, Any]:
|
||||
res = await skyvern_agent.run_task(prompt=prompt, url=url, user_agent="skyvern-mcp", wait_for_completion=True)
|
||||
|
||||
output = res.model_dump()["output"]
|
||||
# Primary: use app_url from API response (handles both task and workflow run IDs correctly)
|
||||
if res.app_url:
|
||||
task_url = res.app_url
|
||||
else:
|
||||
# Fallback when app_url is not available (e.g., older API versions)
|
||||
# Determine route based on run_id prefix: 'wr_' for workflows, otherwise tasks
|
||||
if res.run_id and res.run_id.startswith("wr_"):
|
||||
task_url = f"{settings.SKYVERN_APP_URL.rstrip('/')}/runs/{res.run_id}/overview"
|
||||
else:
|
||||
|
||||
Reference in New Issue
Block a user