Extract shared core from MCP tools, add CLI browser commands (#4768)

This commit is contained in:
Marc Kelechava
2026-02-17 11:24:56 -08:00
committed by GitHub
parent aacc612365
commit 7c5be8fefe
14 changed files with 1304 additions and 113 deletions

View File

@@ -0,0 +1,93 @@
import logging
import typer
from dotenv import load_dotenv
from skyvern.forge.sdk.forge_log import setup_logger as _setup_logger
from skyvern.utils.env_paths import resolve_backend_env_path
from ..credentials import credentials_app
from ..docs import docs_app
from ..init_command import init_browser, init_env
from ..quickstart import quickstart_app
from ..run_commands import run_app
from ..status import status_app
from ..stop_commands import stop_app
from ..tasks import tasks_app
from ..workflow import workflow_app
from .browser import browser_app
_cli_logging_configured = False


def configure_cli_logging() -> None:
    """Apply the CLI logging configuration the first time this is called.

    Later calls are no-ops, guarded by the module-level
    ``_cli_logging_configured`` flag. Nothing here runs at import time; the
    caller decides when logging gets configured.
    """
    global _cli_logging_configured
    if not _cli_logging_configured:
        # Flip the flag first so the work below happens at most once.
        _cli_logging_configured = True

        # Quiet chatty SDK/third-party loggers for CLI execution only.
        noisy_loggers = ("skyvern", "httpx", "litellm", "playwright", "httpcore")
        for name in noisy_loggers:
            logging.getLogger(name).setLevel(logging.WARNING)
        _setup_logger()
# Root Typer application for the Skyvern CLI. The help text uses Rich markup
# ([bold]...[/bold]), which is why rich_markup_mode is set to "rich".
cli_app = typer.Typer(
    help=("""[bold]Skyvern CLI[/bold]\nManage and run your local Skyvern environment."""),
    no_args_is_help=True,
    rich_markup_mode="rich",
)
@cli_app.callback()
def cli_callback() -> None:
    """Configure CLI logging before command execution."""
    # Registered as the root app callback so logging is configured when the CLI
    # is actually invoked rather than when this module is imported.
    configure_cli_logging()
# Attach each command group to the root app under its subcommand name.
cli_app.add_typer(
    run_app,
    name="run",
    help="Run Skyvern services like the API server, UI, and MCP.",
)
cli_app.add_typer(workflow_app, name="workflow", help="Workflow management commands.")
cli_app.add_typer(tasks_app, name="tasks", help="Task management commands.")
cli_app.add_typer(credentials_app, name="credentials", help="Manage stored credentials for secure login.")
cli_app.add_typer(docs_app, name="docs", help="Open Skyvern documentation.")
cli_app.add_typer(status_app, name="status", help="Check if Skyvern services are running.")
cli_app.add_typer(stop_app, name="stop", help="Stop Skyvern services.")
# `init` is its own Typer app: its callback handles a bare `init`, and it also
# carries subcommands registered on init_app.
init_app = typer.Typer(
    invoke_without_command=True,
    help="Interactively configure Skyvern and its dependencies.",
)
cli_app.add_typer(init_app, name="init")
# Add quickstart command
cli_app.add_typer(
    quickstart_app, name="quickstart", help="One-command setup and start for Skyvern (combines init and run)."
)
# Browser automation commands
cli_app.add_typer(browser_app, name="browser", help="Browser automation commands.")
@init_app.callback()
def init_callback(
    ctx: typer.Context,
    no_postgres: bool = typer.Option(False, "--no-postgres", help="Skip starting PostgreSQL container"),
) -> None:
    """Run full initialization when no subcommand is provided."""
    # When a subcommand was requested it does its own work; only a bare `init`
    # triggers the full environment setup.
    if ctx.invoked_subcommand is not None:
        return
    init_env(no_postgres=no_postgres)
@init_app.command(name="browser")
def init_browser_command() -> None:
    """Initialize only the browser configuration."""
    # Registered as the `browser` subcommand of init_app; it calls
    # init_browser() alone, without the init_env() flow.
    init_browser()
if __name__ == "__main__":  # pragma: no cover - manual CLI invocation
    # Load the backend .env file into the environment, then hand off to the CLI.
    backend_env_path = resolve_backend_env_path()
    load_dotenv(backend_env_path)
    cli_app()

View File

@@ -0,0 +1,9 @@
from dotenv import load_dotenv
from skyvern.utils.env_paths import resolve_backend_env_path
from . import cli_app
if __name__ == "__main__":  # pragma: no cover - manual CLI invocation
    # Load the backend .env file into the environment before starting the CLI app.
    backend_env_path = resolve_backend_env_path()
    load_dotenv(backend_env_path)
    cli_app()

View File

@@ -0,0 +1,52 @@
from __future__ import annotations
import json
import sys
from typing import Any
from rich.console import Console
from rich.table import Table
# Shared Rich console used by output() and output_error() for human-readable output.
console = Console()
def output(
    data: Any,
    *,
    action: str = "",
    json_mode: bool = False,
) -> None:
    """Render a successful command result.

    In JSON mode the result is wrapped in an envelope with the keys ``ok``
    (always ``True``), ``action``, ``data`` and ``error`` (always ``None``)
    and written to stdout as indented JSON followed by a newline.

    Otherwise the result is printed through the shared Rich console:

    * a non-empty list whose first element is a dict is shown as a table,
    * a dict is shown as one ``key: value`` line per entry,
    * anything else is shown via ``str()``.

    Args:
        data: The result payload to display.
        action: Name of the command, echoed in the JSON envelope.
        json_mode: Emit the JSON envelope instead of Rich output.
    """
    if json_mode:
        envelope: dict[str, Any] = {"ok": True, "action": action, "data": data, "error": None}
        # default=str stringifies values json cannot serialize natively.
        json.dump(envelope, sys.stdout, indent=2, default=str)
        sys.stdout.write("\n")
        return

    # Imported here because only the human-readable path needs it.
    from rich.markup import escape

    if isinstance(data, list) and data and isinstance(data[0], dict):
        # Collect the columns from every row, in first-seen order, so rows with
        # extra, missing or reordered keys still line up under the right header.
        columns: list[Any] = []
        seen: set[Any] = set()
        for row in data:
            if not isinstance(row, dict):
                continue
            for key in row:
                if key not in seen:
                    seen.add(key)
                    columns.append(key)

        table = Table()
        for key in columns:
            table.add_column(escape(str(key).replace("_", " ").title()))
        for row in data:
            if isinstance(row, dict):
                cells = [str(row.get(key, "")) for key in columns]
            else:
                # A stray non-dict row is shown in the first column instead of crashing.
                cells = [str(row)]
            # Escape so brackets in the data are printed literally, not parsed as markup.
            table.add_row(*[escape(cell) for cell in cells])
        console.print(table)
    elif isinstance(data, dict):
        for key, value in data.items():
            console.print(f"[bold]{escape(str(key))}:[/bold] {escape(str(value))}")
    else:
        console.print(escape(str(data)))
def output_error(message: str, *, hint: str = "", json_mode: bool = False, exit_code: int = 1) -> None:
    """Report a failed command and terminate the process.

    In JSON mode an envelope with ``ok`` set to ``False``, an empty
    ``action``, ``data`` set to ``None`` and an ``error`` object holding
    ``message`` and ``hint`` is written to stdout. Otherwise the message (and
    the hint, when non-empty) is printed through the shared Rich console.

    Args:
        message: Description of what went wrong.
        hint: Optional suggestion for fixing the problem.
        json_mode: Emit the JSON envelope instead of Rich output.
        exit_code: Exit status carried by the raised ``SystemExit``.

    Raises:
        SystemExit: Always, with ``exit_code``. This function never returns.
    """
    if json_mode:
        envelope: dict[str, Any] = {
            "ok": False,
            "action": "",
            "data": None,
            "error": {"message": message, "hint": hint},
        }
        json.dump(envelope, sys.stdout, indent=2, default=str)
        sys.stdout.write("\n")
    else:
        # Imported here because only the human-readable path needs it.
        from rich.markup import escape

        # Error text often contains brackets (selectors, paths, reprs). Escape it
        # so Rich prints it literally instead of parsing it as markup.
        console.print(f"[red]Error: {escape(str(message))}[/red]")
        if hint:
            console.print(f"[yellow]Hint: {escape(str(hint))}[/yellow]")
    raise SystemExit(exit_code)

View File

@@ -0,0 +1,48 @@
from __future__ import annotations
import json
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from pathlib import Path
# Per-user directory and JSON file where the CLI state is persisted.
STATE_DIR = Path.home() / ".skyvern"
STATE_FILE = STATE_DIR / "state.json"
# load_state() ignores saved state older than this many seconds.
_TTL_SECONDS = 86400  # 24 hours
@dataclass
class CLIState:
    """CLI state persisted to ``STATE_FILE`` between invocations.

    All fields are optional and default to ``None``.
    """

    # Browser session ID.
    session_id: str | None = None
    # CDP URL.
    cdp_url: str | None = None
    mode: str | None = None  # "cloud", "local", or "cdp"
    # ISO-8601 UTC timestamp; save_state() overwrites it on every save and
    # load_state() uses it to expire old state.
    created_at: str | None = None
def save_state(state: CLIState) -> None:
    """Persist ``state`` to ``STATE_FILE`` as JSON, private to the current user.

    ``created_at`` is always replaced with the current UTC time, whatever
    value ``state`` carries.

    The JSON is written to a temporary file inside ``STATE_DIR`` and then moved
    over ``STATE_FILE``. The temporary file is created with owner-only access,
    so the state is never on disk with default (umask-derived) permissions,
    and a concurrent reader never sees a partially written file.

    Args:
        state: The state to persist.

    Raises:
        OSError: If the directory or file cannot be created or written.
    """
    # Function-scope imports keep this block self-contained.
    import os
    import tempfile

    STATE_DIR.mkdir(parents=True, exist_ok=True)
    STATE_DIR.chmod(0o700)
    data = asdict(state)
    data["created_at"] = datetime.now(timezone.utc).isoformat()

    # mkstemp creates the file readable and writable only by the creating user.
    fd, tmp_name = tempfile.mkstemp(dir=STATE_DIR, prefix="state.", suffix=".tmp")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as handle:
            handle.write(json.dumps(data))
        # Rename within the same directory swaps the new content in atomically.
        os.replace(tmp_name, STATE_FILE)
    except BaseException:
        # Do not leave a stray temp file behind if writing or renaming failed.
        Path(tmp_name).unlink(missing_ok=True)
        raise
    STATE_FILE.chmod(0o600)
def load_state() -> CLIState | None:
    """Read the persisted CLI state, if there is a usable one.

    Returns:
        The saved ``CLIState``, or ``None`` when the state file is missing,
        when its ``created_at`` is older than ``_TTL_SECONDS``, or when reading
        or parsing it fails for any reason.
    """
    if not STATE_FILE.exists():
        return None
    try:
        payload = json.loads(STATE_FILE.read_text())
        stamp = payload.get("created_at")
        if stamp:
            elapsed = datetime.now(timezone.utc) - datetime.fromisoformat(stamp)
            if elapsed.total_seconds() > _TTL_SECONDS:
                return None
        # Ignore keys that are not CLIState fields.
        known_fields = CLIState.__dataclass_fields__
        kwargs = {name: payload[name] for name in payload if name in known_fields}
        return CLIState(**kwargs)
    except Exception:
        # Best effort: an unreadable or malformed file is treated as no state.
        return None
def clear_state() -> None:
    """Delete the persisted state file; do nothing if it does not exist."""
    # missing_ok avoids the check-then-delete race: another process removing
    # the file between an exists() test and unlink() must not raise here.
    STATE_FILE.unlink(missing_ok=True)

View File

@@ -0,0 +1,325 @@
from __future__ import annotations
import asyncio
from dataclasses import asdict, dataclass
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Literal
import typer
from skyvern.cli.commands._output import output, output_error
from skyvern.cli.commands._state import CLIState, clear_state, load_state, save_state
from skyvern.cli.core.artifacts import save_artifact
from skyvern.cli.core.browser_ops import do_act, do_extract, do_navigate, do_screenshot
from skyvern.cli.core.client import get_skyvern
from skyvern.cli.core.guards import GuardError, check_password_prompt, validate_wait_until
from skyvern.cli.core.session_ops import do_session_close, do_session_create, do_session_list
# `browser` command group, with session management nested under `browser session`.
browser_app = typer.Typer(help="Browser automation commands.", no_args_is_help=True)
session_app = typer.Typer(help="Manage browser sessions.", no_args_is_help=True)
browser_app.add_typer(session_app, name="session")
@dataclass(frozen=True)
class ConnectionTarget:
    """Immutable description of which browser a command should talk to."""

    # "cloud" targets are addressed by session_id, "cdp" targets by cdp_url.
    mode: Literal["cloud", "cdp"]
    session_id: str | None = None
    cdp_url: str | None = None
def _resolve_connection(session: str | None, cdp: str | None) -> ConnectionTarget:
    """Work out which browser connection a command should use.

    Explicit options win over saved state. With neither option given, the
    state from ``load_state()`` is consulted: a saved CDP URL is used when the
    saved mode is ``"cdp"``, otherwise a saved session ID is used, otherwise a
    saved CDP URL.

    Args:
        session: Value of ``--session``, if given.
        cdp: Value of ``--cdp``, if given.

    Raises:
        typer.BadParameter: If both options are given, or if neither is given
            and no usable saved connection exists.
    """
    if session and cdp:
        raise typer.BadParameter("Pass only one of --session or --cdp.")
    if session:
        return ConnectionTarget(mode="cloud", session_id=session)
    if cdp:
        return ConnectionTarget(mode="cdp", cdp_url=cdp)

    saved = load_state()
    if saved:
        saved_cdp = saved.cdp_url
        saved_session = saved.session_id
        if saved.mode == "cdp" and saved_cdp:
            return ConnectionTarget(mode="cdp", cdp_url=saved_cdp)
        if saved_session:
            return ConnectionTarget(mode="cloud", session_id=saved_session)
        if saved_cdp:
            return ConnectionTarget(mode="cdp", cdp_url=saved_cdp)

    guidance = "\n".join(
        (
            "No active browser connection. Create one with: skyvern browser session create",
            "Or connect with: skyvern browser session connect --cdp ws://...",
            "Or specify: --session pbs_... / --cdp ws://...",
        )
    )
    raise typer.BadParameter(guidance)
async def _connect_browser(connection: ConnectionTarget) -> Any:
    """Open a browser handle for ``connection`` via the Skyvern client.

    Args:
        connection: The resolved target; its ``mode`` selects cloud or CDP.

    Returns:
        Whatever the Skyvern client's connect call for that mode returns.

    Raises:
        typer.BadParameter: If the identifier needed for the mode is missing.
    """
    client = get_skyvern()
    if connection.mode != "cloud":
        cdp_url = connection.cdp_url
        if not cdp_url:
            raise typer.BadParameter("CDP mode requires --cdp or an active CDP URL in state.")
        return await client.connect_to_browser_over_cdp(cdp_url)

    session_id = connection.session_id
    if not session_id:
        raise typer.BadParameter("Cloud mode requires --session or an active cloud session in state.")
    return await client.connect_to_cloud_browser_session(session_id)
# ---------------------------------------------------------------------------
# Session commands
# ---------------------------------------------------------------------------
@session_app.command("create")
def session_create(
    timeout: int = typer.Option(60, help="Session timeout in minutes."),
    proxy: str | None = typer.Option(None, help="Proxy location (e.g. RESIDENTIAL)."),
    local: bool = typer.Option(False, "--local", help="Launch a local browser instead of cloud."),
    headless: bool = typer.Option(False, "--headless", help="Run local browser headless."),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """Create a new browser session."""
    # --local is accepted but rejected up front; output_error() exits the process.
    if local:
        output_error(
            "Local browser sessions are not yet supported in CLI mode.",
            hint="Use MCP (skyvern run mcp) for local browser sessions, or omit --local for cloud sessions.",
            json_mode=json_output,
        )

    async def _create_cloud_session() -> dict:
        client = get_skyvern()
        _unused_browser, created = await do_session_create(
            client,
            timeout=timeout,
            proxy_location=proxy,
        )
        # Remember the new session as the active connection for later commands.
        new_state = CLIState(session_id=created.session_id, cdp_url=None, mode="cloud")
        save_state(new_state)
        summary = {
            "session_id": created.session_id,
            "mode": "cloud",
            "timeout_minutes": created.timeout_minutes,
        }
        return summary

    try:
        output(asyncio.run(_create_cloud_session()), action="session_create", json_mode=json_output)
    except GuardError as exc:
        output_error(str(exc), hint=exc.hint, json_mode=json_output)
    except typer.BadParameter:
        # Let Typer render usage errors itself.
        raise
    except Exception as exc:
        output_error(str(exc), hint="Check your API key and network connection.", json_mode=json_output)
def _clear_state_if_active(connection: ConnectionTarget) -> None:
    """Clear the persisted state only when it refers to ``connection``."""
    saved = load_state()
    if saved is None:
        # Nothing usable is stored (missing, expired or unreadable), so removing
        # whatever file may be left cannot discard a live connection.
        clear_state()
        return
    if connection.mode == "cdp":
        is_active = saved.cdp_url == connection.cdp_url
    else:
        is_active = saved.session_id == connection.session_id
    if is_active:
        clear_state()


@session_app.command("close")
def session_close(
    session: str | None = typer.Option(None, help="Browser session ID to close."),
    cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL to detach from."),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """Close a browser session."""
    # The target comes from --session / --cdp, or from the saved state when
    # neither is given. CDP targets are only detached from (nothing is closed
    # remotely); cloud sessions are closed through do_session_close().
    #
    # The saved state is cleared only when it points at the target being
    # closed. Closing some other, explicitly named session must not make the
    # CLI forget the connection that is still active.

    async def _run() -> dict:
        connection = _resolve_connection(session, cdp)
        if connection.mode == "cdp":
            _clear_state_if_active(connection)
            return {"cdp_url": connection.cdp_url, "closed": False, "detached": True}
        if not connection.session_id:
            raise typer.BadParameter("Cloud mode requires a browser session ID.")
        skyvern = get_skyvern()
        result = await do_session_close(skyvern, connection.session_id)
        _clear_state_if_active(connection)
        return {"session_id": result.session_id, "closed": result.closed}

    try:
        data = asyncio.run(_run())
        output(data, action="session_close", json_mode=json_output)
    except typer.BadParameter:
        # Let Typer render usage errors itself.
        raise
    except Exception as e:
        output_error(str(e), hint="Verify the session ID or CDP URL is correct.", json_mode=json_output)
@session_app.command("connect")
def session_connect(
    session: str | None = typer.Option(None, help="Cloud browser session ID."),
    cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """Connect to an existing browser session (cloud or CDP) and persist it as active state."""
    # Unlike the other commands, connect never falls back to saved state: an
    # explicit target is required.
    if not (session or cdp):
        raise typer.BadParameter("Specify one of --session or --cdp.")

    async def _attach() -> dict:
        target = _resolve_connection(session, cdp)
        browser = await _connect_browser(target)
        # Fetch the working page before persisting anything, so a target that
        # fails here is never saved as the active connection.
        await browser.get_working_page()
        if target.mode != "cdp":
            save_state(CLIState(session_id=target.session_id, cdp_url=None, mode="cloud"))
            return {"connected": True, "mode": "cloud", "session_id": target.session_id}
        save_state(CLIState(session_id=None, cdp_url=target.cdp_url, mode="cdp"))
        return {"connected": True, "mode": "cdp", "cdp_url": target.cdp_url}

    try:
        output(asyncio.run(_attach()), action="session_connect", json_mode=json_output)
    except typer.BadParameter:
        # Let Typer render usage errors itself.
        raise
    except Exception as exc:
        output_error(str(exc), hint="Verify the session ID or CDP URL is reachable.", json_mode=json_output)
@session_app.command("list")
def session_list(
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """List all browser sessions."""

    async def _fetch() -> list[dict]:
        client = get_skyvern()
        found = await do_session_list(client)
        # Convert each session dataclass to a plain dict for output().
        return list(map(asdict, found))

    try:
        rows = asyncio.run(_fetch())
        output(rows, action="session_list", json_mode=json_output)
    except Exception as exc:
        output_error(str(exc), hint="Check your API key and network connection.", json_mode=json_output)
# ---------------------------------------------------------------------------
# Browser commands
# ---------------------------------------------------------------------------
@browser_app.command("navigate")
def navigate(
    url: str = typer.Option(..., help="URL to navigate to."),
    session: str | None = typer.Option(None, help="Browser session ID."),
    cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."),
    timeout: int = typer.Option(30000, help="Navigation timeout in milliseconds."),
    wait_until: str | None = typer.Option(None, help="Wait condition: load, domcontentloaded, networkidle, commit."),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """Navigate to a URL in the browser session."""

    async def _go() -> dict:
        # Validate the wait condition before any connection is attempted.
        validate_wait_until(wait_until)
        target = _resolve_connection(session, cdp)
        browser = await _connect_browser(target)
        page = await browser.get_working_page()
        landed = await do_navigate(page, url, timeout=timeout, wait_until=wait_until)
        return dict(url=landed.url, title=landed.title)

    try:
        output(asyncio.run(_go()), action="navigate", json_mode=json_output)
    except GuardError as exc:
        output_error(str(exc), hint=exc.hint, json_mode=json_output)
    except typer.BadParameter:
        # Let Typer render usage errors itself.
        raise
    except Exception as exc:
        output_error(str(exc), hint="Check the URL is valid and the session is active.", json_mode=json_output)
@browser_app.command("screenshot")
def screenshot(
    session: str | None = typer.Option(None, help="Browser session ID."),
    cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."),
    full_page: bool = typer.Option(False, "--full-page", help="Capture the full scrollable page."),
    selector: str | None = typer.Option(None, help="CSS selector to screenshot."),
    output_path: str | None = typer.Option(None, "--output", help="Custom output file path."),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """Take a screenshot of the current page."""

    async def _capture() -> dict:
        target = _resolve_connection(session, cdp)
        browser = await _connect_browser(target)
        page = await browser.get_working_page()
        shot = await do_screenshot(page, full_page=full_page, selector=selector)

        if not output_path:
            # No --output given: store the image as an artifact under a UTC-timestamped name.
            stamp = datetime.now(timezone.utc).strftime("%Y%m%d_%H%M%S")
            artifact = save_artifact(
                content=shot.data,
                kind="screenshot",
                filename=f"screenshot_{stamp}.png",
                mime="image/png",
                session_id=target.session_id,
            )
            return {"path": artifact.path, "bytes": artifact.bytes, "full_page": shot.full_page}

        # --output given: write to exactly that path, creating parent directories.
        destination = Path(output_path)
        destination.parent.mkdir(parents=True, exist_ok=True)
        destination.write_bytes(shot.data)
        return {"path": str(destination), "bytes": len(shot.data), "full_page": shot.full_page}

    try:
        output(asyncio.run(_capture()), action="screenshot", json_mode=json_output)
    except GuardError as exc:
        output_error(str(exc), hint=exc.hint, json_mode=json_output)
    except typer.BadParameter:
        # Let Typer render usage errors itself.
        raise
    except Exception as exc:
        output_error(str(exc), hint="Ensure the session is active and the page has loaded.", json_mode=json_output)
@browser_app.command("act")
def act(
    prompt: str = typer.Option(..., help="Natural language action to perform."),
    session: str | None = typer.Option(None, help="Browser session ID."),
    cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """Perform a natural language action on the current page."""

    async def _perform() -> dict:
        # Run the prompt guard before any connection is attempted.
        check_password_prompt(prompt)
        target = _resolve_connection(session, cdp)
        browser = await _connect_browser(target)
        page = await browser.get_working_page()
        outcome = await do_act(page, prompt)
        return dict(prompt=outcome.prompt, completed=outcome.completed)

    try:
        output(asyncio.run(_perform()), action="act", json_mode=json_output)
    except GuardError as exc:
        output_error(str(exc), hint=exc.hint, json_mode=json_output)
    except typer.BadParameter:
        # Let Typer render usage errors itself.
        raise
    except Exception as exc:
        output_error(str(exc), hint="Simplify the prompt or break into steps.", json_mode=json_output)
@browser_app.command("extract")
def extract(
    prompt: str = typer.Option(..., help="What data to extract from the page."),
    session: str | None = typer.Option(None, help="Browser session ID."),
    cdp: str | None = typer.Option(None, "--cdp", help="CDP WebSocket URL."),
    schema: str | None = typer.Option(None, help="JSON schema for structured extraction."),
    json_output: bool = typer.Option(False, "--json", help="Output as JSON."),
) -> None:
    """Extract data from the current page using natural language."""

    async def _pull() -> dict:
        target = _resolve_connection(session, cdp)
        browser = await _connect_browser(target)
        page = await browser.get_working_page()
        # The schema string is handed to do_extract() unchanged.
        extraction = await do_extract(page, prompt, schema=schema)
        return dict(prompt=prompt, extracted=extraction.extracted)

    try:
        output(asyncio.run(_pull()), action="extract", json_mode=json_output)
    except GuardError as exc:
        output_error(str(exc), hint=exc.hint, json_mode=json_output)
    except typer.BadParameter:
        # Let Typer render usage errors itself.
        raise
    except Exception as exc:
        output_error(str(exc), hint="Simplify the prompt or provide a JSON schema.", json_mode=json_output)