Remove setup.sh in favor of skyvern CLI (#4737)

This commit is contained in:
Shuchang Zheng
2026-02-12 20:43:27 -08:00
committed by GitHub
parent 08d3b04d14
commit 155c07f8be
77 changed files with 12358 additions and 10 deletions

View File

@@ -2,6 +2,7 @@
__all__ = [
"cli_app",
"credentials_app",
"quickstart_app",
"run_app",
"workflow_app",
@@ -12,6 +13,7 @@ __all__ = [
]
from .commands import cli_app, init_app # init_app is defined in commands.py
from .credentials import credentials_app
from .docs import docs_app
from .quickstart import quickstart_app
from .run_commands import run_app

View File

@@ -3,6 +3,7 @@ from dotenv import load_dotenv
from skyvern.utils.env_paths import resolve_backend_env_path
from .credentials import credentials_app
from .docs import docs_app
from .init_command import init_browser, init_env
from .quickstart import quickstart_app
@@ -24,6 +25,7 @@ cli_app.add_typer(
)
cli_app.add_typer(workflow_app, name="workflow", help="Workflow management commands.")
cli_app.add_typer(tasks_app, name="tasks", help="Task management commands.")
cli_app.add_typer(credentials_app, name="credentials", help="Manage stored credentials for secure login.")
cli_app.add_typer(docs_app, name="docs", help="Open Skyvern documentation.")
cli_app.add_typer(status_app, name="status", help="Check if Skyvern services are running.")
cli_app.add_typer(stop_app, name="stop", help="Stop Skyvern services.")

225
skyvern/cli/credentials.py Normal file
View File

@@ -0,0 +1,225 @@
"""Credential management CLI commands.
Provides `skyvern credentials add/list/get/delete` for managing stored
credentials. Passwords and secrets are collected via getpass (stdin) so they
never appear in shell history or LLM conversation logs.
"""
from __future__ import annotations
import os
import typer
from dotenv import load_dotenv
from rich.table import Table
from skyvern.client import Skyvern
from skyvern.client.types.non_empty_credit_card_credential import NonEmptyCreditCardCredential
from skyvern.client.types.non_empty_password_credential import NonEmptyPasswordCredential
from skyvern.client.types.secret_credential import SecretCredential
from skyvern.config import settings
from skyvern.utils.env_paths import resolve_backend_env_path
from .console import console
credentials_app = typer.Typer(help="Manage stored credentials for secure login.")
@credentials_app.callback()
def credentials_callback(
ctx: typer.Context,
api_key: str | None = typer.Option(
None,
"--api-key",
help="Skyvern API key",
envvar="SKYVERN_API_KEY",
),
) -> None:
"""Store API key in Typer context."""
ctx.obj = {"api_key": api_key}
def _get_client(api_key: str | None = None) -> Skyvern:
"""Instantiate a Skyvern SDK client using environment variables."""
load_dotenv(resolve_backend_env_path())
key = api_key or os.getenv("SKYVERN_API_KEY") or settings.SKYVERN_API_KEY
return Skyvern(base_url=settings.SKYVERN_BASE_URL, api_key=key)
@credentials_app.command("add")
def add_credential(
ctx: typer.Context,
name: str = typer.Option(..., "--name", "-n", help="Human-readable credential name"),
credential_type: str = typer.Option(
"password",
"--type",
"-t",
help="Credential type: password, credit_card, or secret",
),
username: str | None = typer.Option(None, "--username", "-u", help="Username (for password type)"),
) -> None:
"""Create a credential with secrets entered securely via stdin."""
valid_types = ("password", "credit_card", "secret")
if credential_type not in valid_types:
console.print(f"[red]Invalid credential type: {credential_type}. Use one of: {', '.join(valid_types)}[/red]")
raise typer.Exit(code=1)
client = _get_client(ctx.obj.get("api_key") if ctx.obj else None)
if credential_type == "password":
if not username:
username = typer.prompt("Username")
password = typer.prompt("Password", hide_input=True)
if not password:
console.print("[red]Password cannot be empty.[/red]")
raise typer.Exit(code=1)
totp = typer.prompt("TOTP secret (leave blank to skip)", default="", hide_input=True)
credential = NonEmptyPasswordCredential(
username=username,
password=password,
totp=totp if totp else None,
)
elif credential_type == "credit_card":
card_number = typer.prompt("Card number", hide_input=True)
if not card_number:
console.print("[red]Card number cannot be empty.[/red]")
raise typer.Exit(code=1)
cvv = typer.prompt("CVV", hide_input=True)
if not cvv:
console.print("[red]CVV cannot be empty.[/red]")
raise typer.Exit(code=1)
exp_month = typer.prompt("Expiration month (MM)")
exp_year = typer.prompt("Expiration year (YYYY)")
brand = typer.prompt("Card brand (e.g. visa, mastercard)")
holder_name = typer.prompt("Cardholder name")
credential = NonEmptyCreditCardCredential(
card_number=card_number,
card_cvv=cvv,
card_exp_month=exp_month,
card_exp_year=exp_year,
card_brand=brand,
card_holder_name=holder_name,
)
else:
secret_value = typer.prompt("Secret value", hide_input=True)
if not secret_value:
console.print("[red]Secret value cannot be empty.[/red]")
raise typer.Exit(code=1)
secret_label = typer.prompt("Secret label (leave blank to skip)", default="")
credential = SecretCredential(
secret_value=secret_value,
secret_label=secret_label if secret_label else None,
)
try:
result = client.create_credential(
name=name,
credential_type=credential_type,
credential=credential,
)
except Exception as e:
console.print(f"[red]Failed to create credential: {e}[/red]")
raise typer.Exit(code=1)
console.print(f"[green]Created credential:[/green] {result.credential_id}")
@credentials_app.command("list")
def list_credentials(
ctx: typer.Context,
page: int = typer.Option(1, "--page", help="Page number"),
page_size: int = typer.Option(10, "--page-size", help="Results per page"),
) -> None:
"""List stored credentials (metadata only, never passwords)."""
client = _get_client(ctx.obj.get("api_key") if ctx.obj else None)
try:
credentials = client.get_credentials(page=page, page_size=page_size)
except Exception as e:
console.print(f"[red]Failed to list credentials: {e}[/red]")
raise typer.Exit(code=1)
if not credentials:
console.print("No credentials found.")
return
table = Table(title="Credentials")
table.add_column("ID", style="cyan")
table.add_column("Name", style="green")
table.add_column("Type")
table.add_column("Details")
for cred in credentials:
details = ""
c = cred.credential
if hasattr(c, "username"):
details = f"username={c.username}"
elif hasattr(c, "last_four"):
details = f"****{c.last_four} ({c.brand})"
elif hasattr(c, "secret_label") and c.secret_label:
details = f"label={c.secret_label}"
table.add_row(cred.credential_id, cred.name, str(cred.credential_type), details)
console.print(table)
@credentials_app.command("get")
def get_credential(
ctx: typer.Context,
credential_id: str = typer.Argument(..., help="Credential ID (starts with cred_)"),
) -> None:
"""Show metadata for a single credential."""
client = _get_client(ctx.obj.get("api_key") if ctx.obj else None)
try:
cred = client.get_credential(credential_id)
except Exception as e:
console.print(f"[red]Failed to get credential: {e}[/red]")
raise typer.Exit(code=1)
table = Table(title=f"Credential: {cred.name}")
table.add_column("Field", style="cyan")
table.add_column("Value")
table.add_row("ID", cred.credential_id)
table.add_row("Name", cred.name)
table.add_row("Type", str(cred.credential_type))
c = cred.credential
if hasattr(c, "username"):
table.add_row("Username", c.username)
if hasattr(c, "totp_type") and c.totp_type:
table.add_row("TOTP Type", str(c.totp_type))
elif hasattr(c, "last_four"):
table.add_row("Card Last Four", c.last_four)
table.add_row("Card Brand", c.brand)
elif hasattr(c, "secret_label") and c.secret_label:
table.add_row("Secret Label", c.secret_label)
console.print(table)
@credentials_app.command("delete")
def delete_credential(
ctx: typer.Context,
credential_id: str = typer.Argument(..., help="Credential ID to delete (starts with cred_)"),
yes: bool = typer.Option(False, "--yes", "-y", help="Skip confirmation prompt"),
) -> None:
"""Permanently delete a stored credential."""
if not yes:
confirm = typer.confirm(f"Delete credential {credential_id}?")
if not confirm:
console.print("Aborted.")
raise typer.Exit()
client = _get_client(ctx.obj.get("api_key") if ctx.obj else None)
try:
client.delete_credential(credential_id)
except Exception as e:
console.print(f"[red]Failed to delete credential: {e}[/red]")
raise typer.Exit(code=1)
console.print(f"[green]Deleted credential:[/green] {credential_id}")

View File

@@ -16,6 +16,8 @@ from .browser import (
skyvern_click,
skyvern_evaluate,
skyvern_extract,
skyvern_hover,
skyvern_login,
skyvern_navigate,
skyvern_press_key,
skyvern_run_task,
@@ -26,6 +28,11 @@ from .browser import (
skyvern_validate,
skyvern_wait,
)
from .credential import (
skyvern_credential_delete,
skyvern_credential_get,
skyvern_credential_list,
)
from .session import (
skyvern_session_close,
skyvern_session_connect,
@@ -52,7 +59,21 @@ IMPORTANT: Do NOT use curl, wget, HTTP requests, fetch, or the Bash tool to inte
## When to Use Skyvern vs Other Browser Tools
If both Skyvern and another browser automation server (like Playwright MCP) are available, ALWAYS prefer Skyvern tools because:
| Scenario | Use | Why |
|----------|-----|-----|
| Visit a website | skyvern_navigate | First step — opens the page |
| See what's on the page | skyvern_screenshot | Visual understanding before acting |
| Get data from a page | skyvern_extract | AI-powered structured extraction |
| Do something on a page (click, fill, scroll) | skyvern_act | Natural language actions |
| Click/type/select a specific element | skyvern_click / skyvern_type / skyvern_select_option | Precision targeting by selector or AI intent |
| Hover over a menu | skyvern_hover | Reveal dropdowns, tooltips, hidden content |
| Check if something is true | skyvern_validate | AI assertion ("is the user logged in?") |
| Run a quick one-off task | skyvern_run_task | Autonomous agent, one-time, nothing saved |
| Log into a website | skyvern_login | Secure login using stored credentials |
| Find stored credentials | skyvern_credential_list | Browse saved credentials by name |
| Build an automation (any multi-step task) | skyvern_workflow_create | Reusable, versioned, per-step observability |
| Run an existing automation | skyvern_workflow_run | Execute saved workflow with parameters |
| Run JavaScript | skyvern_evaluate | Read DOM state, get values |
1. **No snapshot step needed** — Skyvern tools accept natural language intent (e.g., intent="the Submit button"), so you can click, type, and interact without first capturing a page snapshot to get element refs. Playwright's browser_click requires a `ref` from a prior browser_snapshot call — Skyvern skips that step entirely.
@@ -73,7 +94,7 @@ The ONLY cases where Playwright MCP tools are appropriate instead of Skyvern:
- `browser_file_upload` — uploading files via file chooser
- `browser_tabs` — managing multiple browser tabs
- `browser_run_code` — running raw Playwright code snippets
- `browser_hover` / `browser_drag` — hovering over elements and drag-and-drop interactions
- `browser_drag` — drag-and-drop interactions
For ALL other browser interactions — navigation, clicking, typing, extraction, forms, scrolling, waiting, screenshots, validation — use Skyvern tools.
@@ -86,6 +107,7 @@ For ALL other browser interactions — navigation, clicking, typing, extraction,
| "Get all product prices" | skyvern_extract | AI-powered extraction — returns JSON, no code needed |
| "Click the login button" / "Fill out this form" | skyvern_act | Natural language actions — one call, multiple steps |
| "Click this specific element" | skyvern_click / skyvern_type / skyvern_select_option | Precision targeting by selector or AI intent |
| "Hover over this menu" | skyvern_hover | Reveal dropdowns, tooltips, hidden content |
| "Is checkout complete?" | skyvern_validate | AI assertion — returns true/false |
| "Log in and download the report" | skyvern_run_task | Autonomous AI agent — one-time, nothing saved |
| "Fill out this 6-page application form" | skyvern_workflow_create | One block per page, versioned, parameterized |
@@ -99,13 +121,21 @@ For ALL other browser interactions — navigation, clicking, typing, extraction,
1. ALWAYS use Skyvern MCP tools directly — do NOT fall back to curl, wget, Python requests, or Bash commands for web interaction. The tools ARE the interface.
2. Create a session (skyvern_session_create) before using browser tools. Workflow and block tools do NOT need a session.
3. NEVER scrape by guessing API endpoints or writing HTTP requests — use skyvern_navigate + skyvern_extract.
4. NEVER write Python scripts unless the user explicitly asks for a script. Use the MCP tools.
5. After page-changing actions (skyvern_click, skyvern_act), use skyvern_screenshot to verify the result.
6. Workflow tools (list, create, run, status) do NOT need a browser session.
7. skyvern_extract and skyvern_validate read the CURRENT page — navigate first.
4. NEVER create single-block workflows with long prompts — split into multiple blocks.
5. NEVER import from skyvern.cli.mcp_tools — use `from skyvern import Skyvern` for SDK scripts.
6. After page-changing actions (skyvern_click, skyvern_hover, skyvern_act), use skyvern_screenshot to verify the result.
7. NEVER type passwords, secrets, or credentials using any tool. Credentials must be created via the Skyvern CLI (`skyvern credentials add`) or the Skyvern web UI before use. Use `skyvern_credential_list` to find stored credentials, then `skyvern_login(credential_id=...)` to authenticate. If no credentials exist, tell the user to run `skyvern credentials add` in their terminal.
8. ALWAYS prefer cloud sessions (default). Only use local=true if the user explicitly asks for a local browser.
## Cross-Tool Dependencies
- Workflow tools (list, create, run, status) do NOT need a browser session
- Credential lookup tools (list, get, delete) do NOT need a browser session
- skyvern_login requires a browser session AND a credential_id — create credentials via `skyvern credentials add` CLI or the Skyvern web UI first
- skyvern_extract and skyvern_validate read the CURRENT page — navigate first
- skyvern_run_task is a one-off throwaway agent run — for reusable automations, use skyvern_workflow_create instead
## Tool Modes (precision tools)
Precision tools (skyvern_click, skyvern_type, skyvern_select_option, skyvern_scroll, skyvern_press_key, skyvern_wait)
Precision tools (skyvern_click, skyvern_hover, skyvern_type, skyvern_select_option, skyvern_scroll, skyvern_press_key, skyvern_wait)
support three modes. When unsure, use `intent`. For multiple actions in sequence, prefer skyvern_act.
1. **Intent mode** — AI-powered element finding:
@@ -117,6 +147,26 @@ support three modes. When unsure, use `intent`. For multiple actions in sequence
3. **Selector mode** — deterministic CSS/XPath targeting:
`skyvern_click(selector="#submit-btn")`
## Examples
| User says | Use |
|-----------|-----|
| "Go to amazon.com" | skyvern_navigate |
| "What's on this page?" | skyvern_screenshot |
| "Get all product prices" | skyvern_extract |
| "Click the login button" | skyvern_act or skyvern_click |
| "Fill out this form" | skyvern_act |
| "What credentials do I have?" | skyvern_credential_list |
| "Log into this website" | skyvern_login (secure login with stored credentials) |
| "Log in and download the report" | skyvern_run_task (one-off) or skyvern_workflow_create (keep it) |
| "Is checkout complete?" | skyvern_validate |
| "Fill out this 6-page application form" | skyvern_workflow_create (one block per page) |
| "Set up a reusable automation" | Explore with browser tools, then skyvern_workflow_create |
| "Create a workflow that monitors prices" | skyvern_workflow_create |
| "Run the login workflow" | skyvern_workflow_run |
| "Is my workflow done?" | skyvern_workflow_status |
| "Automate this process" | skyvern_workflow_create (always prefer MCP tools over scripts) |
| "Write a Python script to do this" | Skyvern SDK (ONLY when user explicitly asks for a script) |
## Getting Started
**Visiting a website**: Create a session (skyvern_session_create), navigate and interact, close with skyvern_session_close when done.
@@ -127,7 +177,17 @@ support three modes. When unsure, use `intent`. For multiple actions in sequence
**Testing feasibility** (try before you build): Walk through the site interactively — use skyvern_act on each page and skyvern_screenshot to verify results. This is faster feedback than skyvern_run_task (which runs autonomously and may take minutes). Once you've confirmed each step works, compose them into a workflow.
**Managing automations**: No browser session needed — use workflow tools directly (skyvern_workflow_list, skyvern_workflow_run, skyvern_workflow_status).
**Logging into a website** (secure credential-based login):
1. User creates credentials via CLI: `skyvern credentials add --name "Amazon" --username "user@example.com"` (password entered securely via terminal prompt)
2. Find the credential: skyvern_credential_list
3. Create a session: skyvern_session_create
4. Navigate to login page: skyvern_navigate
5. Log in: skyvern_login(credential_id="cred_...") — AI handles the full login flow
6. Verify: skyvern_screenshot
**Managing automations** (running, listing, or monitoring workflows):
No browser session needed — use workflow tools directly:
skyvern_workflow_list, skyvern_workflow_run, skyvern_workflow_status, etc.
## Building Workflows
@@ -212,12 +272,14 @@ mcp.tool()(skyvern_act)
mcp.tool()(skyvern_extract)
mcp.tool()(skyvern_validate)
mcp.tool()(skyvern_run_task)
mcp.tool()(skyvern_login)
mcp.tool()(skyvern_navigate)
mcp.tool()(skyvern_screenshot)
mcp.tool()(skyvern_evaluate)
# -- Precision tools (selector/intent-based browser primitives) --
mcp.tool()(skyvern_click)
mcp.tool()(skyvern_hover)
mcp.tool()(skyvern_type)
mcp.tool()(skyvern_scroll)
mcp.tool()(skyvern_select_option)
@@ -228,6 +290,11 @@ mcp.tool()(skyvern_wait)
mcp.tool()(skyvern_block_schema)
mcp.tool()(skyvern_block_validate)
# -- Credential lookup (no browser needed) --
mcp.tool()(skyvern_credential_list)
mcp.tool()(skyvern_credential_get)
mcp.tool()(skyvern_credential_delete)
# -- Workflow management (CRUD + execution, no browser needed) --
mcp.tool()(skyvern_workflow_list)
mcp.tool()(skyvern_workflow_get)
@@ -251,11 +318,13 @@ __all__ = [
"skyvern_extract",
"skyvern_validate",
"skyvern_run_task",
"skyvern_login",
"skyvern_navigate",
"skyvern_screenshot",
"skyvern_evaluate",
# Precision (selector/intent browser primitives)
"skyvern_click",
"skyvern_hover",
"skyvern_type",
"skyvern_scroll",
"skyvern_select_option",
@@ -264,6 +333,10 @@ __all__ = [
# Block discovery + validation
"skyvern_block_schema",
"skyvern_block_validate",
# Credential lookup
"skyvern_credential_list",
"skyvern_credential_get",
"skyvern_credential_delete",
# Workflow management
"skyvern_workflow_list",
"skyvern_workflow_get",

View File

@@ -3,12 +3,16 @@ from __future__ import annotations
import asyncio
import base64
import json
import logging
import re
from datetime import datetime, timezone
from typing import Annotated, Any
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
from pydantic import Field
from skyvern.schemas.run_blocks import CredentialType
from ._common import (
ErrorCode,
Timer,
@@ -18,6 +22,24 @@ from ._common import (
)
from ._session import BrowserNotAvailableError, get_page, no_browser_error
LOG = logging.getLogger(__name__)
_PASSWORD_PATTERN = re.compile(
r"\bpass(?:word|phrase|code)s?\b|\bsecret\b|\bcredential\b|\bpin\s*(?:code)?\b|\bpwd\b|\bpasswd\b",
re.IGNORECASE,
)
_CREDENTIAL_ERROR_HINT = (
"Use skyvern_login with a stored credential to authenticate. "
"Create credentials via CLI: skyvern credentials add. "
"Never pass passwords through tool calls."
)
_JS_PASSWORD_PATTERN = re.compile(
r"""(?:type\s*=\s*['"]?password|\.type\s*===?\s*['"]password|input\[type=password\]).*?\.value\s*=""",
re.IGNORECASE,
)
def _resolve_ai_mode(
selector: str | None,
@@ -211,6 +233,96 @@ async def skyvern_click(
)
async def skyvern_hover(
session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
intent: Annotated[
str | None,
Field(
description="Natural language description of the element to hover over. Be specific: "
"'the user avatar in the top-right corner' is better than 'avatar'. "
"Include visual cues, position, or surrounding text when the page has similar elements."
),
] = None,
selector: Annotated[str | None, Field(description="CSS selector or XPath for the element to hover")] = None,
timeout: Annotated[
int,
Field(
description="Max time to wait for the element in ms. Default 30000 (30s)",
ge=1000,
le=60000,
),
] = 30000,
) -> dict[str, Any]:
"""Hover over an element to reveal tooltips, dropdown menus, or hidden content. Uses AI intent, CSS/XPath selector, or both. Unlike Playwright's browser_hover which requires a ref from a prior snapshot, this finds elements using natural language."""
ai_mode, err = _resolve_ai_mode(selector, intent)
if err:
return make_result(
"skyvern_hover",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"Must provide intent, selector, or both",
"Use intent='describe what to hover' for AI-powered hovering, or selector='#css-selector' for precise targeting",
),
)
try:
page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
except BrowserNotAvailableError:
return make_result("skyvern_hover", ok=False, error=no_browser_error())
with Timer() as timer:
try:
if ai_mode is not None:
loc = page.locator(selector=selector, prompt=intent, ai=ai_mode) # type: ignore[arg-type]
else:
assert selector is not None
loc = page.locator(selector)
await loc.hover(timeout=timeout)
timer.mark("sdk")
except PlaywrightTimeoutError as e:
return make_result(
"skyvern_hover",
ok=False,
browser_context=ctx,
timing_ms=timer.timing_ms,
error=make_error(
ErrorCode.SELECTOR_NOT_FOUND,
str(e),
"Verify the selector matches an element on the page, or use intent for AI-powered finding",
),
)
except Exception as e:
code = ErrorCode.AI_FALLBACK_FAILED if ai_mode else ErrorCode.ACTION_FAILED
return make_result(
"skyvern_hover",
ok=False,
browser_context=ctx,
timing_ms=timer.timing_ms,
error=make_error(
code,
str(e),
"The element may be hidden or not interactable",
),
)
data: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode}
if selector and intent:
data["sdk_equivalent"] = f'await page.locator("{selector}", prompt="{intent}").hover()'
elif ai_mode:
data["sdk_equivalent"] = f'await page.locator(prompt="{intent}").hover()'
elif selector:
data["sdk_equivalent"] = f'await page.locator("{selector}").hover()'
return make_result(
"skyvern_hover",
browser_context=ctx,
data=data,
timing_ms=timer.timing_ms,
)
async def skyvern_type(
text: Annotated[str, "Text to type into the element"],
session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
@@ -237,9 +349,23 @@ async def skyvern_type(
) -> dict[str, Any]:
"""Type text into an input field using AI intent, CSS/XPath selector, or both. Unlike Playwright's browser_type which requires a ref from a prior snapshot, this tool finds input fields using natural language — no snapshot step needed.
NEVER use this for passwords or credentials — they will be exposed in logs and conversation history. Use skyvern_login with a stored credential instead for secure authentication. Create credentials via CLI: skyvern credentials add.
For dropdowns, use skyvern_select_option instead. For pressing keys (Enter, Tab), use skyvern_press_key.
Clears existing content by default (set clear=false to append).
"""
# Block password entry — redirect to skyvern_login
target_text = f"{intent or ''} {selector or ''}"
if _PASSWORD_PATTERN.search(target_text):
return make_result(
"skyvern_type",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"Cannot type into password fields — credentials must not be passed through tool calls",
_CREDENTIAL_ERROR_HINT,
),
)
ai_mode, err = _resolve_ai_mode(selector, intent)
if err:
return make_result(
@@ -257,6 +383,29 @@ async def skyvern_type(
except BrowserNotAvailableError:
return make_result("skyvern_type", ok=False, error=no_browser_error())
# DOM-level guard: check if the target element is a password field
if selector:
try:
is_password_field = await page.evaluate(
"(s) => { const el = document.querySelector(s); return el && el.type === 'password' }",
selector,
)
except Exception as exc:
# Selector may not be a valid CSS selector (e.g. xpath=...) or page may
# not be ready. Fall through to the existing regex guard in that case.
LOG.debug("DOM password check failed for selector %r: %s", selector, exc)
is_password_field = False
if is_password_field:
return make_result(
"skyvern_type",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"Cannot type into password fields — credentials must not be passed through tool calls",
_CREDENTIAL_ERROR_HINT,
),
)
with Timer() as timer:
try:
if clear:
@@ -746,6 +895,18 @@ async def skyvern_evaluate(
Security: This executes arbitrary JS in the page context. Only use with trusted expressions.
"""
# Block JS that sets password field values
if _JS_PASSWORD_PATTERN.search(expression):
return make_result(
"skyvern_evaluate",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"Cannot set password field values via JavaScript — credentials must not be passed through tool calls",
_CREDENTIAL_ERROR_HINT,
),
)
try:
page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
except BrowserNotAvailableError:
@@ -872,9 +1033,22 @@ async def skyvern_act(
The AI agent interprets the prompt and executes the appropriate browser actions.
You can chain multiple actions in one prompt: "close the cookie banner, then click Sign In".
NEVER include passwords or credentials in the prompt. Use skyvern_login with a stored credential instead. Create credentials via CLI: skyvern credentials add.
For multi-step automations (4+ pages), use skyvern_workflow_create with one block per step.
For quick one-off multi-page tasks, use skyvern_run_task.
"""
# Block login/password actions — redirect to skyvern_login
if _PASSWORD_PATTERN.search(prompt):
return make_result(
"skyvern_act",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"Cannot perform password/credential actions — credentials must not be passed through tool calls",
_CREDENTIAL_ERROR_HINT,
),
)
try:
page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
except BrowserNotAvailableError:
@@ -921,6 +1095,18 @@ async def skyvern_run_task(
For anything reusable, multi-step, or worth keeping, use skyvern_workflow_create instead — it produces a versioned, rerunnable workflow with per-step observability.
For simple single-step actions on the current page, use skyvern_act instead.
"""
# Block password/credential actions — redirect to skyvern_login
if _PASSWORD_PATTERN.search(prompt):
return make_result(
"skyvern_run_task",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"Cannot perform password/credential actions — credentials must not be passed through tool calls",
_CREDENTIAL_ERROR_HINT,
),
)
try:
page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
except BrowserNotAvailableError:
@@ -975,3 +1161,155 @@ async def skyvern_run_task(
},
timing_ms=timer.timing_ms,
)
# Maps credential_type string → required fields for validation
_CREDENTIAL_REQUIRED_FIELDS: dict[CredentialType, list[str]] = {
CredentialType.skyvern: ["credential_id"],
CredentialType.bitwarden: ["bitwarden_item_id"],
CredentialType.onepassword: ["onepassword_vault_id", "onepassword_item_id"],
CredentialType.azure_vault: ["azure_vault_name", "azure_vault_username_key", "azure_vault_password_key"],
}
async def skyvern_login(
credential_type: Annotated[
str, Field(description="Credential provider: 'skyvern', 'bitwarden', '1password', or 'azure_vault'")
] = "skyvern",
session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
url: Annotated[str | None, Field(description="Login page URL. Uses current page if omitted")] = None,
credential_id: Annotated[str | None, Field(description="Skyvern credential ID (for type='skyvern')")] = None,
bitwarden_item_id: Annotated[str | None, Field(description="Bitwarden item ID (for type='bitwarden')")] = None,
bitwarden_collection_id: Annotated[str | None, Field(description="Bitwarden collection ID (optional)")] = None,
onepassword_vault_id: Annotated[str | None, Field(description="1Password vault ID (for type='1password')")] = None,
onepassword_item_id: Annotated[str | None, Field(description="1Password item ID (for type='1password')")] = None,
azure_vault_name: Annotated[str | None, Field(description="Azure Vault name (for type='azure_vault')")] = None,
azure_vault_username_key: Annotated[str | None, Field(description="Azure Vault username key")] = None,
azure_vault_password_key: Annotated[str | None, Field(description="Azure Vault password key")] = None,
azure_vault_totp_secret_key: Annotated[str | None, Field(description="Azure Vault TOTP key (optional)")] = None,
prompt: Annotated[str | None, Field(description="Additional login instructions")] = None,
totp_identifier: Annotated[str | None, Field(description="TOTP identifier for 2FA")] = None,
totp_url: Annotated[str | None, Field(description="URL to fetch TOTP codes")] = None,
timeout_seconds: Annotated[int, Field(description="Timeout in seconds (default 180)", ge=10, le=600)] = 180,
) -> dict[str, Any]:
"""Log into a website using stored credentials from Skyvern, Bitwarden, 1Password, or Azure Vault. Passwords are never exposed in prompts.
Requires a browser session. The AI agent handles the full login flow — finding fields, entering credentials, handling 2FA — so you don't need to write selectors.
After login, use skyvern_screenshot to verify success, then continue with other browser tools.
"""
# Validate credential_type
try:
cred_type = CredentialType(credential_type)
except ValueError:
valid = ", ".join(f"'{v.value}'" for v in CredentialType)
return make_result(
"skyvern_login",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
f"Invalid credential_type: '{credential_type}'",
f"Use one of: {valid}",
),
)
# Validate required fields per credential type
local_vars = {
"credential_id": credential_id,
"bitwarden_item_id": bitwarden_item_id,
"onepassword_vault_id": onepassword_vault_id,
"onepassword_item_id": onepassword_item_id,
"azure_vault_name": azure_vault_name,
"azure_vault_username_key": azure_vault_username_key,
"azure_vault_password_key": azure_vault_password_key,
}
missing = [f for f in _CREDENTIAL_REQUIRED_FIELDS[cred_type] if not local_vars.get(f)]
if missing:
return make_result(
"skyvern_login",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
f"Missing required fields for credential_type='{cred_type.value}': {', '.join(missing)}",
f"Provide: {', '.join(missing)}",
),
)
try:
page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
except BrowserNotAvailableError:
return make_result("skyvern_login", ok=False, error=no_browser_error())
# Common kwargs shared across all credential types
_common_kwargs: dict[str, Any] = {"url": url, "prompt": prompt, "timeout": timeout_seconds}
if totp_identifier is not None:
_common_kwargs["totp_identifier"] = totp_identifier
if totp_url is not None:
_common_kwargs["totp_url"] = totp_url
with Timer() as timer:
try:
# Dispatch per credential type to satisfy mypy's overloaded signatures
if cred_type == CredentialType.skyvern:
assert credential_id is not None
response = await page.agent.login(
credential_type=CredentialType.skyvern,
credential_id=credential_id,
**_common_kwargs,
)
elif cred_type == CredentialType.bitwarden:
assert bitwarden_item_id is not None
response = await page.agent.login(
credential_type=CredentialType.bitwarden,
bitwarden_item_id=bitwarden_item_id,
bitwarden_collection_id=bitwarden_collection_id,
**_common_kwargs,
)
elif cred_type == CredentialType.onepassword:
assert onepassword_vault_id is not None and onepassword_item_id is not None
response = await page.agent.login(
credential_type=CredentialType.onepassword,
onepassword_vault_id=onepassword_vault_id,
onepassword_item_id=onepassword_item_id,
**_common_kwargs,
)
else:
assert azure_vault_name is not None
assert azure_vault_username_key is not None
assert azure_vault_password_key is not None
response = await page.agent.login(
credential_type=CredentialType.azure_vault,
azure_vault_name=azure_vault_name,
azure_vault_username_key=azure_vault_username_key,
azure_vault_password_key=azure_vault_password_key,
azure_vault_totp_secret_key=azure_vault_totp_secret_key,
**_common_kwargs,
)
timer.mark("sdk")
except Exception as e:
return make_result(
"skyvern_login",
ok=False,
browser_context=ctx,
timing_ms=timer.timing_ms,
error=make_error(
ErrorCode.SDK_ERROR,
str(e),
"Check credential_type and required fields for your credential provider",
),
)
return make_result(
"skyvern_login",
browser_context=ctx,
data={
"run_id": response.run_id,
"status": response.status,
"output": response.output,
"failure_reason": response.failure_reason,
"recording_url": response.recording_url,
"app_url": response.app_url,
"sdk_equivalent": f"await page.agent.login(credential_type=CredentialType.{cred_type.name})",
},
timing_ms=timer.timing_ms,
)

View File

@@ -0,0 +1,303 @@
"""Skyvern MCP credential tools — CRUD for stored credentials.
Tools for listing, creating, and deleting credentials stored in Skyvern.
Credentials are used with skyvern_login to authenticate on websites without
exposing passwords in prompts. These tools do not require a browser session.
"""
from __future__ import annotations
from typing import Annotated, Any
from pydantic import Field
from skyvern.client.core.api_error import ApiError
from ._common import ErrorCode, Timer, make_error, make_result
from ._session import get_skyvern
def _not_found_error(tool: str, credential_id: str, timer: Timer) -> dict[str, Any]:
return make_result(
tool,
ok=False,
timing_ms=timer.timing_ms,
error=make_error(
ErrorCode.INVALID_INPUT,
f"Credential not found: {credential_id}",
"Use skyvern_credential_list to find valid credential IDs",
),
)
def _validate_credential_id(credential_id: str, tool: str) -> dict[str, Any] | None:
if "/" in credential_id or "\\" in credential_id:
return make_result(
tool,
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"credential_id must not contain path separators",
"Provide a valid credential ID (starts with cred_)",
),
)
if not credential_id.startswith("cred_"):
return make_result(
tool,
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
f"Invalid credential_id format: {credential_id!r}",
"Credential IDs start with cred_. Use skyvern_credential_list to find valid IDs.",
),
)
return None
def _serialize_credential(cred: Any) -> dict[str, Any]:
"""Pick the fields we expose from a CredentialResponse.
Uses Any to avoid tight coupling with Fern-generated client types.
Passwords and secrets are never returned — only metadata.
"""
data: dict[str, Any] = {
"credential_id": cred.credential_id,
"name": cred.name,
"credential_type": str(cred.credential_type),
}
# Serialize the credential metadata (no secrets)
c = cred.credential
if hasattr(c, "username"):
data["username"] = c.username
data["totp_type"] = str(c.totp_type) if hasattr(c, "totp_type") and c.totp_type else None
elif hasattr(c, "last_four"):
data["card_last_four"] = c.last_four
data["card_brand"] = c.brand
elif hasattr(c, "secret_label"):
data["secret_label"] = c.secret_label
return data
async def skyvern_credential_list(
page: Annotated[int, Field(description="Page number (1-based)", ge=1)] = 1,
page_size: Annotated[int, Field(description="Results per page", ge=1, le=100)] = 10,
) -> dict[str, Any]:
"""List stored credentials. Returns credential IDs and names — never passwords or secrets.
Use this to find a credential_id for skyvern_login. Credentials are stored securely in Skyvern's vault.
"""
skyvern = get_skyvern()
with Timer() as timer:
try:
credentials = await skyvern.get_credentials(page=page, page_size=page_size)
timer.mark("sdk")
except Exception as e:
return make_result(
"skyvern_credential_list",
ok=False,
timing_ms=timer.timing_ms,
error=make_error(ErrorCode.API_ERROR, str(e), "Check your API key and Skyvern connection"),
)
return make_result(
"skyvern_credential_list",
data={
"credentials": [_serialize_credential(c) for c in credentials],
"page": page,
"page_size": page_size,
"count": len(credentials),
"has_more": len(credentials) == page_size,
},
timing_ms=timer.timing_ms,
)
# NOTE: Intentionally NOT registered as an MCP tool. Passwords must never flow through
# MCP tool calls. Credential creation happens via CLI (`skyvern credentials add`) or
# web UI. This function is preserved for programmatic SDK use only.
async def skyvern_credential_create(
name: Annotated[str, Field(description="Human-readable name (e.g., 'Amazon Login', 'Salesforce Prod')")],
credential_type: Annotated[
str,
Field(description="Type of credential: 'password', 'credit_card', or 'secret'"),
] = "password",
username: Annotated[str | None, Field(description="Username or email (required for password type)")] = None,
password: Annotated[str | None, Field(description="Password (required for password type)")] = None,
totp: Annotated[str | None, Field(description="TOTP secret for 2FA (e.g., 'JBSWY3DPEHPK3PXP')")] = None,
card_number: Annotated[str | None, Field(description="Full card number (for credit_card type)")] = None,
card_cvv: Annotated[str | None, Field(description="Card CVV (for credit_card type)")] = None,
card_exp_month: Annotated[str | None, Field(description="Expiration month (for credit_card type)")] = None,
card_exp_year: Annotated[str | None, Field(description="Expiration year (for credit_card type)")] = None,
card_brand: Annotated[str | None, Field(description="Card brand, e.g. 'visa' (for credit_card type)")] = None,
card_holder_name: Annotated[str | None, Field(description="Cardholder name (for credit_card type)")] = None,
secret_value: Annotated[str | None, Field(description="Secret value (for secret type)")] = None,
secret_label: Annotated[str | None, Field(description="Label for the secret (for secret type)")] = None,
) -> dict[str, Any]:
"""Store a credential securely in Skyvern's vault. Returns a credential_id for use with skyvern_login.
The credential is encrypted and stored server-side. After creation, only metadata (username, card last 4) is returned — never the password or secret itself.
"""
valid_types = ("password", "credit_card", "secret")
if credential_type not in valid_types:
return make_result(
"skyvern_credential_create",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
f"Invalid credential_type: '{credential_type}'",
f"Use one of: {', '.join(valid_types)}",
),
)
# Build credential payload per type
credential_data: dict[str, Any]
if credential_type == "password":
if not username or not password:
return make_result(
"skyvern_credential_create",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"username and password are required for credential_type='password'",
"Provide both username and password",
),
)
credential_data = {"username": username, "password": password}
if totp:
credential_data["totp"] = totp
elif credential_type == "credit_card":
cc_fields = {
"card_number": card_number,
"card_cvv": card_cvv,
"card_exp_month": card_exp_month,
"card_exp_year": card_exp_year,
"card_brand": card_brand,
"card_holder_name": card_holder_name,
}
missing = [k for k, v in cc_fields.items() if not v]
if missing:
return make_result(
"skyvern_credential_create",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
f"Missing required fields for credit_card: {', '.join(missing)}",
f"Provide: {', '.join(missing)}",
),
)
credential_data = cc_fields # type: ignore[assignment]
else:
if not secret_value:
return make_result(
"skyvern_credential_create",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"secret_value is required for credential_type='secret'",
"Provide secret_value",
),
)
credential_data = {"secret_value": secret_value}
if secret_label:
credential_data["secret_label"] = secret_label
skyvern = get_skyvern()
with Timer() as timer:
try:
result = await skyvern.create_credential(
name=name,
credential_type=credential_type, # type: ignore[arg-type]
credential=credential_data, # type: ignore[arg-type]
)
timer.mark("sdk")
except Exception as e:
return make_result(
"skyvern_credential_create",
ok=False,
timing_ms=timer.timing_ms,
error=make_error(ErrorCode.API_ERROR, str(e), "Check your API key and credential data"),
)
return make_result(
"skyvern_credential_create",
data=_serialize_credential(result),
timing_ms=timer.timing_ms,
)
async def skyvern_credential_get(
credential_id: Annotated[str, Field(description="Credential ID (starts with cred_)")],
) -> dict[str, Any]:
"""Get a stored credential's metadata by ID. Returns name, type, and username — never the password or secret."""
if err := _validate_credential_id(credential_id, "skyvern_credential_get"):
return err
skyvern = get_skyvern()
with Timer() as timer:
try:
result = await skyvern.get_credential(credential_id)
timer.mark("sdk")
except ApiError as e:
if e.status_code == 404:
return _not_found_error("skyvern_credential_get", credential_id, timer)
return make_result(
"skyvern_credential_get",
ok=False,
timing_ms=timer.timing_ms,
error=make_error(ErrorCode.API_ERROR, str(e), "Check your API key and Skyvern connection"),
)
except Exception as e:
return make_result(
"skyvern_credential_get",
ok=False,
timing_ms=timer.timing_ms,
error=make_error(ErrorCode.API_ERROR, str(e), "Check your API key and Skyvern connection"),
)
return make_result(
"skyvern_credential_get",
data=_serialize_credential(result),
timing_ms=timer.timing_ms,
)
async def skyvern_credential_delete(
credential_id: Annotated[str, Field(description="Credential ID to delete (starts with cred_)")],
) -> dict[str, Any]:
"""Permanently delete a stored credential. This cannot be undone."""
if err := _validate_credential_id(credential_id, "skyvern_credential_delete"):
return err
skyvern = get_skyvern()
with Timer() as timer:
try:
await skyvern.delete_credential(credential_id)
timer.mark("sdk")
except ApiError as e:
if e.status_code == 404:
return _not_found_error("skyvern_credential_delete", credential_id, timer)
return make_result(
"skyvern_credential_delete",
ok=False,
timing_ms=timer.timing_ms,
error=make_error(ErrorCode.API_ERROR, str(e), "Check your API key and Skyvern connection"),
)
except Exception as e:
return make_result(
"skyvern_credential_delete",
ok=False,
timing_ms=timer.timing_ms,
error=make_error(ErrorCode.API_ERROR, str(e), "Check your API key and Skyvern connection"),
)
return make_result(
"skyvern_credential_delete",
data={"credential_id": credential_id, "deleted": True},
timing_ms=timer.timing_ms,
)