Remove setup.sh in favor of skyvern CLI (#4737)

This commit is contained in:
Shuchang Zheng
2026-02-12 20:43:27 -08:00
committed by GitHub
parent 08d3b04d14
commit 155c07f8be
77 changed files with 12358 additions and 10 deletions

View File

@@ -3,12 +3,16 @@ from __future__ import annotations
import asyncio
import base64
import json
import logging
import re
from datetime import datetime, timezone
from typing import Annotated, Any
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
from pydantic import Field
from skyvern.schemas.run_blocks import CredentialType
from ._common import (
ErrorCode,
Timer,
@@ -18,6 +22,24 @@ from ._common import (
)
from ._session import BrowserNotAvailableError, get_page, no_browser_error
LOG = logging.getLogger(__name__)

# Heuristic used by the browser tools to detect text (intent, selector or prompt)
# that refers to a password or credential. Case-insensitive, whole-word matches for:
#   password / passphrase / passcode (optionally plural), secret(s), credential(s),
#   "pin" optionally followed by "code", pwd, passwd.
# The plural forms of "secret" and "credential" are accepted so that phrases such as
# "enter my credentials" are caught just like "enter my credential".
_PASSWORD_PATTERN = re.compile(
    r"\bpass(?:word|phrase|code)s?\b|\bsecrets?\b|\bcredentials?\b|\bpin\s*(?:code)?\b|\bpwd\b|\bpasswd\b",
    re.IGNORECASE,
)

# Hint attached to every "credentials are not allowed here" error so the caller is
# pointed at the supported alternative (skyvern_login with a stored credential).
_CREDENTIAL_ERROR_HINT = (
    "Use skyvern_login with a stored credential to authenticate. "
    "Create credentials via CLI: skyvern credentials add. "
    "Never pass passwords through tool calls."
)

# Detects JavaScript that references a password input (type=password attribute,
# `.type == 'password'` comparison, or an `input[type=password]` selector) and later
# assigns to `.value`.
# re.DOTALL lets the lazy `.*?` gap span line breaks; without it a script that selects
# the field on one line and assigns `.value` on the next would slip past the guard.
_JS_PASSWORD_PATTERN = re.compile(
    r"""(?:type\s*=\s*['"]?password|\.type\s*===?\s*['"]password|input\[type=password\]).*?\.value\s*=""",
    re.IGNORECASE | re.DOTALL,
)
def _resolve_ai_mode(
selector: str | None,
@@ -211,6 +233,96 @@ async def skyvern_click(
)
async def skyvern_hover(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
        str | None,
        Field(
            description="Natural language description of the element to hover over. Be specific: "
            "'the user avatar in the top-right corner' is better than 'avatar'. "
            "Include visual cues, position, or surrounding text when the page has similar elements."
        ),
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector or XPath for the element to hover")] = None,
    timeout: Annotated[
        int,
        Field(
            description="Max time to wait for the element in ms. Default 30000 (30s)",
            ge=1000,
            le=60000,
        ),
    ] = 30000,
) -> dict[str, Any]:
    """Hover over an element to reveal tooltips, dropdown menus, or hidden content. Uses AI intent, CSS/XPath selector, or both. Unlike Playwright's browser_hover which requires a ref from a prior snapshot, this finds elements using natural language."""
    # Step 1: decide how the element will be located. Any error reported by the
    # resolver is surfaced as INVALID_INPUT with a fixed message and hint.
    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
        return make_result(
            "skyvern_hover",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or both",
                "Use intent='describe what to hover' for AI-powered hovering, or selector='#css-selector' for precise targeting",
            ),
        )

    # Step 2: obtain the page; no browser means nothing to hover on.
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_hover", ok=False, error=no_browser_error())

    # Step 3: perform the hover while timing it.
    with Timer() as timer:
        try:
            if ai_mode is None:
                # Plain selector path: no AI involvement.
                assert selector is not None
                target = page.locator(selector)
            else:
                target = page.locator(selector=selector, prompt=intent, ai=ai_mode)  # type: ignore[arg-type]
            await target.hover(timeout=timeout)
            timer.mark("sdk")
        except PlaywrightTimeoutError as exc:
            # The element did not become hoverable within `timeout` ms.
            elapsed = timer.timing_ms
            return make_result(
                "skyvern_hover",
                ok=False,
                browser_context=ctx,
                timing_ms=elapsed,
                error=make_error(
                    ErrorCode.SELECTOR_NOT_FOUND,
                    str(exc),
                    "Verify the selector matches an element on the page, or use intent for AI-powered finding",
                ),
            )
        except Exception as exc:
            # Any other failure: label it by whether an AI mode was in use.
            if ai_mode:
                failure_code = ErrorCode.AI_FALLBACK_FAILED
            else:
                failure_code = ErrorCode.ACTION_FAILED
            elapsed = timer.timing_ms
            return make_result(
                "skyvern_hover",
                ok=False,
                browser_context=ctx,
                timing_ms=elapsed,
                error=make_error(
                    failure_code,
                    str(exc),
                    "The element may be hidden or not interactable",
                ),
            )

    # Step 4: describe what was done, including the equivalent SDK call when one applies.
    if selector and intent:
        sdk_call: str | None = f'await page.locator("{selector}", prompt="{intent}").hover()'
    elif ai_mode:
        sdk_call = f'await page.locator(prompt="{intent}").hover()'
    elif selector:
        sdk_call = f'await page.locator("{selector}").hover()'
    else:
        sdk_call = None

    payload: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode}
    if sdk_call is not None:
        payload["sdk_equivalent"] = sdk_call

    return make_result(
        "skyvern_hover",
        browser_context=ctx,
        data=payload,
        timing_ms=timer.timing_ms,
    )
async def skyvern_type(
text: Annotated[str, "Text to type into the element"],
session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
@@ -237,9 +349,23 @@ async def skyvern_type(
) -> dict[str, Any]:
"""Type text into an input field using AI intent, CSS/XPath selector, or both. Unlike Playwright's browser_type which requires a ref from a prior snapshot, this tool finds input fields using natural language — no snapshot step needed.
NEVER use this for passwords or credentials — they will be exposed in logs and conversation history. Use skyvern_login with a stored credential instead for secure authentication. Create credentials via CLI: skyvern credentials add.
For dropdowns, use skyvern_select_option instead. For pressing keys (Enter, Tab), use skyvern_press_key.
Clears existing content by default (set clear=false to append).
"""
# Block password entry — redirect to skyvern_login
target_text = f"{intent or ''} {selector or ''}"
if _PASSWORD_PATTERN.search(target_text):
return make_result(
"skyvern_type",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"Cannot type into password fields — credentials must not be passed through tool calls",
_CREDENTIAL_ERROR_HINT,
),
)
ai_mode, err = _resolve_ai_mode(selector, intent)
if err:
return make_result(
@@ -257,6 +383,29 @@ async def skyvern_type(
except BrowserNotAvailableError:
return make_result("skyvern_type", ok=False, error=no_browser_error())
# DOM-level guard: check if the target element is a password field
if selector:
try:
is_password_field = await page.evaluate(
"(s) => { const el = document.querySelector(s); return el && el.type === 'password' }",
selector,
)
except Exception as exc:
# Selector may not be a valid CSS selector (e.g. xpath=...) or page may
# not be ready. Fall through to the existing regex guard in that case.
LOG.debug("DOM password check failed for selector %r: %s", selector, exc)
is_password_field = False
if is_password_field:
return make_result(
"skyvern_type",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"Cannot type into password fields — credentials must not be passed through tool calls",
_CREDENTIAL_ERROR_HINT,
),
)
with Timer() as timer:
try:
if clear:
@@ -746,6 +895,18 @@ async def skyvern_evaluate(
Security: This executes arbitrary JS in the page context. Only use with trusted expressions.
"""
# Block JS that sets password field values
if _JS_PASSWORD_PATTERN.search(expression):
return make_result(
"skyvern_evaluate",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"Cannot set password field values via JavaScript — credentials must not be passed through tool calls",
_CREDENTIAL_ERROR_HINT,
),
)
try:
page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
except BrowserNotAvailableError:
@@ -872,9 +1033,22 @@ async def skyvern_act(
The AI agent interprets the prompt and executes the appropriate browser actions.
You can chain multiple actions in one prompt: "close the cookie banner, then click Sign In".
NEVER include passwords or credentials in the prompt. Use skyvern_login with a stored credential instead. Create credentials via CLI: skyvern credentials add.
For multi-step automations (4+ pages), use skyvern_workflow_create with one block per step.
For quick one-off multi-page tasks, use skyvern_run_task.
"""
# Block login/password actions — redirect to skyvern_login
if _PASSWORD_PATTERN.search(prompt):
return make_result(
"skyvern_act",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"Cannot perform password/credential actions — credentials must not be passed through tool calls",
_CREDENTIAL_ERROR_HINT,
),
)
try:
page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
except BrowserNotAvailableError:
@@ -921,6 +1095,18 @@ async def skyvern_run_task(
For anything reusable, multi-step, or worth keeping, use skyvern_workflow_create instead — it produces a versioned, rerunnable workflow with per-step observability.
For simple single-step actions on the current page, use skyvern_act instead.
"""
# Block password/credential actions — redirect to skyvern_login
if _PASSWORD_PATTERN.search(prompt):
return make_result(
"skyvern_run_task",
ok=False,
error=make_error(
ErrorCode.INVALID_INPUT,
"Cannot perform password/credential actions — credentials must not be passed through tool calls",
_CREDENTIAL_ERROR_HINT,
),
)
try:
page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
except BrowserNotAvailableError:
@@ -975,3 +1161,155 @@ async def skyvern_run_task(
},
timing_ms=timer.timing_ms,
)
# Required skyvern_login arguments per credential provider, keyed by CredentialType
# member (not by the raw credential_type string). skyvern_login looks up the entry for
# the parsed credential type and reports every listed field whose argument is missing
# or empty. Optional arguments such as bitwarden_collection_id and
# azure_vault_totp_secret_key are not listed, so they are never reported as missing.
_CREDENTIAL_REQUIRED_FIELDS: dict[CredentialType, list[str]] = {
CredentialType.skyvern: ["credential_id"],
CredentialType.bitwarden: ["bitwarden_item_id"],
CredentialType.onepassword: ["onepassword_vault_id", "onepassword_item_id"],
CredentialType.azure_vault: ["azure_vault_name", "azure_vault_username_key", "azure_vault_password_key"],
}
async def skyvern_login(
    credential_type: Annotated[
        str, Field(description="Credential provider: 'skyvern', 'bitwarden', '1password', or 'azure_vault'")
    ] = "skyvern",
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    url: Annotated[str | None, Field(description="Login page URL. Uses current page if omitted")] = None,
    credential_id: Annotated[str | None, Field(description="Skyvern credential ID (for type='skyvern')")] = None,
    bitwarden_item_id: Annotated[str | None, Field(description="Bitwarden item ID (for type='bitwarden')")] = None,
    bitwarden_collection_id: Annotated[str | None, Field(description="Bitwarden collection ID (optional)")] = None,
    onepassword_vault_id: Annotated[str | None, Field(description="1Password vault ID (for type='1password')")] = None,
    onepassword_item_id: Annotated[str | None, Field(description="1Password item ID (for type='1password')")] = None,
    azure_vault_name: Annotated[str | None, Field(description="Azure Vault name (for type='azure_vault')")] = None,
    azure_vault_username_key: Annotated[str | None, Field(description="Azure Vault username key")] = None,
    azure_vault_password_key: Annotated[str | None, Field(description="Azure Vault password key")] = None,
    azure_vault_totp_secret_key: Annotated[str | None, Field(description="Azure Vault TOTP key (optional)")] = None,
    prompt: Annotated[str | None, Field(description="Additional login instructions")] = None,
    totp_identifier: Annotated[str | None, Field(description="TOTP identifier for 2FA")] = None,
    totp_url: Annotated[str | None, Field(description="URL to fetch TOTP codes")] = None,
    timeout_seconds: Annotated[int, Field(description="Timeout in seconds (default 180)", ge=10, le=600)] = 180,
) -> dict[str, Any]:
    """Log into a website using stored credentials from Skyvern, Bitwarden, 1Password, or Azure Vault. Passwords are never exposed in prompts.
    Requires a browser session. The AI agent handles the full login flow — finding fields, entering credentials, handling 2FA — so you don't need to write selectors.
    After login, use skyvern_screenshot to verify success, then continue with other browser tools.
    """
    # Parse the provider name into the enum; unknown names are rejected with the
    # list of accepted values.
    try:
        cred_type = CredentialType(credential_type)
    except ValueError:
        allowed = ", ".join(f"'{member.value}'" for member in CredentialType)
        return make_result(
            "skyvern_login",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid credential_type: '{credential_type}'",
                f"Use one of: {allowed}",
            ),
        )

    # Check that every argument the chosen provider requires was supplied
    # (None and empty strings both count as missing).
    supplied = {
        "credential_id": credential_id,
        "bitwarden_item_id": bitwarden_item_id,
        "onepassword_vault_id": onepassword_vault_id,
        "onepassword_item_id": onepassword_item_id,
        "azure_vault_name": azure_vault_name,
        "azure_vault_username_key": azure_vault_username_key,
        "azure_vault_password_key": azure_vault_password_key,
    }
    missing = [name for name in _CREDENTIAL_REQUIRED_FIELDS[cred_type] if not supplied.get(name)]
    if missing:
        missing_csv = ", ".join(missing)
        return make_result(
            "skyvern_login",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Missing required fields for credential_type='{cred_type.value}': {missing_csv}",
                f"Provide: {missing_csv}",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_login", ok=False, error=no_browser_error())

    # Keyword arguments every provider shares; the TOTP options are forwarded
    # only when the caller actually set them.
    shared_kwargs: dict[str, Any] = {"url": url, "prompt": prompt, "timeout": timeout_seconds}
    for key, value in (("totp_identifier", totp_identifier), ("totp_url", totp_url)):
        if value is not None:
            shared_kwargs[key] = value

    with Timer() as timer:
        try:
            # One explicit call per provider keeps mypy happy with the overloaded
            # login() signatures; the asserts only narrow Optional types that the
            # required-field check above has already verified.
            if cred_type is CredentialType.skyvern:
                assert credential_id is not None
                response = await page.agent.login(
                    credential_type=CredentialType.skyvern,
                    credential_id=credential_id,
                    **shared_kwargs,
                )
            elif cred_type is CredentialType.bitwarden:
                assert bitwarden_item_id is not None
                response = await page.agent.login(
                    credential_type=CredentialType.bitwarden,
                    bitwarden_item_id=bitwarden_item_id,
                    bitwarden_collection_id=bitwarden_collection_id,
                    **shared_kwargs,
                )
            elif cred_type is CredentialType.onepassword:
                assert onepassword_vault_id is not None and onepassword_item_id is not None
                response = await page.agent.login(
                    credential_type=CredentialType.onepassword,
                    onepassword_vault_id=onepassword_vault_id,
                    onepassword_item_id=onepassword_item_id,
                    **shared_kwargs,
                )
            else:
                # Remaining provider: Azure Vault.
                assert azure_vault_name is not None
                assert azure_vault_username_key is not None
                assert azure_vault_password_key is not None
                response = await page.agent.login(
                    credential_type=CredentialType.azure_vault,
                    azure_vault_name=azure_vault_name,
                    azure_vault_username_key=azure_vault_username_key,
                    azure_vault_password_key=azure_vault_password_key,
                    azure_vault_totp_secret_key=azure_vault_totp_secret_key,
                    **shared_kwargs,
                )
            timer.mark("sdk")
        except Exception as exc:
            elapsed = timer.timing_ms
            return make_result(
                "skyvern_login",
                ok=False,
                browser_context=ctx,
                timing_ms=elapsed,
                error=make_error(
                    ErrorCode.SDK_ERROR,
                    str(exc),
                    "Check credential_type and required fields for your credential provider",
                ),
            )

    # Summarise the login run for the caller; no credential values are included.
    summary = {
        "run_id": response.run_id,
        "status": response.status,
        "output": response.output,
        "failure_reason": response.failure_reason,
        "recording_url": response.recording_url,
        "app_url": response.app_url,
        "sdk_equivalent": f"await page.agent.login(credential_type=CredentialType.{cred_type.name})",
    }
    return make_result(
        "skyvern_login",
        browser_context=ctx,
        data=summary,
        timing_ms=timer.timing_ms,
    )