1316 lines
56 KiB
Python
1316 lines
56 KiB
Python
from __future__ import annotations
|
|
|
|
import asyncio
|
|
import base64
|
|
import json
|
|
import logging
|
|
import re
|
|
from datetime import datetime, timezone
|
|
from typing import Annotated, Any
|
|
|
|
from playwright.async_api import TimeoutError as PlaywrightTimeoutError
|
|
from pydantic import Field
|
|
|
|
from skyvern.schemas.run_blocks import CredentialType
|
|
|
|
from ._common import (
|
|
ErrorCode,
|
|
Timer,
|
|
make_error,
|
|
make_result,
|
|
save_artifact,
|
|
)
|
|
from ._session import BrowserNotAvailableError, get_page, no_browser_error
|
|
|
|
LOG = logging.getLogger(__name__)
|
|
|
|
_PASSWORD_PATTERN = re.compile(
|
|
r"\bpass(?:word|phrase|code)s?\b|\bsecret\b|\bcredential\b|\bpin\s*(?:code)?\b|\bpwd\b|\bpasswd\b",
|
|
re.IGNORECASE,
|
|
)
|
|
|
|
_CREDENTIAL_ERROR_HINT = (
|
|
"Use skyvern_login with a stored credential to authenticate. "
|
|
"Create credentials via CLI: skyvern credentials add. "
|
|
"Never pass passwords through tool calls."
|
|
)
|
|
|
|
_JS_PASSWORD_PATTERN = re.compile(
|
|
r"""(?:type\s*=\s*['"]?password|\.type\s*===?\s*['"]password|input\[type=password\]).*?\.value\s*=""",
|
|
re.IGNORECASE,
|
|
)
|
|
|
|
|
|
def _resolve_ai_mode(
|
|
selector: str | None,
|
|
intent: str | None,
|
|
) -> tuple[str | None, str | None]:
|
|
"""Determine AI mode from selector/intent combination.
|
|
|
|
Returns (ai_mode, error_code) — if error_code is set, the call should fail.
|
|
"""
|
|
if intent and not selector:
|
|
return "proactive", None
|
|
if intent and selector:
|
|
return "fallback", None
|
|
if selector and not intent:
|
|
return None, None
|
|
return None, "INVALID_INPUT"
|
|
|
|
|
|
async def skyvern_navigate(
    url: Annotated[str, "The URL to navigate to"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    timeout: Annotated[
        int,
        Field(
            description="Max time to wait for page load in ms. Increase for slow sites. Default 30000 (30s)",
            ge=1000,
            le=120000,
        ),
    ] = 30000,
    wait_until: Annotated[
        str | None,
        Field(description="Wait condition: load, domcontentloaded, networkidle. Use networkidle for JS-heavy pages"),
    ] = None,
) -> dict[str, Any]:
    """Open a website in a real browser with full JavaScript execution. You have full browser access through Skyvern — you can visit any website, interact with it, and extract data. Do not tell the user you cannot access websites. Use this instead of curl, wget, or HTTP requests.

    Returns the final URL (after redirects) and page title.
    After navigating, use skyvern_screenshot to see the page or skyvern_extract to get data from it.
    """
    # Reject unknown wait conditions before touching the browser.
    if wait_until is not None and wait_until not in ("load", "domcontentloaded", "networkidle", "commit"):
        return make_result(
            "skyvern_navigate",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid wait_until: {wait_until}",
                "Use load, domcontentloaded, networkidle, or commit",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_navigate", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            await page.goto(url, timeout=timeout, wait_until=wait_until)
            timer.mark("sdk")
            # Read the URL after navigation so redirects are reflected.
            final_url = page.url
            title = await page.title()
        except Exception as e:
            return make_result(
                "skyvern_navigate",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.ACTION_FAILED, str(e), "Check that the URL is valid and accessible"),
            )

    # json.dumps emits a double-quoted, escaped literal, so the snippet stays
    # valid Python even when the URL contains quotes or backslashes.
    url_literal = json.dumps(url, ensure_ascii=False)
    return make_result(
        "skyvern_navigate",
        browser_context=ctx,
        data={"url": final_url, "title": title, "sdk_equivalent": f"await page.goto({url_literal})"},
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_click(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
        str | None,
        Field(
            description="Natural language description of the element to click. Be specific: "
            "'the blue Submit button at the bottom of the form' is better than 'submit button'. "
            "Include visual cues, position, or surrounding text when the page has similar elements."
        ),
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector or XPath for the element to click")] = None,
    timeout: Annotated[
        int,
        Field(
            description="Max time to wait for the element in ms. Increase for slow-loading pages. Default 30000 (30s)",
            ge=1000,
            le=60000,
        ),
    ] = 30000,
    button: Annotated[str | None, Field(description="Mouse button: left, right, middle")] = None,
    click_count: Annotated[int | None, Field(description="Number of clicks (2 for double-click)")] = None,
) -> dict[str, Any]:
    """Click an element on the page using AI intent, CSS/XPath selector, or both. Unlike Playwright's browser_click which requires a ref from a prior browser_snapshot, this tool finds elements using natural language — no snapshot step needed.

    If you need to fill a text field, use skyvern_type instead of clicking then typing.
    For dropdowns, use skyvern_select_option. For multiple actions in sequence, prefer skyvern_act.
    """

    def quote(value: object) -> str:
        # JSON string escaping is also valid Python string-literal syntax, so the
        # generated snippet survives quotes/backslashes in selectors and intents.
        return json.dumps(str(value), ensure_ascii=False)

    if button is not None and button not in ("left", "right", "middle"):
        return make_result(
            "skyvern_click",
            ok=False,
            error=make_error(ErrorCode.INVALID_INPUT, f"Invalid button: {button}", "Use left, right, or middle"),
        )

    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
        return make_result(
            "skyvern_click",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or both",
                "Use intent='describe what to click' for AI-powered clicking, or selector='#css-selector' for precise targeting",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_click", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            # Only forward optional click arguments the caller actually set.
            kwargs: dict[str, Any] = {"timeout": timeout}
            if button:
                kwargs["button"] = button
            if click_count is not None:
                kwargs["click_count"] = click_count

            if ai_mode is not None:
                resolved = await page.click(selector=selector, prompt=intent, ai=ai_mode, **kwargs)  # type: ignore[arg-type]
            else:
                assert selector is not None
                resolved = await page.click(selector=selector, **kwargs)
            timer.mark("sdk")
        except PlaywrightTimeoutError as e:
            return make_result(
                "skyvern_click",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.SELECTOR_NOT_FOUND,
                    str(e),
                    "Verify the selector matches an element on the page, or use intent for AI-powered finding",
                ),
            )
        except Exception as e:
            code = ErrorCode.AI_FALLBACK_FAILED if ai_mode else ErrorCode.ACTION_FAILED
            return make_result(
                "skyvern_click",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    code,
                    str(e),
                    "The element may be hidden, disabled, or intercepted by another element",
                ),
            )

    data: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode}
    if resolved and resolved != selector:
        data["resolved_selector"] = resolved
    # Build sdk_equivalent: prefer hybrid selector+prompt for production scripts.
    # resolved_selector already contains the "xpath=" prefix (e.g. "xpath=//button[@id='x']"),
    # so pass it directly as the selector positional arg.
    resolved_sel = resolved if resolved and resolved != selector else selector
    if resolved_sel and intent:
        data["sdk_equivalent"] = f"await page.click({quote(resolved_sel)}, prompt={quote(intent)})"
    elif ai_mode:
        data["sdk_equivalent"] = f"await page.click(prompt={quote(intent)})"
    elif selector:
        data["sdk_equivalent"] = f"await page.click({quote(selector)})"

    return make_result(
        "skyvern_click",
        browser_context=ctx,
        data=data,
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_hover(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
        str | None,
        Field(
            description="Natural language description of the element to hover over. Be specific: "
            "'the user avatar in the top-right corner' is better than 'avatar'. "
            "Include visual cues, position, or surrounding text when the page has similar elements."
        ),
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector or XPath for the element to hover")] = None,
    timeout: Annotated[
        int,
        Field(
            description="Max time to wait for the element in ms. Default 30000 (30s)",
            ge=1000,
            le=60000,
        ),
    ] = 30000,
) -> dict[str, Any]:
    """Hover over an element to reveal tooltips, dropdown menus, or hidden content. Uses AI intent, CSS/XPath selector, or both. Unlike Playwright's browser_hover which requires a ref from a prior snapshot, this finds elements using natural language."""

    def quote(value: object) -> str:
        # JSON string escaping is also valid Python string-literal syntax, so the
        # generated snippet survives quotes/backslashes in selectors and intents.
        return json.dumps(str(value), ensure_ascii=False)

    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
        return make_result(
            "skyvern_hover",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or both",
                "Use intent='describe what to hover' for AI-powered hovering, or selector='#css-selector' for precise targeting",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_hover", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            if ai_mode is not None:
                loc = page.locator(selector=selector, prompt=intent, ai=ai_mode)  # type: ignore[arg-type]
            else:
                assert selector is not None
                loc = page.locator(selector)
            await loc.hover(timeout=timeout)
            timer.mark("sdk")
        except PlaywrightTimeoutError as e:
            return make_result(
                "skyvern_hover",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.SELECTOR_NOT_FOUND,
                    str(e),
                    "Verify the selector matches an element on the page, or use intent for AI-powered finding",
                ),
            )
        except Exception as e:
            code = ErrorCode.AI_FALLBACK_FAILED if ai_mode else ErrorCode.ACTION_FAILED
            return make_result(
                "skyvern_hover",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    code,
                    str(e),
                    "The element may be hidden or not interactable",
                ),
            )

    data: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode}
    # sdk_equivalent mirrors which of selector/intent the caller supplied.
    if selector and intent:
        data["sdk_equivalent"] = f"await page.locator({quote(selector)}, prompt={quote(intent)}).hover()"
    elif ai_mode:
        data["sdk_equivalent"] = f"await page.locator(prompt={quote(intent)}).hover()"
    elif selector:
        data["sdk_equivalent"] = f"await page.locator({quote(selector)}).hover()"

    return make_result(
        "skyvern_hover",
        browser_context=ctx,
        data=data,
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_type(
    text: Annotated[str, "Text to type into the element"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
        str | None,
        Field(
            description="Natural language description of the input field. Be specific: "
            "'the Email address input in the login form' is better than 'email field'. "
            "Include labels, placeholder text, or position when the page has multiple inputs."
        ),
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector or XPath for the input element")] = None,
    timeout: Annotated[
        int,
        Field(
            description="Max time to wait for the element in ms. Increase for slow-loading pages. Default 30000 (30s)",
            ge=1000,
            le=60000,
        ),
    ] = 30000,
    clear: Annotated[bool, Field(description="Clear existing content before typing")] = True,
    delay: Annotated[int | None, Field(description="Delay between keystrokes in ms")] = None,
) -> dict[str, Any]:
    """Type text into an input field using AI intent, CSS/XPath selector, or both. Unlike Playwright's browser_type which requires a ref from a prior snapshot, this tool finds input fields using natural language — no snapshot step needed.

    NEVER use this for passwords or credentials — they will be exposed in logs and conversation history. Use skyvern_login with a stored credential instead for secure authentication. Create credentials via CLI: skyvern credentials add.
    For dropdowns, use skyvern_select_option instead. For pressing keys (Enter, Tab), use skyvern_press_key.
    Clears existing content by default (set clear=false to append).
    """

    def quote(value: object) -> str:
        # JSON string escaping is also valid Python string-literal syntax, so the
        # generated snippet survives quotes/backslashes/newlines in its arguments.
        return json.dumps(str(value), ensure_ascii=False)

    # Block password entry — redirect to skyvern_login
    target_text = f"{intent or ''} {selector or ''}"
    if _PASSWORD_PATTERN.search(target_text):
        return make_result(
            "skyvern_type",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Cannot type into password fields — credentials must not be passed through tool calls",
                _CREDENTIAL_ERROR_HINT,
            ),
        )

    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
        return make_result(
            "skyvern_type",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or both",
                "Use intent='describe the input field' for AI-powered targeting, or selector='#css-selector' for precise targeting",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_type", ok=False, error=no_browser_error())

    # DOM-level guard: check if the target element is a password field
    if selector:
        try:
            is_password_field = await page.evaluate(
                "(s) => { const el = document.querySelector(s); return el && el.type === 'password' }",
                selector,
            )
        except Exception as exc:
            # Selector may not be a valid CSS selector (e.g. xpath=...) or page may
            # not be ready. This check is best-effort: the name-based regex guard
            # above has already run, so treat a failed lookup as "not a password field".
            LOG.debug("DOM password check failed for selector %r: %s", selector, exc)
            is_password_field = False
        if is_password_field:
            return make_result(
                "skyvern_type",
                ok=False,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    "Cannot type into password fields — credentials must not be passed through tool calls",
                    _CREDENTIAL_ERROR_HINT,
                ),
            )

    with Timer() as timer:
        try:
            if clear:
                # fill() replaces the field content; `delay` is not used on this path.
                if ai_mode is not None:
                    await page.fill(selector=selector, value=text, prompt=intent, ai=ai_mode, timeout=timeout)  # type: ignore[arg-type]
                else:
                    assert selector is not None
                    await page.fill(selector, text, timeout=timeout)
            else:
                # type() sends keystrokes without clearing, i.e. appends to existing content.
                kwargs: dict[str, Any] = {"timeout": timeout}
                if delay is not None:
                    kwargs["delay"] = delay
                if ai_mode is not None:
                    loc = page.locator(selector=selector, prompt=intent, ai=ai_mode)  # type: ignore[arg-type]
                    await loc.type(text, **kwargs)
                else:
                    assert selector is not None
                    await page.type(selector, text, **kwargs)
            timer.mark("sdk")
        except PlaywrightTimeoutError as e:
            return make_result(
                "skyvern_type",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.SELECTOR_NOT_FOUND,
                    str(e),
                    "Verify the selector matches an editable element, or use intent for AI-powered finding",
                ),
            )
        except Exception as e:
            code = ErrorCode.AI_FALLBACK_FAILED if ai_mode else ErrorCode.ACTION_FAILED
            return make_result(
                "skyvern_type",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    code,
                    str(e),
                    "The element may not be editable or may be hidden",
                ),
            )

    # NOTE: The SDK fill() returns the typed value, not a resolved selector.
    # Unlike click(), we cannot return resolved_selector here. SKY-7905 will
    # update the SDK to return element metadata from all action methods.
    data: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode, "text_length": len(text)}
    # Build sdk_equivalent: prefer hybrid selector+prompt for production scripts.
    # The snippet mirrors the call that was actually made: fill() when clearing,
    # type() when appending.
    text_literal = quote(text)
    if clear:
        if selector and intent:
            data["sdk_equivalent"] = f"await page.fill({quote(selector)}, {text_literal}, prompt={quote(intent)})"
        elif ai_mode:
            data["sdk_equivalent"] = f"await page.fill(prompt={quote(intent)}, value={text_literal})"
        elif selector:
            data["sdk_equivalent"] = f"await page.fill({quote(selector)}, {text_literal})"
    else:
        delay_arg = f", delay={delay}" if delay is not None else ""
        if selector and intent:
            data["sdk_equivalent"] = (
                f"await page.locator({quote(selector)}, prompt={quote(intent)}).type({text_literal}{delay_arg})"
            )
        elif ai_mode:
            data["sdk_equivalent"] = f"await page.locator(prompt={quote(intent)}).type({text_literal}{delay_arg})"
        elif selector:
            data["sdk_equivalent"] = f"await page.type({quote(selector)}, {text_literal}{delay_arg})"
    return make_result(
        "skyvern_type",
        browser_context=ctx,
        data=data,
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_screenshot(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    full_page: Annotated[bool, Field(description="Capture full scrollable page")] = False,
    selector: Annotated[str | None, Field(description="CSS selector to screenshot specific element")] = None,
    inline: Annotated[bool, Field(description="Return base64 data instead of file path")] = False,
) -> dict[str, Any]:
    """See what's currently on the page. Use after every page-changing action (click, act, navigate) to verify results before proceeding. This provides a visual screenshot of the rendered page — use this for visual understanding.

    Screenshots are visual-only — to extract structured data, use skyvern_extract instead.
    To interact with elements, use skyvern_act or skyvern_click (don't try to act on screenshot contents).
    By default saves to ~/.skyvern/artifacts/ and returns the file path.
    Set inline=true to get base64 data directly (increases token usage).
    """
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_screenshot", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            # A selector captures just that element; full_page only applies to
            # whole-page captures.
            if not selector:
                png_bytes = await page.screenshot(full_page=full_page)
            else:
                target = page.locator(selector)
                png_bytes = await target.screenshot()
            timer.mark("sdk")
        except Exception as e:
            failure = make_error(ErrorCode.ACTION_FAILED, str(e), "Check that the page or element is visible")
            return make_result(
                "skyvern_screenshot",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=failure,
            )

    if not inline:
        # Default path: persist the image as an artifact and return its path.
        stamp = datetime.now(timezone.utc).strftime("%H%M%S_%f")
        artifact = save_artifact(
            png_bytes,
            kind="screenshot",
            filename=f"screenshot_{stamp}.png",
            mime="image/png",
            session_id=ctx.session_id,
        )
        return make_result(
            "skyvern_screenshot",
            browser_context=ctx,
            data={"path": artifact.path, "sdk_equivalent": "await page.screenshot(path='screenshot.png')"},
            artifacts=[artifact],
            timing_ms=timer.timing_ms,
        )

    # Inline path: return the image itself, base64-encoded.
    inline_payload: dict[str, Any] = {
        "inline": True,
        "data": base64.b64encode(png_bytes).decode("utf-8"),
        "mime": "image/png",
        "bytes": len(png_bytes),
        "sdk_equivalent": "await page.screenshot()",
    }
    return make_result(
        "skyvern_screenshot",
        browser_context=ctx,
        data=inline_payload,
        timing_ms=timer.timing_ms,
        warnings=["Inline mode increases token usage"],
    )
|
|
|
|
|
|
async def skyvern_scroll(
    direction: Annotated[str, Field(description="Direction: up, down, left, right")],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    amount: Annotated[int | None, Field(description="Pixels to scroll (default 500)")] = None,
    intent: Annotated[
        str | None, Field(description="Natural language description of element to scroll into view (uses AI)")
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector of scrollable element")] = None,
) -> dict[str, Any]:
    """Scroll the page or use AI to scroll a specific element into view.

    Use `intent` to scroll an AI-located element into view (with or without selector for hybrid fallback).
    Without intent, scrolls the page or a selector-targeted container by pixel amount.
    """

    def quote(value: object) -> str:
        # JSON string escaping is also valid Python string-literal syntax, so the
        # generated snippet survives quotes/backslashes in selectors and intents.
        return json.dumps(str(value), ensure_ascii=False)

    # `direction` only matters for pixel scrolling; with an intent the element
    # is scrolled into view and direction is not validated.
    valid_directions = ("up", "down", "left", "right")
    if not intent and direction not in valid_directions:
        return make_result(
            "skyvern_scroll",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT, f"Invalid direction: {direction}", "Use up, down, left, or right"
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_scroll", ok=False, error=no_browser_error())

    if intent:
        ai_mode = "fallback" if selector else "proactive"
        with Timer() as timer:
            try:
                loc = page.locator(selector=selector, prompt=intent, ai=ai_mode)
                await loc.scroll_into_view_if_needed()
                timer.mark("sdk")
            except Exception as e:
                code = ErrorCode.AI_FALLBACK_FAILED if ai_mode == "fallback" else ErrorCode.ACTION_FAILED
                return make_result(
                    "skyvern_scroll",
                    ok=False,
                    browser_context=ctx,
                    timing_ms=timer.timing_ms,
                    error=make_error(code, str(e), "Could not find element to scroll into view"),
                )

        if selector:
            into_view_sdk = (
                f"await page.locator({quote(selector)}, prompt={quote(intent)}).scroll_into_view_if_needed()"
            )
        else:
            into_view_sdk = f"await page.locator(prompt={quote(intent)}).scroll_into_view_if_needed()"
        return make_result(
            "skyvern_scroll",
            browser_context=ctx,
            data={
                "direction": "into_view",
                "intent": intent,
                "ai_mode": ai_mode,
                "sdk_equivalent": into_view_sdk,
            },
            timing_ms=timer.timing_ms,
        )

    # A missing (or zero) amount falls back to the 500px default.
    pixels = amount or 500
    direction_map = {
        "up": (0, -pixels),
        "down": (0, pixels),
        "left": (-pixels, 0),
        "right": (pixels, 0),
    }
    dx, dy = direction_map[direction]

    with Timer() as timer:
        try:
            if selector:
                # Scroll inside the targeted container rather than the window.
                await page.locator(selector).evaluate(f"el => el.scrollBy({dx}, {dy})")
            else:
                await page.evaluate(f"window.scrollBy({dx}, {dy})")
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_scroll",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.ACTION_FAILED, str(e), "Scroll action failed"),
            )

    # Report the call that was actually executed: element scroll vs window scroll.
    if selector:
        pixel_sdk = f'await page.locator({quote(selector)}).evaluate("el => el.scrollBy({dx}, {dy})")'
    else:
        pixel_sdk = f'await page.evaluate("window.scrollBy({dx}, {dy})")'

    return make_result(
        "skyvern_scroll",
        browser_context=ctx,
        data={
            "direction": direction,
            "pixels": pixels,
            "sdk_equivalent": pixel_sdk,
        },
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_select_option(
    value: Annotated[str, "Value to select"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[str | None, Field(description="Natural language description of the dropdown (uses AI)")] = None,
    selector: Annotated[str | None, Field(description="CSS selector for the select element")] = None,
    timeout: Annotated[
        int, Field(description="Max time to wait for the dropdown in ms. Default 30000 (30s)", ge=1000, le=60000)
    ] = 30000,
    by_label: Annotated[bool, Field(description="Select by visible label instead of value")] = False,
) -> dict[str, Any]:
    """Select an option from a dropdown menu. Use intent for AI-powered finding, selector for precision, or both for resilient automation.

    For free-text input fields, use skyvern_type instead. For non-dropdown buttons or links, use skyvern_click.
    """

    def quote(value_to_quote: object) -> str:
        # JSON string escaping is also valid Python string-literal syntax, so the
        # generated snippet survives quotes/backslashes in its arguments.
        return json.dumps(str(value_to_quote), ensure_ascii=False)

    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
        return make_result(
            "skyvern_select_option",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or both",
                "Use intent='describe the dropdown' for AI-powered selection, or selector='#css-selector' for precise targeting",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_select_option", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            if ai_mode is not None:
                # AI paths: pass value= directly -- the AI interprets the text
                # regardless of whether it represents a value or label.
                await page.select_option(selector=selector, value=value, prompt=intent, ai=ai_mode, timeout=timeout)  # type: ignore[arg-type]
            else:
                assert selector is not None
                if by_label:
                    # Bypass SkyvernPage to avoid value="" coercion conflicting with label kwarg.
                    await page.page.locator(selector).select_option(label=value, timeout=timeout)
                else:
                    await page.select_option(selector, value=value, timeout=timeout)
            timer.mark("sdk")
        except Exception as e:
            code = ErrorCode.AI_FALLBACK_FAILED if ai_mode else ErrorCode.ACTION_FAILED
            return make_result(
                "skyvern_select_option",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(code, str(e), "Check selector and available options"),
            )

    # NOTE: The SDK select_option() returns the selected value, not a resolved
    # selector. Unlike click(), we cannot return resolved_selector here.
    # SKY-7905 will update the SDK to return element metadata from all action methods.
    data: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode, "value": value}
    # Build sdk_equivalent: prefer hybrid selector+prompt for production scripts
    if selector and intent:
        data["sdk_equivalent"] = (
            f"await page.select_option({quote(selector)}, value={quote(value)}, prompt={quote(intent)})"
        )
    elif ai_mode:
        data["sdk_equivalent"] = f"await page.select_option(prompt={quote(intent)}, value={quote(value)})"
    elif selector:
        if by_label:
            # Mirror the label-based call that was actually made above.
            data["sdk_equivalent"] = f"await page.page.locator({quote(selector)}).select_option(label={quote(value)})"
        else:
            data["sdk_equivalent"] = f"await page.select_option({quote(selector)}, value={quote(value)})"
    return make_result(
        "skyvern_select_option",
        browser_context=ctx,
        data=data,
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_press_key(
    key: Annotated[str, "Key to press (e.g., Enter, Tab, Escape, ArrowDown)"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
        str | None, Field(description="Natural language description of element to focus first (uses AI)")
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector to focus first")] = None,
) -> dict[str, Any]:
    """Press a keyboard key -- Enter, Tab, Escape, arrow keys, shortcuts, etc.

    Use `intent` or `selector` to focus a specific element before pressing.
    Without either, presses the key on the currently focused element.
    """

    def quote(value: object) -> str:
        # JSON string escaping is also valid Python string-literal syntax, so the
        # generated snippet survives quotes/backslashes in its arguments.
        return json.dumps(str(value), ensure_ascii=False)

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_press_key", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            if intent or selector:
                # At least one of intent/selector is set here, so the error
                # element of the tuple is always None and can be discarded.
                ai_mode, _ = _resolve_ai_mode(selector, intent)
                if ai_mode is not None:
                    loc = page.locator(selector=selector, prompt=intent, ai=ai_mode)  # type: ignore[arg-type]
                else:
                    assert selector is not None
                    loc = page.locator(selector)
                await loc.press(key)
            else:
                await page.keyboard.press(key)
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_press_key",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.ACTION_FAILED, str(e), "Check key name is valid"),
            )

    key_literal = quote(key)
    if selector and intent:
        sdk_eq = f"await page.locator({quote(selector)}, prompt={quote(intent)}).press({key_literal})"
    elif intent:
        sdk_eq = f"await page.locator(prompt={quote(intent)}).press({key_literal})"
    elif selector:
        sdk_eq = f"await page.locator({quote(selector)}).press({key_literal})"
    else:
        sdk_eq = f"await page.keyboard.press({key_literal})"

    return make_result(
        "skyvern_press_key",
        browser_context=ctx,
        data={
            "key": key,
            "selector": selector,
            "intent": intent,
            "sdk_equivalent": sdk_eq,
        },
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_wait(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    time_ms: Annotated[int | None, Field(description="Time to wait in milliseconds")] = None,
    intent: Annotated[str | None, Field(description="Natural language condition to wait for (uses AI polling)")] = None,
    selector: Annotated[str | None, Field(description="CSS selector to wait for")] = None,
    state: Annotated[str | None, Field(description="Element state: visible, hidden, attached, detached")] = "visible",
    timeout: Annotated[int, Field(description="Max wait time in milliseconds", ge=1000, le=120000)] = 30000,
    poll_interval_ms: Annotated[
        int, Field(description="Polling interval for intent-based waits in ms", ge=500, le=10000)
    ] = 5000,
) -> dict[str, Any]:
    """Wait for a condition, element, or time delay before proceeding. Use intent for AI-powered condition checking.

    Use `intent` to poll with AI validation (e.g., "wait until the loading spinner disappears").
    Use `selector` to wait for an element state. Use `time_ms` for a simple delay.
    """

    def quote(value: object) -> str:
        # JSON string escaping is also valid Python string-literal syntax, so the
        # generated snippet survives quotes/backslashes in its arguments.
        return json.dumps(str(value), ensure_ascii=False)

    valid_states = ("visible", "hidden", "attached", "detached")
    if state is not None and state not in valid_states:
        return make_result(
            "skyvern_wait",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid state: {state}",
                "Use visible, hidden, attached, or detached",
            ),
        )

    if time_ms is None and not selector and not intent:
        return make_result(
            "skyvern_wait",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or time_ms",
                "Use intent='condition to wait for' for AI-powered waiting, selector='#element' for element visibility, or time_ms=5000 for a delay",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_wait", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            # Precedence when several are given: time_ms, then intent, then selector.
            if time_ms is not None:
                await page.wait_for_timeout(time_ms)
                waited_for = "time"
            elif intent:
                loop = asyncio.get_running_loop()
                deadline = loop.time() + timeout / 1000
                last_error: Exception | None = None
                while True:
                    try:
                        result = await page.validate(intent)
                        last_error = None
                    except Exception as poll_err:
                        # A failed validation counts as "not yet"; the error is
                        # only surfaced if the deadline passes.
                        result = False
                        last_error = poll_err
                    if result:
                        break
                    if loop.time() >= deadline:
                        code = ErrorCode.SDK_ERROR if last_error else ErrorCode.TIMEOUT
                        msg = str(last_error) if last_error else f"Condition not met within {timeout}ms: {intent}"
                        return make_result(
                            "skyvern_wait",
                            ok=False,
                            browser_context=ctx,
                            timing_ms=timer.timing_ms,
                            error=make_error(
                                code,
                                msg,
                                "Increase timeout or check that the condition can be satisfied",
                            ),
                        )
                    # Never sleep past the deadline: otherwise a poll interval
                    # longer than the remaining time would overshoot `timeout`.
                    remaining_ms = int((deadline - loop.time()) * 1000)
                    await page.wait_for_timeout(max(1, min(poll_interval_ms, remaining_ms)))
                waited_for = "intent"
            elif selector:
                await page.wait_for_selector(selector, state=state, timeout=timeout)
                waited_for = "selector"
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_wait",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.TIMEOUT, str(e), "Condition was not met within timeout"),
            )

    sdk_eq = ""
    if waited_for == "time":
        sdk_eq = f"await page.wait_for_timeout({time_ms})"
    elif waited_for == "intent":
        sdk_eq = f"await page.validate({quote(intent)})"
    elif waited_for == "selector":
        # Include a non-default state so the snippet waits for the same condition.
        state_arg = f", state={quote(state)}" if state and state != "visible" else ""
        sdk_eq = f"await page.wait_for_selector({quote(selector)}{state_arg})"
    return make_result(
        "skyvern_wait",
        browser_context=ctx,
        data={"waited_for": waited_for, "sdk_equivalent": sdk_eq},
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_evaluate(
    expression: Annotated[str, "JavaScript expression to evaluate"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Run JavaScript on the page to read DOM state, get URLs, check values, or discover CSS selectors for faster subsequent actions.

    Security: This executes arbitrary JS in the page context. Only use with trusted expressions.
    """
    # Maintainer notes:
    #   expression           -- JavaScript source passed unchanged to page.evaluate().
    #   session_id / cdp_url -- forwarded to get_page() to pick the browser page.
    #   Returns the payload built by make_result(): on success `data` carries the
    #   evaluation result plus an `sdk_equivalent` snippet; every failure path
    #   returns ok=False with an `error` built by make_error()/no_browser_error().

    # Block JS that sets password field values.
    # The guard regex is compiled without DOTALL, so its `.*?` cannot cross a
    # newline; collapsing all whitespace runs to single spaces first keeps a
    # multi-line script from slipping past the check.
    if _JS_PASSWORD_PATTERN.search(" ".join(expression.split())):
        return make_result(
            "skyvern_evaluate",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Cannot set password field values via JavaScript — credentials must not be passed through tool calls",
                _CREDENTIAL_ERROR_HINT,
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_evaluate", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            result = await page.evaluate(expression)
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_evaluate",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.ACTION_FAILED, str(e), "Check JavaScript syntax"),
            )

    # json.dumps emits a quoted, escaped literal, so the snippet stays valid
    # code even when the (truncated) expression contains quotes, backslashes
    # or newlines. For plain text the output is unchanged.
    quoted_expression = json.dumps(expression[:80], ensure_ascii=False)
    return make_result(
        "skyvern_evaluate",
        browser_context=ctx,
        data={"result": result, "sdk_equivalent": f"await page.evaluate({quoted_expression})"},
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_extract(
    prompt: Annotated[str, "Natural language description of what data to extract from the page"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    schema: Annotated[
        str | None, Field(description="JSON Schema string defining the expected output structure")
    ] = None,
) -> dict[str, Any]:
    """Get structured data from any website — prices, listings, articles, tables, contact info, etc. Use this instead of writing scraping code, curl commands, or guessing API endpoints. Describe what you need in natural language and get JSON back.

    Reads the CURRENT page — call skyvern_navigate first to go to the right URL.
    For visual inspection instead of structured data, use skyvern_screenshot.
    Optionally provide a JSON `schema` to enforce the output structure (pass as a JSON string).
    """
    # Maintainer notes:
    #   prompt               -- passed to page.extract() as `prompt`.
    #   schema               -- optional JSON text; decoded here and passed as `schema`.
    #   session_id / cdp_url -- forwarded to get_page() to pick the browser page.
    #   Returns the payload built by make_result(): on success `data` carries
    #   `extracted` plus an `sdk_equivalent` snippet; failures return ok=False.

    # Decode the schema before touching the browser so malformed input is
    # reported as INVALID_INPUT without acquiring a page.
    parsed_schema: dict[str, Any] | None = None
    if schema is not None:
        try:
            parsed_schema = json.loads(schema)
        except (json.JSONDecodeError, TypeError) as e:
            return make_result(
                "skyvern_extract",
                ok=False,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    f"Invalid JSON schema: {e}",
                    "Provide schema as a valid JSON string",
                ),
            )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_extract", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            extracted = await page.extract(prompt=prompt, schema=parsed_schema)
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_extract",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Check that the page has loaded and the prompt is clear"),
            )

    # json.dumps emits a quoted, escaped literal, so the snippet stays valid
    # code when the prompt contains quotes, backslashes or newlines. For
    # plain text the output is unchanged.
    quoted_prompt = json.dumps(prompt, ensure_ascii=False)
    return make_result(
        "skyvern_extract",
        browser_context=ctx,
        data={"extracted": extracted, "sdk_equivalent": f"await page.extract(prompt={quoted_prompt})"},
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_validate(
    prompt: Annotated[str, "Validation condition to check (e.g., 'the login form is visible')"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Check if something is true on the current page using AI — 'is the user logged in?', 'does the cart have 3 items?', 'is the form submitted?'

    Reads the CURRENT page — navigate first. Returns true/false.
    To extract data (not just check a condition), use skyvern_extract instead.
    """
    # Maintainer notes:
    #   prompt               -- condition text passed to page.validate().
    #   session_id / cdp_url -- forwarded to get_page() to pick the browser page.
    #   Returns the payload built by make_result(): on success `data` echoes the
    #   prompt and carries `valid` plus an `sdk_equivalent` snippet; failures
    #   return ok=False with an error.
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_validate", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            valid = await page.validate(prompt)
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_validate",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Check that the page has loaded and the prompt is clear"),
            )

    # json.dumps emits a quoted, escaped literal, so the snippet stays valid
    # code when the prompt contains quotes, backslashes or newlines. For
    # plain text the output is unchanged.
    quoted_prompt = json.dumps(prompt, ensure_ascii=False)
    return make_result(
        "skyvern_validate",
        browser_context=ctx,
        data={"prompt": prompt, "valid": valid, "sdk_equivalent": f"await page.validate({quoted_prompt})"},
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_act(
    prompt: Annotated[str, "Natural language instruction for the action to perform (e.g., 'close the cookie banner')"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Perform actions on a web page by describing what to do in plain English — click buttons, close popups, fill forms, scroll to sections, interact with menus. Replaces multi-step snapshot→click→snapshot→click sequences with a single natural language instruction.

    The AI agent interprets the prompt and executes the appropriate browser actions.
    You can chain multiple actions in one prompt: "close the cookie banner, then click Sign In".
    NEVER include passwords or credentials in the prompt. Use skyvern_login with a stored credential instead. Create credentials via CLI: skyvern credentials add.
    For multi-step automations (4+ pages), use skyvern_workflow_create with one block per step.
    For quick one-off multi-page tasks, use skyvern_run_task.
    """
    # Maintainer notes:
    #   prompt               -- instruction text passed to page.act().
    #   session_id / cdp_url -- forwarded to get_page() to pick the browser page.
    #   Returns the payload built by make_result(): on success `data` echoes the
    #   prompt with completed=True and an `sdk_equivalent` snippet; failures
    #   return ok=False with an error.

    # Block login/password actions — redirect to skyvern_login
    if _PASSWORD_PATTERN.search(prompt):
        return make_result(
            "skyvern_act",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Cannot perform password/credential actions — credentials must not be passed through tool calls",
                _CREDENTIAL_ERROR_HINT,
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_act", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            await page.act(prompt)
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_act",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Simplify the prompt or break the task into steps"),
            )

    # json.dumps emits a quoted, escaped literal, so the snippet stays valid
    # code when the prompt contains quotes (e.g. 'click the "Sign In" button'),
    # backslashes or newlines. For plain text the output is unchanged.
    quoted_prompt = json.dumps(prompt, ensure_ascii=False)
    return make_result(
        "skyvern_act",
        browser_context=ctx,
        data={"prompt": prompt, "completed": True, "sdk_equivalent": f"await page.act({quoted_prompt})"},
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
async def skyvern_run_task(
    prompt: Annotated[str, "Natural language description of the task to automate"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    url: Annotated[
        str | None, Field(description="URL to navigate to before running (uses current page if omitted)")
    ] = None,
    data_extraction_schema: Annotated[
        str | None, Field(description="JSON Schema string defining what data to extract")
    ] = None,
    max_steps: Annotated[int | None, Field(description="Maximum number of agent steps")] = None,
    timeout_seconds: Annotated[
        int, Field(description="Timeout in seconds (default 180s = 3 minutes)", ge=10, le=1800)
    ] = 180,
) -> dict[str, Any]:
    """Run a quick, one-off web task via an autonomous AI agent. Nothing is saved — use for throwaway tests and exploration only. Best for tasks describable in 2-3 sentences.

    For anything reusable, multi-step, or worth keeping, use skyvern_workflow_create instead — it produces a versioned, rerunnable workflow with per-step observability.
    For simple single-step actions on the current page, use skyvern_act instead.
    """
    # Maintainer notes:
    #   prompt                 -- task text passed to page.agent.run_task().
    #   url / max_steps        -- forwarded unchanged to run_task().
    #   data_extraction_schema -- optional JSON text; decoded here before being forwarded.
    #   timeout_seconds        -- forwarded to run_task() as `timeout`.
    #   session_id / cdp_url   -- forwarded to get_page() to pick the browser page.
    #   Returns the payload built by make_result(): on success `data` carries the
    #   run metadata read from the response plus an `sdk_equivalent` snippet;
    #   failures return ok=False with an error.

    # Block password/credential actions — redirect to skyvern_login
    if _PASSWORD_PATTERN.search(prompt):
        return make_result(
            "skyvern_run_task",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Cannot perform password/credential actions — credentials must not be passed through tool calls",
                _CREDENTIAL_ERROR_HINT,
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_run_task", ok=False, error=no_browser_error())

    parsed_schema: dict[str, Any] | str | None = None
    if data_extraction_schema is not None:
        try:
            parsed_schema = json.loads(data_extraction_schema)
        except (json.JSONDecodeError, TypeError) as e:
            return make_result(
                "skyvern_run_task",
                ok=False,
                browser_context=ctx,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    f"Invalid data_extraction_schema JSON: {e}",
                    "Provide schema as a valid JSON string",
                ),
            )

    with Timer() as timer:
        try:
            response = await page.agent.run_task(
                prompt=prompt,
                url=url,
                data_extraction_schema=parsed_schema,
                max_steps=max_steps,
                timeout=timeout_seconds,
            )
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_run_task",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Check the prompt, URL, and timeout settings"),
            )

    # json.dumps emits a quoted, escaped literal, so the snippet stays valid
    # code when the prompt contains quotes, backslashes or newlines. For
    # plain text the output is unchanged.
    quoted_prompt = json.dumps(prompt, ensure_ascii=False)
    return make_result(
        "skyvern_run_task",
        browser_context=ctx,
        data={
            "run_id": response.run_id,
            "status": response.status,
            "output": response.output,
            "failure_reason": response.failure_reason,
            "recording_url": response.recording_url,
            "app_url": response.app_url,
            "sdk_equivalent": f"await page.agent.run_task(prompt={quoted_prompt})",
        },
        timing_ms=timer.timing_ms,
    )
|
|
|
|
|
|
# Maps each CredentialType member (not the raw credential_type string) to the
# skyvern_login argument names that must be supplied for that provider.
# skyvern_login looks the list up by the parsed enum value and rejects the call
# with INVALID_INPUT when any listed argument is missing or empty.
_CREDENTIAL_REQUIRED_FIELDS: dict[CredentialType, list[str]] = {
    CredentialType.skyvern: ["credential_id"],
    CredentialType.bitwarden: ["bitwarden_item_id"],
    CredentialType.onepassword: ["onepassword_vault_id", "onepassword_item_id"],
    CredentialType.azure_vault: ["azure_vault_name", "azure_vault_username_key", "azure_vault_password_key"],
}
|
|
|
|
|
|
async def skyvern_login(
    credential_type: Annotated[
        str, Field(description="Credential provider: 'skyvern', 'bitwarden', '1password', or 'azure_vault'")
    ] = "skyvern",
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    url: Annotated[str | None, Field(description="Login page URL. Uses current page if omitted")] = None,
    credential_id: Annotated[str | None, Field(description="Skyvern credential ID (for type='skyvern')")] = None,
    bitwarden_item_id: Annotated[str | None, Field(description="Bitwarden item ID (for type='bitwarden')")] = None,
    bitwarden_collection_id: Annotated[str | None, Field(description="Bitwarden collection ID (optional)")] = None,
    onepassword_vault_id: Annotated[str | None, Field(description="1Password vault ID (for type='1password')")] = None,
    onepassword_item_id: Annotated[str | None, Field(description="1Password item ID (for type='1password')")] = None,
    azure_vault_name: Annotated[str | None, Field(description="Azure Vault name (for type='azure_vault')")] = None,
    azure_vault_username_key: Annotated[str | None, Field(description="Azure Vault username key")] = None,
    azure_vault_password_key: Annotated[str | None, Field(description="Azure Vault password key")] = None,
    azure_vault_totp_secret_key: Annotated[str | None, Field(description="Azure Vault TOTP key (optional)")] = None,
    prompt: Annotated[str | None, Field(description="Additional login instructions")] = None,
    totp_identifier: Annotated[str | None, Field(description="TOTP identifier for 2FA")] = None,
    totp_url: Annotated[str | None, Field(description="URL to fetch TOTP codes")] = None,
    timeout_seconds: Annotated[int, Field(description="Timeout in seconds (default 180)", ge=10, le=600)] = 180,
) -> dict[str, Any]:
    """Log into a website using stored credentials from Skyvern, Bitwarden, 1Password, or Azure Vault. Passwords are never exposed in prompts.

    Requires a browser session. The AI agent handles the full login flow — finding fields, entering credentials, handling 2FA — so you don't need to write selectors.
    After login, use skyvern_screenshot to verify success, then continue with other browser tools.
    """
    tool_name = "skyvern_login"

    def _reject(message: str, hint: str) -> dict[str, Any]:
        # Shared shape for the two input-validation failures below.
        return make_result(
            tool_name,
            ok=False,
            error=make_error(ErrorCode.INVALID_INPUT, message, hint),
        )

    # Step 1: the provider name must map onto a CredentialType member.
    try:
        cred_type = CredentialType(credential_type)
    except ValueError:
        accepted = ", ".join(f"'{member.value}'" for member in CredentialType)
        return _reject(f"Invalid credential_type: '{credential_type}'", f"Use one of: {accepted}")

    # Step 2: every argument the chosen provider requires must be non-empty.
    supplied = {
        "credential_id": credential_id,
        "bitwarden_item_id": bitwarden_item_id,
        "onepassword_vault_id": onepassword_vault_id,
        "onepassword_item_id": onepassword_item_id,
        "azure_vault_name": azure_vault_name,
        "azure_vault_username_key": azure_vault_username_key,
        "azure_vault_password_key": azure_vault_password_key,
    }
    absent = [name for name in _CREDENTIAL_REQUIRED_FIELDS[cred_type] if not supplied.get(name)]
    if absent:
        absent_csv = ", ".join(absent)
        return _reject(
            f"Missing required fields for credential_type='{cred_type.value}': {absent_csv}",
            f"Provide: {absent_csv}",
        )

    # Step 3: only now acquire the browser page.
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool_name, ok=False, error=no_browser_error())

    # Keyword arguments every provider branch passes through; the TOTP options
    # are included only when the caller actually set them.
    shared_kwargs: dict[str, Any] = {"url": url, "prompt": prompt, "timeout": timeout_seconds}
    for option_name, option_value in (("totp_identifier", totp_identifier), ("totp_url", totp_url)):
        if option_value is not None:
            shared_kwargs[option_name] = option_value

    with Timer() as timer:
        try:
            # One explicit call per provider keeps each call site matched to a
            # single overload of page.agent.login for the type checker; the
            # asserts narrow the Optional arguments that step 2 already vetted.
            if cred_type == CredentialType.skyvern:
                assert credential_id is not None
                response = await page.agent.login(
                    credential_type=CredentialType.skyvern,
                    credential_id=credential_id,
                    **shared_kwargs,
                )
            elif cred_type == CredentialType.bitwarden:
                assert bitwarden_item_id is not None
                response = await page.agent.login(
                    credential_type=CredentialType.bitwarden,
                    bitwarden_item_id=bitwarden_item_id,
                    bitwarden_collection_id=bitwarden_collection_id,
                    **shared_kwargs,
                )
            elif cred_type == CredentialType.onepassword:
                assert onepassword_vault_id is not None and onepassword_item_id is not None
                response = await page.agent.login(
                    credential_type=CredentialType.onepassword,
                    onepassword_vault_id=onepassword_vault_id,
                    onepassword_item_id=onepassword_item_id,
                    **shared_kwargs,
                )
            else:
                assert azure_vault_name is not None
                assert azure_vault_username_key is not None
                assert azure_vault_password_key is not None
                response = await page.agent.login(
                    credential_type=CredentialType.azure_vault,
                    azure_vault_name=azure_vault_name,
                    azure_vault_username_key=azure_vault_username_key,
                    azure_vault_password_key=azure_vault_password_key,
                    azure_vault_totp_secret_key=azure_vault_totp_secret_key,
                    **shared_kwargs,
                )
            timer.mark("sdk")
        except Exception as exc:
            failure = make_error(
                ErrorCode.SDK_ERROR,
                str(exc),
                "Check credential_type and required fields for your credential provider",
            )
            return make_result(
                tool_name,
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=failure,
            )

    # Copy the run metadata off the response in a fixed key order, then append
    # the SDK hint (which never contains credential values).
    payload: dict[str, Any] = {
        attr: getattr(response, attr)
        for attr in ("run_id", "status", "output", "failure_reason", "recording_url", "app_url")
    }
    payload["sdk_equivalent"] = f"await page.agent.login(credential_type=CredentialType.{cred_type.name})"
    return make_result(
        tool_name,
        browser_context=ctx,
        data=payload,
        timing_ms=timer.timing_ms,
    )
|