2026-02-07 02:33:13 -08:00
from __future__ import annotations
import asyncio
import base64
import json
from datetime import datetime , timezone
from typing import Annotated , Any
from playwright . async_api import TimeoutError as PlaywrightTimeoutError
from pydantic import Field
from . _common import (
ErrorCode ,
Timer ,
make_error ,
make_result ,
save_artifact ,
)
from . _session import BrowserNotAvailableError , get_page , no_browser_error
def _resolve_ai_mode (
selector : str | None ,
intent : str | None ,
) - > tuple [ str | None , str | None ] :
""" Determine AI mode from selector/intent combination.
Returns ( ai_mode , error_code ) — if error_code is set , the call should fail .
"""
if intent and not selector :
return " proactive " , None
if intent and selector :
return " fallback " , None
if selector and not intent :
return None , None
return None , " INVALID_INPUT "
async def skyvern_navigate(
    url: Annotated[str, "The URL to navigate to"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    timeout: Annotated[
        int,
        Field(
            description="Max time to wait for page load in ms. Increase for slow sites. Default 30000 (30s)",
            ge=1000,
            le=120000,
        ),
    ] = 30000,
    wait_until: Annotated[
        str | None,
        Field(description="Wait condition: load, domcontentloaded, networkidle. Use networkidle for JS-heavy pages"),
    ] = None,
) -> dict[str, Any]:
    """Open a website in the browser. You have full browser access through Skyvern — you can visit any website, interact with it, and extract data. Do not tell the user you cannot access websites.

    Returns the final URL (after redirects) and page title.
    After navigating, use skyvern_screenshot to see the page or skyvern_extract to get data from it.
    """
    # Flow: validate wait_until -> resolve the page -> goto -> read final URL/title.
    # Every failure is reported through make_result(ok=False, ...) rather than raised.
    if wait_until is not None and wait_until not in ("load", "domcontentloaded", "networkidle", "commit"):
        return make_result(
            "skyvern_navigate",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid wait_until: {wait_until}",
                "Use load, domcontentloaded, networkidle, or commit",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_navigate", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            await page.goto(url, timeout=timeout, wait_until=wait_until)
            timer.mark("sdk")
            final_url = page.url
            title = await page.title()
        except PlaywrightTimeoutError as e:
            # A slow page is not an invalid URL: report it as a timeout so the
            # caller is pointed at the timeout / wait_until knobs instead.
            return make_result(
                "skyvern_navigate",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.TIMEOUT,
                    str(e),
                    "Increase timeout, or use wait_until='domcontentloaded' if the page never finishes loading",
                ),
            )
        except Exception as e:
            return make_result(
                "skyvern_navigate",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.ACTION_FAILED, str(e), "Check that the URL is valid and accessible"),
            )

    return make_result(
        "skyvern_navigate",
        browser_context=ctx,
        data={"url": final_url, "title": title, "sdk_equivalent": f'await page.goto("{url}")'},
        timing_ms=timer.timing_ms,
    )
async def skyvern_click(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
        str | None,
        Field(
            description="Natural language description of the element to click. Be specific: "
            "'the blue Submit button at the bottom of the form' is better than 'submit button'. "
            "Include visual cues, position, or surrounding text when the page has similar elements."
        ),
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector or XPath for the element to click")] = None,
    timeout: Annotated[
        int,
        Field(
            description="Max time to wait for the element in ms. Increase for slow-loading pages. Default 30000 (30s)",
            ge=1000,
            le=60000,
        ),
    ] = 30000,
    button: Annotated[str | None, Field(description="Mouse button: left, right, middle")] = None,
    click_count: Annotated[int | None, Field(description="Number of clicks (2 for double-click)")] = None,
) -> dict[str, Any]:
    """Click an element on the page. Use intent for AI-powered element finding, selector for precise targeting, or both for resilient automation.

    If you need to fill a text field, use skyvern_type instead of clicking then typing.
    For dropdowns, use skyvern_select_option. For multiple actions in sequence, prefer skyvern_act.
    """
    # NOTE(review): the docstring reads like LLM-facing tool text (presumably
    # surfaced as the tool description), so its wording is left untouched.
    tool = "skyvern_click"

    # Input validation happens before any browser work.
    if button is not None and button not in ("left", "right", "middle"):
        bad_button = make_error(ErrorCode.INVALID_INPUT, f"Invalid button: {button}", "Use left, right, or middle")
        return make_result(tool, ok=False, error=bad_button)

    ai_mode, mode_error = _resolve_ai_mode(selector, intent)
    if mode_error:
        return make_result(
            tool,
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or both",
                "Use intent='describe what to click' for AI-powered clicking, or selector='#css-selector' for precise targeting",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool, ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            # Only forward optional click settings the caller actually supplied.
            click_options: dict[str, Any] = {"timeout": timeout}
            if button:
                click_options["button"] = button
            if click_count is not None:
                click_options["click_count"] = click_count

            if ai_mode is None:
                # Selector-only call: _resolve_ai_mode guarantees a selector here.
                assert selector is not None
                resolved = await page.click(selector=selector, **click_options)
            else:
                resolved = await page.click(selector=selector, prompt=intent, ai=ai_mode, **click_options)  # type: ignore[arg-type]
            timer.mark("sdk")
        except PlaywrightTimeoutError as exc:
            return make_result(
                tool,
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.SELECTOR_NOT_FOUND,
                    str(exc),
                    "Verify the selector matches an element on the page, or use intent for AI-powered finding",
                ),
            )
        except Exception as exc:
            failure_code = ErrorCode.AI_FALLBACK_FAILED if ai_mode else ErrorCode.ACTION_FAILED
            return make_result(
                tool,
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    failure_code,
                    str(exc),
                    "The element may be hidden, disabled, or intercepted by another element",
                ),
            )

    payload: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode}

    # Report the selector the SDK ended up with only when it differs from the input.
    selector_changed = bool(resolved) and resolved != selector
    if selector_changed:
        payload["resolved_selector"] = resolved

    # Build sdk_equivalent: prefer hybrid selector+prompt for production scripts.
    # resolved_selector already contains the "xpath=" prefix (e.g. "xpath=//button[@id='x']"),
    # so pass it directly as the selector positional arg.
    best_selector = resolved if selector_changed else selector
    if best_selector and intent:
        payload["sdk_equivalent"] = f'await page.click("{best_selector}", prompt="{intent}")'
    elif ai_mode:
        payload["sdk_equivalent"] = f'await page.click(prompt="{intent}")'
    elif selector:
        payload["sdk_equivalent"] = f'await page.click("{selector}")'

    return make_result(tool, browser_context=ctx, data=payload, timing_ms=timer.timing_ms)
async def skyvern_type(
    text: Annotated[str, "Text to type into the element"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
        str | None,
        Field(
            description="Natural language description of the input field. Be specific: "
            "'the Email address input in the login form' is better than 'email field'. "
            "Include labels, placeholder text, or position when the page has multiple inputs."
        ),
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector or XPath for the input element")] = None,
    timeout: Annotated[
        int,
        Field(
            description="Max time to wait for the element in ms. Increase for slow-loading pages. Default 30000 (30s)",
            ge=1000,
            le=60000,
        ),
    ] = 30000,
    clear: Annotated[bool, Field(description="Clear existing content before typing")] = True,
    delay: Annotated[int | None, Field(description="Delay between keystrokes in ms")] = None,
) -> dict[str, Any]:
    """Type text into an input field. Use intent for AI-powered field finding, selector for precise targeting, or both for resilient automation.

    For dropdowns, use skyvern_select_option instead. For pressing keys (Enter, Tab), use skyvern_press_key.
    Clears existing content by default (set clear=false to append).
    """
    # Two execution paths:
    #   clear=True  -> fill(): replaces the field content; `delay` is not forwarded.
    #   clear=False -> type(): appends to the field, honouring `delay`.
    # The reported sdk_equivalent mirrors whichever path actually ran.
    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
        return make_result(
            "skyvern_type",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or both",
                "Use intent='describe the input field' for AI-powered targeting, or selector='#css-selector' for precise targeting",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_type", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            if clear:
                if ai_mode is not None:
                    await page.fill(selector=selector, value=text, prompt=intent, ai=ai_mode, timeout=timeout)  # type: ignore[arg-type]
                else:
                    assert selector is not None
                    await page.fill(selector, text, timeout=timeout)
            else:
                kwargs: dict[str, Any] = {"timeout": timeout}
                if delay is not None:
                    kwargs["delay"] = delay
                if ai_mode is not None:
                    loc = page.locator(selector=selector, prompt=intent, ai=ai_mode)  # type: ignore[arg-type]
                    await loc.type(text, **kwargs)
                else:
                    assert selector is not None
                    await page.type(selector, text, **kwargs)
            timer.mark("sdk")
        except PlaywrightTimeoutError as e:
            return make_result(
                "skyvern_type",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    ErrorCode.SELECTOR_NOT_FOUND,
                    str(e),
                    "Verify the selector matches an editable element, or use intent for AI-powered finding",
                ),
            )
        except Exception as e:
            code = ErrorCode.AI_FALLBACK_FAILED if ai_mode else ErrorCode.ACTION_FAILED
            return make_result(
                "skyvern_type",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(
                    code,
                    str(e),
                    "The element may not be editable or may be hidden",
                ),
            )

    # NOTE: The SDK fill() returns the typed value, not a resolved selector.
    # Unlike click(), we cannot return resolved_selector here. SKY-7905 will
    # update the SDK to return element metadata from all action methods.
    data: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode, "text_length": len(text)}

    # Build sdk_equivalent: prefer hybrid selector+prompt for production scripts.
    if clear:
        if selector and intent:
            data["sdk_equivalent"] = f'await page.fill("{selector}", "{text}", prompt="{intent}")'
        elif ai_mode:
            data["sdk_equivalent"] = f'await page.fill(prompt="{intent}", value="{text}")'
        elif selector:
            data["sdk_equivalent"] = f'await page.fill("{selector}", "{text}")'
    else:
        # Append mode ran type(), not fill(); replaying fill() would wipe the field.
        delay_arg = f", delay={delay}" if delay is not None else ""
        if selector and intent:
            data["sdk_equivalent"] = (
                f'await page.locator("{selector}", prompt="{intent}").type("{text}"{delay_arg})'
            )
        elif ai_mode:
            data["sdk_equivalent"] = f'await page.locator(prompt="{intent}").type("{text}"{delay_arg})'
        elif selector:
            data["sdk_equivalent"] = f'await page.type("{selector}", "{text}"{delay_arg})'

    # Surface the dropped `delay` instead of ignoring it silently.
    result_extras: dict[str, Any] = {}
    if clear and delay is not None:
        result_extras["warnings"] = [
            "delay is ignored when clear=true; set clear=false to type with a per-key delay"
        ]

    return make_result(
        "skyvern_type",
        browser_context=ctx,
        data=data,
        timing_ms=timer.timing_ms,
        **result_extras,
    )
async def skyvern_screenshot(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    full_page: Annotated[bool, Field(description="Capture full scrollable page")] = False,
    selector: Annotated[str | None, Field(description="CSS selector to screenshot specific element")] = None,
    inline: Annotated[bool, Field(description="Return base64 data instead of file path")] = False,
) -> dict[str, Any]:
    """See what's currently on the page. Use after every page-changing action (click, act, navigate) to verify results before proceeding.

    Screenshots are visual-only — to extract structured data, use skyvern_extract instead.
    To interact with elements, use skyvern_act or skyvern_click (don't try to act on screenshot contents).

    By default saves to ~/.skyvern/artifacts/ and returns the file path.
    Set inline=true to get base64 data directly (increases token usage).
    """
    # With a selector the element itself is captured and full_page is not used;
    # otherwise the page is captured, optionally in full. The sdk_equivalent
    # string is built to describe the capture that was actually performed.
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_screenshot", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            if selector:
                element = page.locator(selector)
                screenshot_bytes = await element.screenshot()
            else:
                screenshot_bytes = await page.screenshot(full_page=full_page)
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_screenshot",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.ACTION_FAILED, str(e), "Check that the page or element is visible"),
            )

    # Pieces of the equivalent SDK call, mirroring the branch taken above.
    if selector:
        sdk_target = f'page.locator("{selector}")'
        sdk_extra_args: list[str] = []
    else:
        sdk_target = "page"
        sdk_extra_args = ["full_page=True"] if full_page else []

    if inline:
        data_b64 = base64.b64encode(screenshot_bytes).decode("utf-8")
        inline_args = ", ".join(sdk_extra_args)
        return make_result(
            "skyvern_screenshot",
            browser_context=ctx,
            data={
                "inline": True,
                "data": data_b64,
                "mime": "image/png",
                "bytes": len(screenshot_bytes),
                "sdk_equivalent": f"await {sdk_target}.screenshot({inline_args})",
            },
            timing_ms=timer.timing_ms,
            warnings=["Inline mode increases token usage"],
        )

    # File mode: name the artifact after the current UTC time (HHMMSS_microseconds).
    ts = datetime.now(timezone.utc).strftime("%H%M%S_%f")
    filename = f"screenshot_{ts}.png"
    artifact = save_artifact(
        screenshot_bytes,
        kind="screenshot",
        filename=filename,
        mime="image/png",
        session_id=ctx.session_id,
    )

    file_args = ", ".join(["path='screenshot.png'", *sdk_extra_args])
    return make_result(
        "skyvern_screenshot",
        browser_context=ctx,
        data={"path": artifact.path, "sdk_equivalent": f"await {sdk_target}.screenshot({file_args})"},
        artifacts=[artifact],
        timing_ms=timer.timing_ms,
    )
async def skyvern_scroll(
    direction: Annotated[str, Field(description="Direction: up, down, left, right")],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    amount: Annotated[int | None, Field(description="Pixels to scroll (default 500)")] = None,
    intent: Annotated[
        str | None, Field(description="Natural language description of element to scroll into view (uses AI)")
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector of scrollable element")] = None,
) -> dict[str, Any]:
    """Scroll the page or use AI to scroll a specific element into view.

    Use `intent` to scroll an AI-located element into view (with or without selector for hybrid fallback).
    Without intent, scrolls the page or a selector-targeted container by pixel amount.
    """
    # `direction` is only validated (and only used) for pixel scrolling; the
    # intent path scrolls the located element into view and ignores it.
    valid_directions = ("up", "down", "left", "right")
    if not intent and direction not in valid_directions:
        return make_result(
            "skyvern_scroll",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT, f"Invalid direction: {direction}", "Use up, down, left, or right"
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_scroll", ok=False, error=no_browser_error())

    if intent:
        ai_mode = "fallback" if selector else "proactive"
        with Timer() as timer:
            try:
                loc = page.locator(selector=selector, prompt=intent, ai=ai_mode)
                await loc.scroll_into_view_if_needed()
                timer.mark("sdk")
            except Exception as e:
                code = ErrorCode.AI_FALLBACK_FAILED if ai_mode == "fallback" else ErrorCode.ACTION_FAILED
                return make_result(
                    "skyvern_scroll",
                    ok=False,
                    browser_context=ctx,
                    timing_ms=timer.timing_ms,
                    error=make_error(code, str(e), "Could not find element to scroll into view"),
                )

        return make_result(
            "skyvern_scroll",
            browser_context=ctx,
            data={
                "direction": "into_view",
                "intent": intent,
                "ai_mode": ai_mode,
                "sdk_equivalent": (
                    f'await page.locator("{selector}", prompt="{intent}").scroll_into_view_if_needed()'
                    if selector
                    else f'await page.locator(prompt="{intent}").scroll_into_view_if_needed()'
                ),
            },
            timing_ms=timer.timing_ms,
        )

    # Pixel scrolling. `amount or 500` means both None and 0 fall back to 500.
    pixels = amount or 500
    direction_map = {
        "up": (0, -pixels),
        "down": (0, pixels),
        "left": (-pixels, 0),
        "right": (pixels, 0),
    }
    dx, dy = direction_map[direction]

    with Timer() as timer:
        try:
            if selector:
                await page.locator(selector).evaluate(f"el => el.scrollBy({dx}, {dy})")
            else:
                await page.evaluate(f"window.scrollBy({dx}, {dy})")
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_scroll",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.ACTION_FAILED, str(e), "Scroll action failed"),
            )

    # Report the call that actually ran: scrolling a container is not the same
    # operation as scrolling the window.
    if selector:
        sdk_equivalent = f'await page.locator("{selector}").evaluate("el => el.scrollBy({dx}, {dy})")'
    else:
        sdk_equivalent = f'await page.evaluate("window.scrollBy({dx}, {dy})")'

    return make_result(
        "skyvern_scroll",
        browser_context=ctx,
        data={
            "direction": direction,
            "pixels": pixels,
            "sdk_equivalent": sdk_equivalent,
        },
        timing_ms=timer.timing_ms,
    )
async def skyvern_select_option(
    value: Annotated[str, "Value to select"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[str | None, Field(description="Natural language description of the dropdown (uses AI)")] = None,
    selector: Annotated[str | None, Field(description="CSS selector for the select element")] = None,
    timeout: Annotated[
        int, Field(description="Max time to wait for the dropdown in ms. Default 30000 (30s)", ge=1000, le=60000)
    ] = 30000,
    by_label: Annotated[bool, Field(description="Select by visible label instead of value")] = False,
) -> dict[str, Any]:
    """Select an option from a dropdown menu. Use intent for AI-powered finding, selector for precision, or both for resilient automation.

    For free-text input fields, use skyvern_type instead. For non-dropdown buttons or links, use skyvern_click.
    """
    # `by_label` only changes the selector-only path; on the AI paths `value`
    # is handed over as-is regardless of by_label.
    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
        return make_result(
            "skyvern_select_option",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or both",
                "Use intent='describe the dropdown' for AI-powered selection, or selector='#css-selector' for precise targeting",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_select_option", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            if ai_mode is not None:
                # AI paths: pass value= directly -- the AI interprets the text
                # regardless of whether it represents a value or label.
                await page.select_option(selector=selector, value=value, prompt=intent, ai=ai_mode, timeout=timeout)  # type: ignore[arg-type]
            else:
                assert selector is not None
                if by_label:
                    # Bypass SkyvernPage to avoid value="" coercion conflicting with label kwarg.
                    await page.page.locator(selector).select_option(label=value, timeout=timeout)
                else:
                    await page.select_option(selector, value=value, timeout=timeout)
            timer.mark("sdk")
        except Exception as e:
            code = ErrorCode.AI_FALLBACK_FAILED if ai_mode else ErrorCode.ACTION_FAILED
            return make_result(
                "skyvern_select_option",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(code, str(e), "Check selector and available options"),
            )

    # NOTE: The SDK select_option() returns the selected value, not a resolved
    # selector. Unlike click(), we cannot return resolved_selector here.
    # SKY-7905 will update the SDK to return element metadata from all action methods.
    data: dict[str, Any] = {"selector": selector, "intent": intent, "ai_mode": ai_mode, "value": value}

    # Build sdk_equivalent: prefer hybrid selector+prompt for production scripts.
    if selector and intent:
        data["sdk_equivalent"] = f'await page.select_option("{selector}", value="{value}", prompt="{intent}")'
    elif ai_mode:
        data["sdk_equivalent"] = f'await page.select_option(prompt="{intent}", value="{value}")'
    elif selector:
        if by_label:
            # Mirror the label-based call made above; a value= snippet would
            # look up a different option when replayed.
            data["sdk_equivalent"] = f'await page.page.locator("{selector}").select_option(label="{value}")'
        else:
            data["sdk_equivalent"] = f'await page.select_option("{selector}", value="{value}")'

    return make_result(
        "skyvern_select_option",
        browser_context=ctx,
        data=data,
        timing_ms=timer.timing_ms,
    )
async def skyvern_press_key(
    key: Annotated[str, "Key to press (e.g., Enter, Tab, Escape, ArrowDown)"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
        str | None, Field(description="Natural language description of element to focus first (uses AI)")
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector to focus first")] = None,
) -> dict[str, Any]:
    """Press a keyboard key -- Enter, Tab, Escape, arrow keys, shortcuts, etc.

    Use `intent` or `selector` to focus a specific element before pressing.
    Without either, presses the key on the currently focused element.
    """
    # NOTE(review): the docstring reads like LLM-facing tool text (presumably
    # surfaced as the tool description), so its wording is left untouched.
    tool = "skyvern_press_key"

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool, ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            if not (intent or selector):
                # No target given: send the key to the page keyboard.
                await page.keyboard.press(key)
            else:
                ai_mode, _ = _resolve_ai_mode(selector, intent)
                if ai_mode is None:
                    # Selector-only: a plain locator, no AI involved.
                    assert selector is not None
                    target = page.locator(selector)
                else:
                    target = page.locator(selector=selector, prompt=intent, ai=ai_mode)  # type: ignore[arg-type]
                await target.press(key)
            timer.mark("sdk")
        except Exception as exc:
            failure = make_error(ErrorCode.ACTION_FAILED, str(exc), "Check key name is valid")
            return make_result(tool, ok=False, browser_context=ctx, timing_ms=timer.timing_ms, error=failure)

    # Describe the equivalent SDK call for the targeting mode that was used.
    if intent:
        locator_args = f'"{selector}", prompt="{intent}"' if selector else f'prompt="{intent}"'
        sdk_eq = f'await page.locator({locator_args}).press("{key}")'
    elif selector:
        sdk_eq = f'await page.locator("{selector}").press("{key}")'
    else:
        sdk_eq = f'await page.keyboard.press("{key}")'

    payload = {
        "key": key,
        "selector": selector,
        "intent": intent,
        "sdk_equivalent": sdk_eq,
    }
    return make_result(tool, browser_context=ctx, data=payload, timing_ms=timer.timing_ms)
async def skyvern_wait(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    time_ms: Annotated[int | None, Field(description="Time to wait in milliseconds")] = None,
    intent: Annotated[str | None, Field(description="Natural language condition to wait for (uses AI polling)")] = None,
    selector: Annotated[str | None, Field(description="CSS selector to wait for")] = None,
    state: Annotated[str | None, Field(description="Element state: visible, hidden, attached, detached")] = "visible",
    timeout: Annotated[int, Field(description="Max wait time in milliseconds", ge=1000, le=120000)] = 30000,
    poll_interval_ms: Annotated[
        int, Field(description="Polling interval for intent-based waits in ms", ge=500, le=10000)
    ] = 5000,
) -> dict[str, Any]:
    """Wait for a condition, element, or time delay before proceeding. Use intent for AI-powered condition checking.

    Use `intent` to poll with AI validation (e.g., "wait until the loading spinner disappears").
    Use `selector` to wait for an element state. Use `time_ms` for a simple delay.
    """
    # Precedence when several inputs are given: time_ms, then intent, then selector.
    valid_states = ("visible", "hidden", "attached", "detached")
    if state is not None and state not in valid_states:
        return make_result(
            "skyvern_wait",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid state: {state}",
                "Use visible, hidden, attached, or detached",
            ),
        )

    if time_ms is None and not selector and not intent:
        return make_result(
            "skyvern_wait",
            ok=False,
            error=make_error(
                ErrorCode.INVALID_INPUT,
                "Must provide intent, selector, or time_ms",
                "Use intent='condition to wait for' for AI-powered waiting, selector='#element' for element visibility, or time_ms=5000 for a delay",
            ),
        )

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_wait", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            if time_ms is not None:
                await page.wait_for_timeout(time_ms)
                waited_for = "time"
            elif intent:
                loop = asyncio.get_running_loop()
                deadline = loop.time() + timeout / 1000
                last_error: Exception | None = None
                while True:
                    # A failing validate() call counts as "not yet true"; the
                    # last such error is reported if the deadline passes.
                    try:
                        result = await page.validate(intent)
                        last_error = None
                    except Exception as poll_err:
                        result = False
                        last_error = poll_err
                    if result:
                        break
                    remaining_ms = (deadline - loop.time()) * 1000
                    if remaining_ms <= 0:
                        code = ErrorCode.SDK_ERROR if last_error else ErrorCode.TIMEOUT
                        msg = str(last_error) if last_error else f"Condition not met within {timeout}ms: {intent}"
                        return make_result(
                            "skyvern_wait",
                            ok=False,
                            browser_context=ctx,
                            timing_ms=timer.timing_ms,
                            error=make_error(
                                code,
                                msg,
                                "Increase timeout or check that the condition can be satisfied",
                            ),
                        )
                    # Never sleep past the deadline: with a full poll interval
                    # a 1s timeout could otherwise take 5s+ to report failure.
                    await page.wait_for_timeout(min(poll_interval_ms, remaining_ms))
                waited_for = "intent"
            elif selector:
                await page.wait_for_selector(selector, state=state, timeout=timeout)
                waited_for = "selector"
            timer.mark("sdk")
        except Exception as e:
            return make_result(
                "skyvern_wait",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.TIMEOUT, str(e), "Condition was not met within timeout"),
            )

    sdk_eq = ""
    if waited_for == "time":
        sdk_eq = f"await page.wait_for_timeout({time_ms})"
    elif waited_for == "intent":
        sdk_eq = f'await page.validate("{intent}")'
    elif waited_for == "selector":
        if state is not None and state != "visible":
            # Spell out a non-default state so the snippet waits for the same thing.
            sdk_eq = f'await page.wait_for_selector("{selector}", state="{state}")'
        else:
            sdk_eq = f'await page.wait_for_selector("{selector}")'

    return make_result(
        "skyvern_wait",
        browser_context=ctx,
        data={"waited_for": waited_for, "sdk_equivalent": sdk_eq},
        timing_ms=timer.timing_ms,
    )
async def skyvern_evaluate(
    expression: Annotated[str, "JavaScript expression to evaluate"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Run JavaScript on the page to read DOM state, get URLs, check values, or discover CSS selectors for faster subsequent actions.

    Security: This executes arbitrary JS in the page context. Only use with trusted expressions.
    """
    # NOTE(review): the docstring reads like LLM-facing tool text (presumably
    # surfaced as the tool description), so its wording is left untouched.
    tool = "skyvern_evaluate"

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool, ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            js_value = await page.evaluate(expression)
            timer.mark("sdk")
        except Exception as exc:
            failure = make_error(ErrorCode.ACTION_FAILED, str(exc), "Check JavaScript syntax")
            return make_result(tool, ok=False, browser_context=ctx, timing_ms=timer.timing_ms, error=failure)

    # Only the first 80 characters of the expression are echoed in the snippet.
    snippet = expression[:80]
    payload = {"result": js_value, "sdk_equivalent": f'await page.evaluate("{snippet}")'}
    return make_result(tool, browser_context=ctx, data=payload, timing_ms=timer.timing_ms)
async def skyvern_extract(
    prompt: Annotated[str, "Natural language description of what data to extract from the page"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    schema: Annotated[
        str | None, Field(description="JSON Schema string defining the expected output structure")
    ] = None,
) -> dict[str, Any]:
    """Get structured data from any website — prices, listings, articles, tables, contact info, etc. Use this instead of writing scraping code or guessing API endpoints. Describe what you need in natural language.

    Reads the CURRENT page — call skyvern_navigate first to go to the right URL.
    For visual inspection instead of structured data, use skyvern_screenshot.

    Optionally provide a JSON `schema` to enforce the output structure (pass as a JSON string).
    """
    # NOTE(review): the docstring reads like LLM-facing tool text (presumably
    # surfaced as the tool description), so its wording is left untouched.
    tool = "skyvern_extract"

    # Decode the optional schema first so malformed JSON fails before any browser work.
    parsed_schema: dict[str, Any] | None = None
    if schema is not None:
        try:
            parsed_schema = json.loads(schema)
        except (json.JSONDecodeError, TypeError) as exc:
            bad_schema = make_error(
                ErrorCode.INVALID_INPUT,
                f"Invalid JSON schema: {exc}",
                "Provide schema as a valid JSON string",
            )
            return make_result(tool, ok=False, error=bad_schema)

    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result(tool, ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            extracted = await page.extract(prompt=prompt, schema=parsed_schema)
            timer.mark("sdk")
        except Exception as exc:
            failure = make_error(
                ErrorCode.SDK_ERROR, str(exc), "Check that the page has loaded and the prompt is clear"
            )
            return make_result(tool, ok=False, browser_context=ctx, timing_ms=timer.timing_ms, error=failure)

    payload = {"extracted": extracted, "sdk_equivalent": f'await page.extract(prompt="{prompt}")'}
    return make_result(tool, browser_context=ctx, data=payload, timing_ms=timer.timing_ms)
async def skyvern_validate(
    prompt: Annotated[str, "Validation condition to check (e.g., 'the login form is visible')"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Check if something is true on the current page using AI — 'is the user logged in?', 'does the cart have 3 items?', 'is the form submitted?'

    Reads the CURRENT page — navigate first. Returns true/false.
    To extract data (not just check a condition), use skyvern_extract instead.
    """
    # Every path returns the dict built by make_result("skyvern_validate", ...):
    #   * no browser available -> ok=False with no_browser_error()
    #   * page.validate raises -> ok=False with an SDK_ERROR carrying str(e)
    #   * success              -> data = {"prompt", "valid", "sdk_equivalent"}
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_validate", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            valid = await page.validate(prompt)
            timer.mark("sdk")
        except Exception as e:
            # Tool boundary: report the SDK failure as a structured error
            # instead of letting the exception escape to the caller.
            return make_result(
                "skyvern_validate",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Check that the page has loaded and the prompt is clear"),
            )

    # json.dumps emits a double-quoted literal with quotes, backslashes and
    # newlines escaped, so the snippet stays valid Python for any prompt.
    # Prompts without such characters render exactly as before.
    quoted_prompt = json.dumps(prompt, ensure_ascii=False)
    return make_result(
        "skyvern_validate",
        browser_context=ctx,
        data={"prompt": prompt, "valid": valid, "sdk_equivalent": f"await page.validate({quoted_prompt})"},
        timing_ms=timer.timing_ms,
    )
async def skyvern_act(
    prompt: Annotated[str, "Natural language instruction for the action to perform (e.g., 'close the cookie banner')"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
) -> dict[str, Any]:
    """Perform actions on a web page by describing what to do in plain English — click buttons, close popups, fill forms, scroll to sections, interact with menus.

    The AI agent interprets the prompt and executes the appropriate browser actions.
    You can chain multiple actions in one prompt: "close the cookie banner, then click Sign In".
    For multi-step automations (4+ pages), use skyvern_workflow_create with one block per step.
    For quick one-off multi-page tasks, use skyvern_run_task.
    """
    # Every path returns the dict built by make_result("skyvern_act", ...):
    #   * no browser available -> ok=False with no_browser_error()
    #   * page.act raises      -> ok=False with an SDK_ERROR carrying str(e)
    #   * success              -> data = {"prompt", "completed": True, "sdk_equivalent"}
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_act", ok=False, error=no_browser_error())

    with Timer() as timer:
        try:
            # The return value of page.act is not used; success is signalled
            # by the call completing without raising.
            await page.act(prompt)
            timer.mark("sdk")
        except Exception as e:
            # Tool boundary: report the SDK failure as a structured error
            # instead of letting the exception escape to the caller.
            return make_result(
                "skyvern_act",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Simplify the prompt or break the task into steps"),
            )

    # json.dumps emits a double-quoted literal with quotes, backslashes and
    # newlines escaped, so the snippet stays valid Python for any prompt.
    # Prompts without such characters render exactly as before.
    quoted_prompt = json.dumps(prompt, ensure_ascii=False)
    return make_result(
        "skyvern_act",
        browser_context=ctx,
        data={"prompt": prompt, "completed": True, "sdk_equivalent": f"await page.act({quoted_prompt})"},
        timing_ms=timer.timing_ms,
    )
async def skyvern_run_task(
    prompt: Annotated[str, "Natural language description of the task to automate"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    url: Annotated[
        str | None, Field(description="URL to navigate to before running (uses current page if omitted)")
    ] = None,
    data_extraction_schema: Annotated[
        str | None, Field(description="JSON Schema string defining what data to extract")
    ] = None,
    max_steps: Annotated[int | None, Field(description="Maximum number of agent steps")] = None,
    timeout_seconds: Annotated[
        int, Field(description="Timeout in seconds (default 180s = 3 minutes)", ge=10, le=1800)
    ] = 180,
) -> dict[str, Any]:
    """Run a quick, one-off web task via an autonomous AI agent. Nothing is saved — use for throwaway tests and exploration only. Best for tasks describable in 2-3 sentences.

    For anything reusable, multi-step, or worth keeping, use skyvern_workflow_create instead — it produces a versioned, rerunnable workflow with per-step observability.
    For simple single-step actions on the current page, use skyvern_act instead.
    """
    # Every path returns the dict built by make_result("skyvern_run_task", ...):
    #   * no browser available         -> ok=False with no_browser_error()
    #   * data_extraction_schema is
    #     not valid JSON               -> ok=False with INVALID_INPUT
    #   * page.agent.run_task raises   -> ok=False with an SDK_ERROR carrying str(e)
    #   * success                      -> data with run_id, status, output,
    #                                     failure_reason, recording_url, app_url
    #                                     and sdk_equivalent
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
    except BrowserNotAvailableError:
        return make_result("skyvern_run_task", ok=False, error=no_browser_error())

    # The schema is parsed after the page is resolved so that the
    # invalid-schema error can still report the browser context.
    parsed_schema: dict[str, Any] | str | None = None
    if data_extraction_schema is not None:
        try:
            parsed_schema = json.loads(data_extraction_schema)
        except (json.JSONDecodeError, TypeError) as e:
            return make_result(
                "skyvern_run_task",
                ok=False,
                browser_context=ctx,
                error=make_error(
                    ErrorCode.INVALID_INPUT,
                    f"Invalid data_extraction_schema JSON: {e}",
                    "Provide schema as a valid JSON string",
                ),
            )

    with Timer() as timer:
        try:
            response = await page.agent.run_task(
                prompt=prompt,
                url=url,
                data_extraction_schema=parsed_schema,
                max_steps=max_steps,
                timeout=timeout_seconds,
            )
            timer.mark("sdk")
        except Exception as e:
            # Tool boundary: report the SDK failure as a structured error
            # instead of letting the exception escape to the caller.
            return make_result(
                "skyvern_run_task",
                ok=False,
                browser_context=ctx,
                timing_ms=timer.timing_ms,
                error=make_error(ErrorCode.SDK_ERROR, str(e), "Check the prompt, URL, and timeout settings"),
            )

    # json.dumps emits a double-quoted literal with quotes, backslashes and
    # newlines escaped, so the snippet stays valid Python for any prompt.
    # Prompts without such characters render exactly as before.
    quoted_prompt = json.dumps(prompt, ensure_ascii=False)
    return make_result(
        "skyvern_run_task",
        browser_context=ctx,
        data={
            "run_id": response.run_id,
            "status": response.status,
            "output": response.output,
            "failure_reason": response.failure_reason,
            "recording_url": response.recording_url,
            "app_url": response.app_url,
            "sdk_equivalent": f"await page.agent.run_task(prompt={quoted_prompt})",
        },
        timing_ms=timer.timing_ms,
    )