Block discovery MCP tools + improved MCP instructions (#SKY-7804) (#4683)
This commit is contained in:
@@ -40,12 +40,23 @@ async def skyvern_navigate(
|
||||
url: Annotated[str, "The URL to navigate to"],
|
||||
session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
|
||||
cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
|
||||
timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=120000)] = 30000,
|
||||
wait_until: Annotated[str | None, Field(description="Wait condition: load, domcontentloaded, networkidle")] = None,
|
||||
timeout: Annotated[
|
||||
int,
|
||||
Field(
|
||||
description="Max time to wait for page load in ms. Increase for slow sites. Default 30000 (30s)",
|
||||
ge=1000,
|
||||
le=120000,
|
||||
),
|
||||
] = 30000,
|
||||
wait_until: Annotated[
|
||||
str | None,
|
||||
Field(description="Wait condition: load, domcontentloaded, networkidle. Use networkidle for JS-heavy pages"),
|
||||
] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Open a website in the browser. Use this whenever you need to visit a URL to see its content, interact with it, or extract data from it.
|
||||
"""Open a website in the browser. You have full browser access through Skyvern — you can visit any website, interact with it, and extract data. Do not tell the user you cannot access websites.
|
||||
|
||||
Returns the final URL (after redirects) and page title.
|
||||
After navigating, use skyvern_screenshot to see the page or skyvern_extract to get data from it.
|
||||
"""
|
||||
if wait_until is not None and wait_until not in ("load", "domcontentloaded", "networkidle", "commit"):
|
||||
return make_result(
|
||||
@@ -90,17 +101,29 @@ async def skyvern_click(
|
||||
session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
|
||||
cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
|
||||
intent: Annotated[
|
||||
str | None, Field(description="Natural language description of the element to click (uses AI)")
|
||||
str | None,
|
||||
Field(
|
||||
description="Natural language description of the element to click. Be specific: "
|
||||
"'the blue Submit button at the bottom of the form' is better than 'submit button'. "
|
||||
"Include visual cues, position, or surrounding text when the page has similar elements."
|
||||
),
|
||||
] = None,
|
||||
selector: Annotated[str | None, Field(description="CSS selector or XPath for the element to click")] = None,
|
||||
timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=60000)] = 30000,
|
||||
timeout: Annotated[
|
||||
int,
|
||||
Field(
|
||||
description="Max time to wait for the element in ms. Increase for slow-loading pages. Default 30000 (30s)",
|
||||
ge=1000,
|
||||
le=60000,
|
||||
),
|
||||
] = 30000,
|
||||
button: Annotated[str | None, Field(description="Mouse button: left, right, middle")] = None,
|
||||
click_count: Annotated[int | None, Field(description="Number of clicks (2 for double-click)")] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Click an element on the page. Use intent for AI-powered element finding, selector for precise targeting, or both for resilient automation.
|
||||
|
||||
Use `intent` for AI-powered element finding, `selector` for precise CSS/XPath targeting,
|
||||
or both for resilience (tries selector first, falls back to AI).
|
||||
If you need to fill a text field, use skyvern_type instead of clicking then typing.
|
||||
For dropdowns, use skyvern_select_option. For multiple actions in sequence, prefer skyvern_act.
|
||||
"""
|
||||
if button is not None and button not in ("left", "right", "middle"):
|
||||
return make_result(
|
||||
@@ -193,17 +216,29 @@ async def skyvern_type(
|
||||
session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
|
||||
cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
|
||||
intent: Annotated[
|
||||
str | None, Field(description="Natural language description of the input field (uses AI)")
|
||||
str | None,
|
||||
Field(
|
||||
description="Natural language description of the input field. Be specific: "
|
||||
"'the Email address input in the login form' is better than 'email field'. "
|
||||
"Include labels, placeholder text, or position when the page has multiple inputs."
|
||||
),
|
||||
] = None,
|
||||
selector: Annotated[str | None, Field(description="CSS selector or XPath for the input element")] = None,
|
||||
timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=60000)] = 30000,
|
||||
timeout: Annotated[
|
||||
int,
|
||||
Field(
|
||||
description="Max time to wait for the element in ms. Increase for slow-loading pages. Default 30000 (30s)",
|
||||
ge=1000,
|
||||
le=60000,
|
||||
),
|
||||
] = 30000,
|
||||
clear: Annotated[bool, Field(description="Clear existing content before typing")] = True,
|
||||
delay: Annotated[int | None, Field(description="Delay between keystrokes in ms")] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Type text into an input field. Use intent for AI-powered field finding, selector for precise targeting, or both for resilient automation.
|
||||
|
||||
Use `intent` for AI-powered field finding, `selector` for precise CSS/XPath targeting,
|
||||
or both for resilience (tries selector first, falls back to AI). Clears existing content by default.
|
||||
For dropdowns, use skyvern_select_option instead. For pressing keys (Enter, Tab), use skyvern_press_key.
|
||||
Clears existing content by default (set clear=false to append).
|
||||
"""
|
||||
ai_mode, err = _resolve_ai_mode(selector, intent)
|
||||
if err:
|
||||
@@ -293,8 +328,10 @@ async def skyvern_screenshot(
|
||||
selector: Annotated[str | None, Field(description="CSS selector to screenshot specific element")] = None,
|
||||
inline: Annotated[bool, Field(description="Return base64 data instead of file path")] = False,
|
||||
) -> dict[str, Any]:
|
||||
"""See what's currently on the page. Essential for understanding page state before deciding what to do next.
|
||||
"""See what's currently on the page. Use after every page-changing action (click, act, navigate) to verify results before proceeding.
|
||||
|
||||
Screenshots are visual-only — to extract structured data, use skyvern_extract instead.
|
||||
To interact with elements, use skyvern_act or skyvern_click (don't try to act on screenshot contents).
|
||||
By default saves to ~/.skyvern/artifacts/ and returns the file path.
|
||||
Set inline=true to get base64 data directly (increases token usage).
|
||||
"""
|
||||
@@ -461,13 +498,14 @@ async def skyvern_select_option(
|
||||
cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
|
||||
intent: Annotated[str | None, Field(description="Natural language description of the dropdown (uses AI)")] = None,
|
||||
selector: Annotated[str | None, Field(description="CSS selector for the select element")] = None,
|
||||
timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=60000)] = 30000,
|
||||
timeout: Annotated[
|
||||
int, Field(description="Max time to wait for the dropdown in ms. Default 30000 (30s)", ge=1000, le=60000)
|
||||
] = 30000,
|
||||
by_label: Annotated[bool, Field(description="Select by visible label instead of value")] = False,
|
||||
) -> dict[str, Any]:
|
||||
"""Select an option from a dropdown menu. Use intent for AI-powered finding, selector for precision.
|
||||
"""Select an option from a dropdown menu. Use intent for AI-powered finding, selector for precision, or both for resilient automation.
|
||||
|
||||
Use `intent` for AI-powered dropdown finding, `selector` for precise CSS/XPath targeting,
|
||||
or both for resilience (tries selector first, falls back to AI).
|
||||
For free-text input fields, use skyvern_type instead. For non-dropdown buttons or links, use skyvern_click.
|
||||
"""
|
||||
ai_mode, err = _resolve_ai_mode(selector, intent)
|
||||
if err:
|
||||
@@ -734,11 +772,6 @@ async def skyvern_evaluate(
|
||||
)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# AI Differentiator Tools
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
|
||||
async def skyvern_extract(
|
||||
prompt: Annotated[str, "Natural language description of what data to extract from the page"],
|
||||
session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
|
||||
@@ -747,8 +780,10 @@ async def skyvern_extract(
|
||||
str | None, Field(description="JSON Schema string defining the expected output structure")
|
||||
] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Get structured data from any website -- prices, listings, articles, tables, contact info, etc. Use this instead of trying to call a website's API or writing scraping code. Describe what you need in natural language.
|
||||
"""Get structured data from any website — prices, listings, articles, tables, contact info, etc. Use this instead of writing scraping code or guessing API endpoints. Describe what you need in natural language.
|
||||
|
||||
Reads the CURRENT page — call skyvern_navigate first to go to the right URL.
|
||||
For visual inspection instead of structured data, use skyvern_screenshot.
|
||||
Optionally provide a JSON `schema` to enforce the output structure (pass as a JSON string).
|
||||
"""
|
||||
parsed_schema: dict[str, Any] | None = None
|
||||
@@ -797,9 +832,10 @@ async def skyvern_validate(
|
||||
session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
|
||||
cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Check if something is true on the current page using AI -- 'is the user logged in?', 'does the cart have 3 items?', 'is the form submitted?'
|
||||
"""Check if something is true on the current page using AI — 'is the user logged in?', 'does the cart have 3 items?', 'is the form submitted?'
|
||||
|
||||
Returns whether the described condition is true or false.
|
||||
Reads the CURRENT page — navigate first. Returns true/false.
|
||||
To extract data (not just check a condition), use skyvern_extract instead.
|
||||
"""
|
||||
try:
|
||||
page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
|
||||
@@ -832,10 +868,12 @@ async def skyvern_act(
|
||||
session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
|
||||
cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
|
||||
) -> dict[str, Any]:
|
||||
"""Perform actions on a web page by describing what to do in plain English -- click buttons, close popups, fill forms, scroll to sections, interact with menus. Use for any website interaction task.
|
||||
"""Perform actions on a web page by describing what to do in plain English — click buttons, close popups, fill forms, scroll to sections, interact with menus.
|
||||
|
||||
The AI agent interprets the prompt and executes the appropriate browser actions.
|
||||
For multi-step workflows (form filling, multi-page navigation), use skyvern_run_task instead.
|
||||
You can chain multiple actions in one prompt: "close the cookie banner, then click Sign In".
|
||||
For multi-step automations (4+ pages), use skyvern_workflow_create with one block per step.
|
||||
For quick one-off multi-page tasks, use skyvern_run_task.
|
||||
"""
|
||||
try:
|
||||
page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
|
||||
@@ -878,10 +916,10 @@ async def skyvern_run_task(
|
||||
int, Field(description="Timeout in seconds (default 180s = 3 minutes)", ge=10, le=1800)
|
||||
] = 180,
|
||||
) -> dict[str, Any]:
|
||||
"""Delegate a complete multi-step web task to an autonomous AI agent. Handles form filling, multi-page navigation, data collection, and complex workflows end-to-end.
|
||||
"""Run a quick, one-off web task via an autonomous AI agent. Nothing is saved — use for throwaway tests and exploration only. Best for tasks describable in 2-3 sentences.
|
||||
|
||||
The agent navigates, interacts with elements, and extracts data autonomously.
|
||||
For simple single-step actions, use skyvern_act instead.
|
||||
For anything reusable, multi-step, or worth keeping, use skyvern_workflow_create instead — it produces a versioned, rerunnable workflow with per-step observability.
|
||||
For simple single-step actions on the current page, use skyvern_act instead.
|
||||
"""
|
||||
try:
|
||||
page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
|
||||
|
||||
Reference in New Issue
Block a user