diff --git a/skyvern/cli/mcp_tools/__init__.py b/skyvern/cli/mcp_tools/__init__.py index 04c54b8f..f2c3113c 100644 --- a/skyvern/cli/mcp_tools/__init__.py +++ b/skyvern/cli/mcp_tools/__init__.py @@ -33,6 +33,7 @@ from .credential import ( skyvern_credential_get, skyvern_credential_list, ) +from .prompts import build_workflow, debug_automation, extract_data from .session import ( skyvern_session_close, skyvern_session_connect, @@ -213,7 +214,6 @@ Use `{{parameter_key}}` to reference workflow input parameters in any block fiel Common block types for workflow definitions: - **navigation** — take actions on a page: fill forms, click buttons, navigate multi-step flows (most common) - **extraction** — extract structured data from the current page -- **task_v2** — complex tasks via natural language prompt (handles both actions and extraction) - **for_loop** — iterate over a list of items - **conditional** — branch based on conditions - **code** — run Python code for data transformation @@ -305,6 +305,11 @@ mcp.tool()(skyvern_workflow_run) mcp.tool()(skyvern_workflow_status) mcp.tool()(skyvern_workflow_cancel) +# -- Prompts (methodology guides injected into LLM conversations) -- +mcp.prompt()(build_workflow) +mcp.prompt()(debug_automation) +mcp.prompt()(extract_data) + __all__ = [ "mcp", # Session @@ -346,4 +351,8 @@ __all__ = [ "skyvern_workflow_run", "skyvern_workflow_status", "skyvern_workflow_cancel", + # Prompts + "build_workflow", + "debug_automation", + "extract_data", ] diff --git a/skyvern/cli/mcp_tools/blocks.py b/skyvern/cli/mcp_tools/blocks.py index 07a116e8..3d610ffb 100644 --- a/skyvern/cli/mcp_tools/blocks.py +++ b/skyvern/cli/mcp_tools/blocks.py @@ -85,7 +85,6 @@ BLOCK_TYPE_MAP: dict[str, type[BlockYAML]] = { BLOCK_SUMMARIES: dict[str, str] = { "navigation": "Take actions on a page: fill forms, click buttons, navigate multi-step flows (most common)", "extraction": "Extract structured data from the current page", - "task_v2": "Complex tasks via natural 
language prompt — handles both actions and extraction", "for_loop": "Iterate over a list, executing nested blocks for each item", "conditional": "Branch based on Jinja2 expressions or AI prompts", "code": "Run Python code for data transformation", @@ -138,12 +137,6 @@ BLOCK_EXAMPLES: dict[str, dict[str, Any]] = { }, }, }, - "task_v2": { - "block_type": "task_v2", - "label": "book_flight", - "url": "https://booking.example.com", - "prompt": "Book a flight from {{ origin }} to {{ destination }} on {{ date }}", - }, "for_loop": { "block_type": "for_loop", "label": "process_each_url", @@ -297,7 +290,7 @@ async def skyvern_block_schema( block_type: Annotated[ str | None, Field( - description="Block type to get schema for (e.g., 'navigation', 'extraction', 'task_v2'). Omit to list all available types." + description="Block type to get schema for (e.g., 'navigation', 'extraction', 'for_loop'). Omit to list all available types." ), ] = None, ) -> dict[str, Any]: @@ -323,7 +316,7 @@ async def skyvern_block_schema( normalized = block_type.strip().lower() - task_redirect = normalized == "task" + task_redirect = normalized in ("task", "task_v2") if task_redirect: normalized = "navigation" @@ -344,7 +337,7 @@ async def skyvern_block_schema( warnings = ( [ - "'task' is deprecated. Showing 'navigation' schema instead. Use 'navigation' for actions (requires navigation_goal) and 'extraction' for data extraction (requires data_extraction_goal + data_schema)." + f"'{block_type}' is deprecated. Showing 'navigation' schema instead. Use 'navigation' for actions (requires navigation_goal) and 'extraction' for data extraction (requires data_extraction_goal + data_schema)." ] if task_redirect else [] @@ -426,9 +419,9 @@ async def skyvern_block_validate( try: block = adapter.validate_python(raw) warnings = [] - if block.block_type == "task": + if block.block_type in ("task", "task_v2"): warnings.append( - "'task' block type is deprecated. 
Use 'navigation' for actions and 'extraction' for data extraction." + f"'{block.block_type}' block type is deprecated. Use 'navigation' for actions and 'extraction' for data extraction." ) return make_result( action, diff --git a/skyvern/cli/mcp_tools/prompts.py b/skyvern/cli/mcp_tools/prompts.py new file mode 100644 index 00000000..9d3fa9a8 --- /dev/null +++ b/skyvern/cli/mcp_tools/prompts.py @@ -0,0 +1,430 @@ +"""MCP prompt skills for Skyvern workflow design, debugging, and data extraction. + +These prompts are registered with @mcp.prompt() and injected into LLM conversations +to guide Claude through Skyvern automation tasks. Each prompt teaches Claude to act +as the USER — designing workflows and interpreting results — while Skyvern's AI +handles the actual browser navigation. + +Milestone context: + M3: Expose MCP functionality as Claude Skills (this module) + M4: Expose Skyvern Workflow Copilot via MCP + Skills (these prompts power it) +""" + +from typing import Annotated + +from pydantic import Field + +# --------------------------------------------------------------------------- +# build_workflow +# --------------------------------------------------------------------------- + +BUILD_WORKFLOW_CONTENT = """\ +# Build a Skyvern Workflow + +## How Skyvern Workflows Work + +You create a workflow definition (blocks + parameters). Skyvern executes it in a cloud browser. +Each block runs in order, sharing the same browser session. The key differentiator is +**navigation blocks**: you describe a goal in natural language and Skyvern's AI navigates the site +autonomously — clicking, filling forms, handling popups, and retrying on failure. You do NOT +need to specify selectors, XPaths, or step-by-step instructions. + +Workflows are versioned, parameterized, and reusable. Once a workflow works, you can re-run it +with different inputs forever. Think of each workflow as a saved automation skill. 
+ +Tools you will use: skyvern_workflow_create, skyvern_workflow_run, skyvern_workflow_status, +skyvern_workflow_update, skyvern_block_schema, skyvern_block_validate. + +--- + +## Design the Workflow + +### Default to navigation blocks + +navigation is the right choice for most steps. You give it a URL and a navigation_goal. +Skyvern figures out the navigation at runtime. + +GOOD navigation_goal — describes the GOAL and what "done" looks like: + "Search for '{{product_name}}' in the search bar, click the first result, and add it to cart. + Done when the cart icon shows 1 item." + +BAD navigation_goal — describes HOW to do it (Skyvern already knows): + "Find the input element with id='search', type the product name, press Enter, wait for + results to load, find the first anchor tag in the results div, click it..." + +### Block type decision tree + +1. **navigation** (default) — AI-powered browser actions. Use when the step involves browsing, + clicking, filling forms, or any multi-action sequence. Skyvern handles element finding, + popup dismissal, scrolling, and retries. +2. **extraction** — structured data extraction. Use when you need JSON output from a page + (prices, tables, lists). Requires data_extraction_goal and data_schema. +3. **for_loop** — iterate over a list. Use when you need to repeat blocks for each item + in a parameter list (e.g., process each URL, each product). +4. **goto_url** — simple navigation without any actions. Use to jump to a known URL. +5. **login** — authenticate with stored credentials. Use for sites that require login. +6. **code** — run Python for data transformation between blocks. +7. **action** — single focused action on the current page (e.g., click one button). + +### One block per logical step + +Split workflows into small, focused blocks. Each block should do ONE thing. + +GOOD (3 blocks): + Block 1 (navigation): "Go to the search page and search for '{{query}}'. Done when results load." 
+ Block 2 (navigation): "Click the first result and add it to cart. Done when cart shows 1 item." + Block 3 (extraction): "Extract the product name, price, and availability from the cart page." + +BAD (1 block): + Block 1 (navigation): "Search for the product, click the first result, add to cart, go to checkout, + fill in shipping, enter payment, and submit the order." + +### Common workflow shapes + +**Search + Extract**: goto_url -> navigation (search) -> extraction (results) +**Multi-page form**: navigation (page 1) -> navigation (page 2) -> navigation (page 3) -> extraction (confirmation) +**Login + Action**: login (authenticate) -> navigation (do work) -> extraction (results) +**Batch processing**: for_loop over URLs -> navigation (process each) -> extraction (gather data) + +--- + +## Parameterize for Reuse + +Every workflow should accept parameters so it can be re-run with different inputs. + +### Declaring parameters + +Parameters are declared in the workflow_definition.parameters array: + {"parameter_type": "workflow", "key": "company_name", "workflow_parameter_type": "string"} + +Supported types: string, integer, float, boolean, json, file_url. + +### Referencing parameters + +Use {{parameter_key}} in any block field — prompts, URLs, data schemas, goal descriptions. +Skyvern substitutes values at runtime. + +### What to parameterize + +- Input data: names, addresses, search queries, product IDs +- URLs: if the target URL varies between runs +- Credentials: use the login block + credential_id parameter + +### What NOT to parameterize + +- Navigation instructions: these are the block prompts themselves +- Block structure: if you need different flows, create separate workflows +- Static site URLs: if the URL is always the same, hardcode it + +--- + +## Test via Skyvern's Feedback Loop + +Do NOT try to get the workflow perfect on the first attempt. 
Use this iteration loop: + +### Step 1: Create and run + +Call skyvern_workflow_create with your definition, then skyvern_workflow_run with test parameters. + +### Step 2: Check status + +Poll with skyvern_workflow_status using the run_id. The response tells you: +- Which block succeeded or failed +- The failure_reason with what Skyvern saw on the page +- Step count and timing + +### Step 3: Fix and re-run + +Based on the error feedback: +- **"Prompt too vague"** — add specificity about what "done" looks like. Example: change + "Fill in the form" to "Fill in the form with company name '{{name}}'. Done when the + confirmation page shows 'Application Submitted'." +- **"Element not found"** — add a navigation hint. Example: "Look for the search bar in the + top navigation area" or "The form is inside an iframe." +- **"Wrong page"** — add a URL check or split into smaller blocks so each one starts on the + right page. +- **"Timeout"** — the page may be slow. Increase max_retries on the block or add a wait block. + +Call skyvern_workflow_update with the fixed definition, then skyvern_workflow_run again. + +### Step 4: Verify output + +When the run succeeds, check the output field in skyvern_workflow_status. For extraction blocks, +verify the JSON matches your data_schema. + +--- + +## Quick Feasibility Check (Optional) + +If you are unsure whether Skyvern can handle a particular site, use skyvern_run_task as a probe +BEFORE building a full workflow. + +skyvern_run_task is a one-shot autonomous agent. Give it a URL and a prompt — it navigates +the site, takes actions, and reports results. No workflow definition needed. + +If it succeeds: turn the approach into a multi-block workflow for reuse. +If it struggles: read the failure_reason, refine the prompt, and try again. Two or three +iterations usually reveal whether the site is automatable and what prompt phrasing works. 
+ +Do NOT open a manual browser session (skyvern_session_create) to explore the site before +building a workflow. That approach bypasses Skyvern's AI and wastes time. skyvern_run_task +gives you the same insight faster because Skyvern navigates the site for you. + +--- + +## Prompt Refinement Tips + +When a navigation block fails, refine the navigation_goal: +1. Use skyvern_run_task first to identify what Skyvern sees on the page. +2. Add specificity: reference exact labels visible on the page. +3. Describe what "done" looks like so Skyvern knows when to stop. + +Example workflow with clear goals: + Block 1 (login): Authenticate with stored credentials. + Block 2 (navigation): "Navigate to the settings page and open 'Notification Preferences'. + Uncheck 'Marketing Emails' and check 'Security Alerts'. Click 'Save Changes'. + Done when a success banner appears." + Block 3 (extraction): "Extract the confirmation message." + +--- + +## Pre-Flight Checklist + +Before calling skyvern_workflow_create, verify: +1. Each block has a clear, single-responsibility goal (not a multi-page mega-prompt). +2. Navigation block goals describe WHAT to achieve, not HOW to click. +3. Every variable input uses {{parameter_key}} and is declared in parameters. +4. Extraction blocks include a data_schema with the expected JSON structure. +5. The block order matches the actual site flow (blocks share one browser session). +6. Login is handled by a login block (not embedded in a navigation goal). +7. You have test parameter values ready for the first skyvern_workflow_run call. +8. Validate blocks with skyvern_block_validate before submitting the full definition. 
+""" + + +# --------------------------------------------------------------------------- +# debug_automation +# --------------------------------------------------------------------------- + +DEBUG_AUTOMATION_CONTENT = """\ +# Debugging Skyvern Automations + +When a workflow run or task fails, follow this structured process: read the error, diagnose the pattern, +fix and re-run. Do NOT open a manual browser session to explore — Skyvern already tells you what went wrong. + +## Step 1: Read the Error + +Call skyvern_workflow_status with the workflow_run_id to get structured failure info. + +Key fields to examine: +- **status**: "failed", "terminated", or "timed_out" +- **failure_reason**: which block failed and why +- **screenshot**: what the page looked like when the failure occurred +- **extracted_information**: any partial data the AI did extract before failing + +If this was an interactive session failure (not a workflow), call skyvern_screenshot to see the current page state, +then check the last tool response for error details. + +Record three things before proceeding: +1. Which block (by label) failed +2. The error type (timeout, element not found, wrong page, extraction empty, auth required) +3. What the AI reported seeing on the page + +## Step 2: Diagnose the Pattern + +Skyvern failures fall into predictable categories. Match the error to a pattern and apply the standard fix. + +### Timeout (block exceeded max_steps or wall time) +- Cause: prompt is too vague, so the AI explores without converging. +- Fix: add specificity about what "done" looks like. Instead of "fill out the form", write "fill out the form and + click the blue Submit button. Done when you see a confirmation message containing a reference number." +- Also check: is max_steps too low? Default is reasonable, but complex forms may need more. 
+ +### Element Not Found (AI could not locate the target element) +- Cause: label mismatch (the button says "Continue" but the prompt says "Next"), or element loads asynchronously. +- Fix: update the prompt to use the exact label visible on the page. If the element loads after a delay, add + "wait for the page to fully load before acting" to the prompt. +- If you know the exact label and need only one interaction: use an action block that quotes that label verbatim. + +### Wrong Page (block started on an unexpected page) +- Cause: the previous block did not complete its page transition. The current block assumed it would land on page B + but it is still on page A. +- Fix: update the previous block's prompt to explicitly include the page transition. Add "click Continue and wait + until the next page loads" instead of just "click Continue". Alternatively, add a goto_url block between them. + +### Extraction Empty (extraction returned null or empty object) +- Cause: data loads dynamically (AJAX, infinite scroll) and was not present when the AI read the page. Or the + extraction prompt does not match the page structure. +- Fix: add "wait for the data table to fully load" to the prompt. If data requires scrolling, add a navigation + block that scrolls first. If the prompt is wrong, update it to describe the data using labels visible on the page. + +### Auth Failure (redirected to login page) +- Cause: workflow does not handle authentication, or session cookies expired. +- Fix: add a login block at the start of the workflow, or use a browser_profile that has saved credentials. + +### Stuck / Hanging (run stays "running" indefinitely) +- Action: call skyvern_workflow_cancel to stop the run. Then investigate: is the page showing a CAPTCHA, a + modal dialog, or an unexpected redirect? Check the last screenshot from skyvern_workflow_status. + +### Rate Limited or Blocked (403, CAPTCHA, "unusual traffic" message) +- Cause: the target site detected automation. 
+- Fix: add a proxy (residential or ISP) to the workflow's proxy_location parameter. Reduce request frequency + by adding wait blocks between actions. If CAPTCHA persists, report to the user — this may require manual + intervention or a CAPTCHA-solving integration. + +## Step 3: Fix and Re-run + +Use skyvern_workflow_update to modify the failing block. Do NOT delete the workflow and recreate it. + +Fixing playbook: +1. Update the failing block's prompt to address the diagnosed issue. Be specific: add exact labels, describe + what "done" looks like, mention elements to wait for. +2. If the navigation_goal is too vague for a complex form, make it more explicit — reference exact field labels, + describe the form layout, and specify what "done" looks like. +3. Re-run with skyvern_workflow_run using the same parameters as the failed run. +4. Poll skyvern_workflow_status until the run completes. Check whether the previously failing block now passes. +5. If it still fails with the same error: refine the prompt further. If it fails with a NEW error, restart + diagnosis from Step 1. + +## Step 4: Escalation + +Not every failure can be fixed by prompt refinement. Know when to escalate. 
+ +**Use action blocks for single-step precision** when: +- A navigation block does too much — you only need one specific click or input +- The page has multiple similar-looking elements and the AI picks the wrong one +- The step involves a single focused action (e.g., click one button, toggle one checkbox) + +**Open a manual session (last resort)** when: +- You cannot determine from error output what the page looks like +- The site has unusual UI patterns not described in any error message +- Use: skyvern_session_create, skyvern_navigate to the failing URL, skyvern_screenshot to see the page + +**Report to the user** when: +- CAPTCHA blocks persist even with proxy rotation +- The site requires 2FA or hardware authentication +- Rate limiting cannot be avoided with proxies and delays +- The site's terms of service explicitly prohibit automation +""" + + +# --------------------------------------------------------------------------- +# extract_data +# --------------------------------------------------------------------------- + +EXTRACT_DATA_CONTENT = """\ +# Data Extraction with Skyvern + +You design the data schema and describe what to extract. Skyvern's AI finds the elements, \ +parses the page, and returns structured JSON. You never write selectors or scraping code. + +## Schema Design + +Always provide a `data_extraction_schema` (JSON Schema) so Skyvern returns typed, validated output. + +- Use `"type": "object"` for a single record (profile, summary, confirmation). +- Use `"type": "array"` with `"items": { "type": "object", ... }` for lists (search results, table rows). +- Mark critical fields as `"required"` so missing data surfaces as an error rather than null. +- Choose descriptive property names that reflect the data, not the page layout \ +(`order_date` not `col_3`, `company_name` not `first_bold_text`). 
+- Nest objects when the data is naturally hierarchical: + `{ "seller": { "name": "...", "rating": 4.5 }, "price": { "amount": 29.99, "currency": "USD" } }` + +## Writing Extraction Prompts + +Describe WHAT to extract, not WHERE it is on the page. Skyvern's AI locates the data. + +Good: "Extract all product names, prices, and star ratings from the search results" +Bad: "Get the text from each div.product-card > span.price" + +When the page has multiple similar sections, specify which one: +"Extract order details from the table under 'Recent Orders', not 'Recommended Products'" + +For simple extractions on a page you already navigated to, use `skyvern_extract` with a schema. \ +For extraction combined with navigation (log in, then go to dashboard, then extract), use \ +`skyvern_run_task` with a `data_extraction_schema` -- it handles the full flow in one call. + +## Multi-Page Extraction + +For paginated results, build a workflow with a `for_loop` block that iterates over page numbers \ +or "next" clicks. Each iteration uses an extraction block to pull that page's data. + +For infinite-scroll pages, use `skyvern_run_task` with a prompt like \ +"scroll to load all results, then extract every item" -- Skyvern handles the scrolling. + +For detail-page drilling (list page -> click each item -> extract details), build a workflow: \ +extraction block to get the list of links, then a `for_loop` block that visits each link and \ +extracts the detail fields. + +## Validate Results + +After extraction, check the returned data before using it: +- Verify record count matches expectations (e.g., "I expected ~50 results but got 3"). +- Check for null or empty fields that should have values. +- If the data looks wrong, refine the extraction prompt (be more specific about which section \ +or what the data looks like), not the schema. +- Use `skyvern_validate` for page-level assertions before extracting \ +("Is this the search results page?" 
/ "Are there at least 10 results visible?"). +""" + + +# --------------------------------------------------------------------------- +# Prompt functions +# --------------------------------------------------------------------------- + + +def build_workflow( + task_description: Annotated[ + str, + Field(description="What the workflow should automate, e.g. 'Fill out a tax form on irs.gov'"), + ] = "", +) -> str: + """Guide for building a Skyvern workflow. Invoke this prompt when a user asks to create, + design, or build a browser automation workflow. The guide covers block selection, prompt + writing, parameterization, testing, and iteration.""" + if task_description: + return f"{BUILD_WORKFLOW_CONTENT}\n---\n\nUser's automation goal:\n```\n{task_description}\n```\n" + return BUILD_WORKFLOW_CONTENT + + +def debug_automation( + error_or_symptom: Annotated[ + str, + Field(description="The error message or symptom to diagnose, e.g. 'Timeout after 30s on login page'"), + ] = "", +) -> str: + """Diagnose and fix a failing Skyvern workflow or task. + + Guides you through reading Skyvern's structured error output, matching the failure to a known pattern, + and fixing the workflow by updating block prompts — without manually exploring in a browser. + """ + parts = [DEBUG_AUTOMATION_CONTENT] + if error_or_symptom: + parts.append( + f"\n---\n\nThe user reports this error or symptom:\n```\n{error_or_symptom}\n```\n\n" + "Start at Step 1: call skyvern_workflow_status (or check the last tool response) to get the full error " + "details. Then match it to a pattern in Step 2 and apply the fix from Step 3." + ) + return "\n".join(parts) + + +def extract_data( + target_description: Annotated[ + str, + Field(description="What data to extract, e.g. 'Product prices and ratings from Amazon search results'"), + ] = "", +) -> str: + """Guide for extracting structured data from websites using Skyvern. 
+ + Covers schema design, writing extraction prompts, multi-page extraction patterns, and result + validation. Call this prompt before building an extraction workflow or writing extraction calls. + """ + suffix = "" + if target_description: + suffix = ( + f"\n\nApply the above methodology to extract:\n```\n{target_description}\n```\n" + "Design a JSON Schema for the output, choose the right tool " + "(skyvern_extract for current page, skyvern_run_task for navigate-then-extract), " + "and validate the results." + ) + return EXTRACT_DATA_CONTENT + suffix diff --git a/skyvern/cli/mcp_tools/workflow.py b/skyvern/cli/mcp_tools/workflow.py index 548c89ab..5ba52386 100644 --- a/skyvern/cli/mcp_tools/workflow.py +++ b/skyvern/cli/mcp_tools/workflow.py @@ -349,7 +349,7 @@ async def skyvern_workflow_create( Best practice: use one block per logical step with a short focused prompt (2-3 sentences). Use "navigation" blocks for actions (filling forms, clicking) and "extraction" blocks for pulling data. Do NOT use the deprecated "task" block type. - Common block types: navigation, extraction, task_v2, for_loop, conditional, code, text_prompt, action, wait, login. + Common block types: navigation, extraction, for_loop, conditional, code, text_prompt, action, wait, login. Call skyvern_block_schema() for the full list with schemas and examples. Example JSON definition (multi-block EIN application):