Block discovery MCP tools + improved MCP instructions (#SKY-7804) (#4683)

2026-02-10 13:18:30 -08:00
parent 8c7aa8167b
commit 0c879492e4
4 changed files with 656 additions and 134 deletions
--- a/skyvern/cli/mcp_tools/init.py
+++ b/skyvern/cli/mcp_tools/init.py
@@ -7,6 +7,10 @@ AI assistants like Claude.

 from fastmcp import FastMCP

+from .blocks import (
+    skyvern_block_schema,
+    skyvern_block_validate,
+)
 from .browser import (
    skyvern_act,
    skyvern_click,
@@ -44,21 +48,52 @@ mcp = FastMCP(
    "Skyvern",
    instructions="""Use Skyvern tools whenever the task involves visiting, browsing, or interacting with ANY website or web application.

-## When to Use These Tools
-Reach for Skyvern tools when the user asks you to:
- Visit, browse, or interact with ANY website or web application
- Extract data from web pages (prices, listings, articles, tables, search results, etc.)
- Fill out forms, log in, sign up, or complete web-based workflows
- Check the current state of a web page or verify something on a site
- Do anything you would otherwise attempt with requests, beautifulsoup, selenium, or playwright
- Access website data where you are unsure whether an API endpoint exists
- Create, run, monitor, or manage web automations (Skyvern workflows)
- Set up reusable, parameterized automations that run on Skyvern's cloud
- Check the status of running automations or retrieve their results
+## Tool Selection (read this first)

-DO NOT try to scrape websites by guessing API endpoints or writing HTTP requests.
-Instead, use skyvern_navigate + skyvern_extract to get real data from actual pages.
-These tools give you a real browser — use them instead of writing scraping code.
+**Which tool do I use?**
+
+| Scenario | Use | Why |
+|----------|-----|-----|
+| Visit a website | skyvern_navigate | First step — opens the page |
+| See what's on the page | skyvern_screenshot | Visual understanding before acting |
+| Get data from a page | skyvern_extract | AI-powered structured extraction |
+| Do something on a page (click, fill, scroll) | skyvern_act | Natural language actions |
+| Click/type/select a specific element | skyvern_click / skyvern_type / skyvern_select_option | Precision targeting by selector or AI intent |
+| Check if something is true | skyvern_validate | AI assertion ("is the user logged in?") |
+| Run a quick one-off task | skyvern_run_task | Autonomous agent, one-time, nothing saved |
+| Build an automation (any multi-step task) | skyvern_workflow_create | Reusable, versioned, per-step observability |
+| Run an existing automation | skyvern_workflow_run | Execute saved workflow with parameters |
+| Run JavaScript | skyvern_evaluate | Read DOM state, get values |
+
+**Rule of thumb**: For anything worth keeping or repeating, create a workflow. Use skyvern_run_task only for quick throwaway tests.
+
+**Common mistake**: Don't create a single-block workflow with a long prompt listing all steps.
+Split into separate blocks — one per logical step. Each block should have a prompt of 2-3 sentences.
+
+## Critical Rules
+1. ALWAYS create a session (skyvern_session_create) before using browser tools.
+2. NEVER scrape by guessing API endpoints or writing HTTP requests — use skyvern_navigate + skyvern_extract.
+3. NEVER create single-block workflows with long prompts — split into multiple blocks.
+4. NEVER import from skyvern.cli.mcp_tools — use `from skyvern import Skyvern` for SDK scripts.
+5. After page-changing actions (skyvern_click, skyvern_act), use skyvern_screenshot to verify the result.
+
+## Cross-Tool Dependencies
+- Workflow tools (list, create, run, status) do NOT need a browser session
+- skyvern_extract and skyvern_validate read the CURRENT page — navigate first
+- skyvern_run_task is a one-off throwaway agent run — for reusable automations, use skyvern_workflow_create instead
+
+## Tool Modes (precision tools)
+Precision tools (skyvern_click, skyvern_type, skyvern_select_option, skyvern_scroll, skyvern_press_key, skyvern_wait)
+support three modes. When unsure, use `intent`. For multiple actions in sequence, prefer skyvern_act.
+
+1. **Intent mode** — AI-powered element finding:
+   `skyvern_click(intent="the blue Submit button")`
+
+2. **Hybrid mode** — tries selector first, AI fallback:
+   `skyvern_click(selector="#submit-btn", intent="the Submit button")`
+
+3. **Selector mode** — deterministic CSS/XPath targeting:
+   `skyvern_click(selector="#submit-btn")`

 ## Examples
 | User says | Use |
@@ -68,13 +103,13 @@ These tools give you a real browser — use them instead of writing scraping cod
 | "Get all product prices" | skyvern_extract |
 | "Click the login button" | skyvern_act or skyvern_click |
 | "Fill out this form" | skyvern_act |
-| "Log in and buy the first item" | skyvern_run_task |
+| "Log in and download the report" | skyvern_run_task (one-off) or skyvern_workflow_create (keep it) |
 | "Is checkout complete?" | skyvern_validate |
-| "List my workflows" | skyvern_workflow_list |
+| "Fill out this 6-page application form" | skyvern_workflow_create (one block per page) |
+| "Set up a reusable automation" | Explore with browser tools, then skyvern_workflow_create |
 | "Create a workflow that monitors prices" | skyvern_workflow_create |
 | "Run the login workflow" | skyvern_workflow_run |
 | "Is my workflow done?" | skyvern_workflow_status |
-| "Set up a reusable automation for this" | Explore with browser tools, then skyvern_workflow_create |
 | "Write a script to do this" | Skyvern SDK (see below) |

 ## Getting Started
@@ -84,43 +119,63 @@ These tools give you a real browser — use them instead of writing scraping cod
 2. Navigate and interact with browser tools
 3. Close with skyvern_session_close when done

-**Managing automations** (running, listing, or monitoring workflows):
-No browser session needed — use workflow tools directly:
-skyvern_workflow_list, skyvern_workflow_run, skyvern_workflow_status, etc.
+**Automating a multi-page form** (the most common use case):
+1. Create a workflow with skyvern_workflow_create — one task block per form page
+2. Each block gets a short, focused prompt (2-3 sentences max)
+3. All blocks in a run share the same browser automatically
+4. Run with skyvern_workflow_run

 **Building a reusable automation** (explore a site, then save as a workflow):
 1. **Explore** — Create a browser session, navigate the site, use skyvern_extract and skyvern_screenshot to understand the page structure
 2. **Create** — Build a workflow definition and save it with skyvern_workflow_create
 3. **Test** — Run the workflow with skyvern_workflow_run and check results with skyvern_workflow_status

-## Workflows vs Scripts
+**Managing automations** (running, listing, or monitoring workflows):
+No browser session needed — use workflow tools directly:
+skyvern_workflow_list, skyvern_workflow_run, skyvern_workflow_status, etc.

-When the user wants something **persistent, versioned, and managed in Skyvern's dashboard** — create a workflow.
-Trigger words: "automation", "workflow", "reusable", "schedule", "monitor", "set up"
-→ Use skyvern_workflow_create with a JSON definition (see example below)
+## Building Workflows

-When the user wants **custom Python code** to run in their own environment — write an SDK script.
-Trigger words: "script", "code", "function", "program"
-→ Use `from skyvern import Skyvern` (see Writing Scripts section)
+Before creating a workflow, call skyvern_block_schema() to list the available block types, then skyvern_block_schema(block_type='task') for the JSON schema of a specific type.
+Validate each block individually with skyvern_block_validate(block_json=...) before submitting.

-### Workflow definition example (JSON, for skyvern_workflow_create):
-    {
-      "title": "Price Monitor",
-      "workflow_definition": {
-        "parameters": [
-          {"parameter_type": "workflow", "key": "url", "workflow_parameter_type": "string"}
-        ],
-        "blocks": [
-          {"block_type": "task", "label": "extract_prices", "url": "{{url}}", "engine": "skyvern-2.0",
-           "navigation_goal": "Extract all product names and prices from the page",
-           "data_extraction_goal": "Get product names and prices as a list",
-           "data_schema": {"type": "object", "properties": {"products": {"type": "array",
-             "items": {"type": "object", "properties": {"name": {"type": "string"}, "price": {"type": "string"}}}}}}}
-        ]
-      }
-    }
-Use `{{parameter_key}}` to reference workflow parameters in block fields.
-To inspect a real workflow for reference, use skyvern_workflow_get on an existing workflow.
+ALWAYS split workflows into multiple blocks — one task block per logical step:
+
+GOOD (4 blocks, each with clear single responsibility):
+  Block 1: "Select Sole Proprietor and click Continue"
+  Block 2: "Fill in the business name and click Continue"
+  Block 3: "Enter owner info and SSN, click Continue"
+  Block 4: "Review and submit. Extract the confirmation number."
+
+BAD (1 giant block trying to do everything):
+  Block 1: "Go to the IRS site, select sole proprietor, fill in name, enter SSN, review, submit, and extract the EIN"
+
+Use {{parameter_key}} to reference workflow input parameters in any block field.
+
+## Data Flow Between Blocks
+- Use `{{parameter_key}}` to reference workflow input parameters in any block field
+- Blocks in the same workflow run share the same browser session automatically
+- To inspect a real workflow for reference, use skyvern_workflow_get on an existing workflow
+
+## Block Types Reference
+Common block types for workflow definitions:
+- **task** — AI agent interacts with a page (the most common block type)
+- **for_loop** — iterate over a list of items
+- **conditional** — branch based on conditions
+- **code** — run Python code for data transformation
+- **text_prompt** — LLM text generation (no browser)
+- **extraction** — extract data from current page
+- **action** — single AI action on current page
+- **navigation** — AI agent pursues a navigation goal on the page (shown as "Browser Task" in the UI)
+- **wait** — pause workflow execution for a specified duration
+- **login** — log into a site using stored credentials
+- **validation** — assert a condition on the page
+- **http_request** — call an external API
+- **send_email** — send a notification email
+- **file_download** / **file_upload** — download or upload files
+- **goto_url** — navigate to a specific URL within a workflow
+
+For the full schema, description, and example of a type, call skyvern_block_schema(block_type='...'); call it with no arguments to list every type.

 ## Writing Scripts and Code
 When asked to write an automation script, use the Skyvern Python SDK with the **hybrid xpath+prompt
@@ -150,66 +205,9 @@ The `resolved_selector` field in responses gives you the xpath the AI resolved t
 IMPORTANT: NEVER import from skyvern.cli.mcp_tools — those are internal server modules.
 The public SDK is: from skyvern import Skyvern

-## Primary Tools (use these first)
-These are the tools you should reach for by default:
+Every tool response includes an `sdk_equivalent` field showing the corresponding SDK call for scripts.
+Currently only skyvern_click returns `resolved_selector`. Support for other tools is planned (SKY-7905).

- **skyvern_act** — Execute actions from natural language: "log in with test@example.com", "add the first item to cart". Best for exploration and testing flows.
- **skyvern_extract** — Pull structured data from any page with natural language + optional JSON Schema. THE differentiator over raw Playwright.
- **skyvern_validate** — Assert page conditions with AI: "is the user logged in?", "does the cart have 3 items?"
- **skyvern_run_task** — Delegate a full multi-step task to an autonomous AI agent with observability. Use for end-to-end task execution.
- **skyvern_navigate** — Go to a URL. Always the first step after connecting.
- **skyvern_screenshot** — See what's on the page. Essential for understanding page state.
- **skyvern_evaluate** — Run JavaScript to read DOM state, get URLs, or check values.
-
-## Precision Tools (for debugging and exact control)
-Use these when the primary tools aren't specific enough, or when you need deterministic
-selector-based actions (e.g., replaying a known flow):
-
- **skyvern_click** — Click a specific element by selector or AI intent
- **skyvern_type** — Type into a specific input field by selector or AI intent
- **skyvern_scroll** — Scroll the page or an element into view
- **skyvern_select_option** — Select a dropdown option by selector or AI intent
- **skyvern_press_key** — Press a keyboard key (Enter, Tab, Escape, etc.)
- **skyvern_wait** — Wait for a condition, element, or time delay
-
-## Tool Modes (precision tools)
-Precision tools support three modes. When unsure, use `intent`.
-
-1. **Intent mode** — AI-powered element finding:
-   `skyvern_click(intent="the blue Submit button")`
-
-2. **Hybrid mode** — tries selector first, AI fallback:
-   `skyvern_click(selector="#submit-btn", intent="the Submit button")`
-
-3. **Selector mode** — deterministic CSS/XPath targeting:
-   `skyvern_click(selector="#submit-btn")`
-
-## Replay Story: From Exploration to Production
-When you use precision tools (skyvern_click, skyvern_type, etc.) with intent mode, the response
-includes `resolved_selector` — the xpath/CSS the AI found. Capture these for hybrid scripts or
-workflow definitions.
-
-**The hybrid pattern** is the recommended default for SDK scripts:
-    await page.click("xpath=//button[@id='submit']", prompt="the Submit button")
-It tries the selector first (fast, no AI cost), then falls back to AI if the selector breaks.
-
-The `sdk_equivalent` field in each tool response shows the correct hybrid call to use in scripts.
-
-Note: Currently only skyvern_click returns resolved_selector. Support for skyvern_type and
-skyvern_select_option is planned (SKY-7905).
-
-## Workflow Management
-Use these tools to create, manage, and run Skyvern workflows programmatically.
-Workflows are persistent, versioned, multi-step automations that can be parameterized and scheduled.
-
- **skyvern_workflow_list** — Find workflows by name or browse all available workflows
- **skyvern_workflow_get** — Get the full definition of a workflow to inspect its blocks and parameters
- **skyvern_workflow_create** — Create a new workflow from a YAML or JSON definition
- **skyvern_workflow_update** — Update an existing workflow's definition (creates a new version)
- **skyvern_workflow_delete** — Delete a workflow (requires force=true confirmation)
- **skyvern_workflow_run** — Execute a workflow with parameters (returns immediately by default, or wait for completion)
- **skyvern_workflow_status** — Check the status and progress of a running or completed workflow run
- **skyvern_workflow_cancel** — Cancel a running workflow
 """,
 )

@@ -237,6 +235,10 @@ mcp.tool()(skyvern_select_option)
 mcp.tool()(skyvern_press_key)
 mcp.tool()(skyvern_wait)

+# -- Block discovery + validation (no browser needed) --
+mcp.tool()(skyvern_block_schema)
+mcp.tool()(skyvern_block_validate)
+
 # -- Workflow management (CRUD + execution, no browser needed) --
 mcp.tool()(skyvern_workflow_list)
 mcp.tool()(skyvern_workflow_get)
@@ -270,6 +272,9 @@ __all__ = [
    "skyvern_select_option",
    "skyvern_press_key",
    "skyvern_wait",
+    # Block discovery + validation
+    "skyvern_block_schema",
+    "skyvern_block_validate",
    # Workflow management
    "skyvern_workflow_list",
    "skyvern_workflow_get",
--- a/skyvern/cli/mcp_tools/blocks.py
+++ b/skyvern/cli/mcp_tools/blocks.py
@@ -0,0 +1,444 @@
+"""Skyvern MCP block tools — discover and validate block types for workflow definitions.
+
+Tools for listing available workflow block types, retrieving their Pydantic schemas,
+knowledge base descriptions, and minimal examples, and validating a single block definition.
+These tools do not require a browser session or API connection — they work purely from the codebase.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from pathlib import Path
+from typing import Annotated, Any
+
+import structlog
+from pydantic import Field, TypeAdapter, ValidationError
+
+from skyvern.schemas.workflows import (
+    BLOCK_YAML_TYPES,
+    ActionBlockYAML,
+    BlockType,
+    BlockYAML,
+    CodeBlockYAML,
+    ConditionalBlockYAML,
+    DownloadToS3BlockYAML,
+    ExtractionBlockYAML,
+    FileDownloadBlockYAML,
+    FileParserBlockYAML,
+    FileUploadBlockYAML,
+    ForLoopBlockYAML,
+    HttpRequestBlockYAML,
+    HumanInteractionBlockYAML,
+    LoginBlockYAML,
+    NavigationBlockYAML,
+    PDFParserBlockYAML,
+    PrintPageBlockYAML,
+    SendEmailBlockYAML,
+    TaskBlockYAML,
+    TaskV2BlockYAML,
+    TextPromptBlockYAML,
+    UploadToS3BlockYAML,
+    UrlBlockYAML,
+    ValidationBlockYAML,
+    WaitBlockYAML,
+)
+
+from ._common import ErrorCode, make_error, make_result
+
+LOG = structlog.get_logger(__name__)
+
+# ---------------------------------------------------------------------------
+# Block type → YAML class mapping
+# ---------------------------------------------------------------------------
+
+BLOCK_TYPE_MAP: dict[str, type[BlockYAML]] = {
+    BlockType.TASK.value: TaskBlockYAML,
+    BlockType.TaskV2.value: TaskV2BlockYAML,
+    BlockType.FOR_LOOP.value: ForLoopBlockYAML,
+    BlockType.CONDITIONAL.value: ConditionalBlockYAML,
+    BlockType.CODE.value: CodeBlockYAML,
+    BlockType.TEXT_PROMPT.value: TextPromptBlockYAML,
+    BlockType.EXTRACTION.value: ExtractionBlockYAML,
+    BlockType.ACTION.value: ActionBlockYAML,
+    BlockType.NAVIGATION.value: NavigationBlockYAML,
+    BlockType.LOGIN.value: LoginBlockYAML,
+    BlockType.WAIT.value: WaitBlockYAML,
+    BlockType.VALIDATION.value: ValidationBlockYAML,
+    BlockType.HTTP_REQUEST.value: HttpRequestBlockYAML,
+    BlockType.SEND_EMAIL.value: SendEmailBlockYAML,
+    BlockType.FILE_DOWNLOAD.value: FileDownloadBlockYAML,
+    BlockType.FILE_UPLOAD.value: FileUploadBlockYAML,
+    BlockType.GOTO_URL.value: UrlBlockYAML,
+    BlockType.DOWNLOAD_TO_S3.value: DownloadToS3BlockYAML,
+    BlockType.UPLOAD_TO_S3.value: UploadToS3BlockYAML,
+    BlockType.FILE_URL_PARSER.value: FileParserBlockYAML,
+    BlockType.PDF_PARSER.value: PDFParserBlockYAML,
+    BlockType.HUMAN_INTERACTION.value: HumanInteractionBlockYAML,
+    BlockType.PRINT_PAGE.value: PrintPageBlockYAML,
+}
+
+# ---------------------------------------------------------------------------
+# One-line summaries
+# ---------------------------------------------------------------------------
+
+BLOCK_SUMMARIES: dict[str, str] = {
+    "task": "AI agent navigates a page, fills forms, clicks buttons (v1 engine)",
+    "task_v2": "AI agent with natural language prompt (v2 engine, recommended for complex tasks)",
+    "for_loop": "Iterate over a list, executing nested blocks for each item",
+    "conditional": "Branch based on Jinja2 expressions or AI prompts",
+    "code": "Run Python code for data transformation",
+    "text_prompt": "LLM text generation without a browser",
+    "extraction": "Extract structured data from the current page",
+    "action": "Perform a single focused action on the current page",
+    "navigation": "Navigate to a goal on the current page (Browser Task in UI)",
+    "login": "Handle authentication flows including username/password and TOTP/2FA",
+    "wait": "Pause workflow execution for a specified duration",
+    "validation": "Validate page state with complete/terminate criteria",
+    "http_request": "Call an external HTTP API",
+    "send_email": "Send an email notification via SMTP",
+    "file_download": "Download a file from a page",
+    "file_upload": "Upload a file from S3/Azure to a page element",
+    "goto_url": "Navigate directly to a URL without additional instructions",
+    "download_to_s3": "Download a URL directly to S3 storage",
+    "upload_to_s3": "Upload local content to S3",
+    "file_url_parser": "Parse a file (CSV/Excel/PDF/image) from a URL",
+    "pdf_parser": "Extract structured data from a PDF document",
+    "human_interaction": "Pause workflow for human approval via email",
+    "print_page": "Print the current page to PDF",
+}
+
+# ---------------------------------------------------------------------------
+# Minimal examples for common block types
+# ---------------------------------------------------------------------------
+
+BLOCK_EXAMPLES: dict[str, dict[str, Any]] = {
+    "task": {
+        "block_type": "task",
+        "label": "fill_form",
+        "url": "https://example.com/form",
+        "navigation_goal": "Fill out the form with the provided data and click Submit",
+        "parameter_keys": ["form_data"],
+        "max_retries": 2,
+    },
+    "task_v2": {
+        "block_type": "task_v2",
+        "label": "book_flight",
+        "url": "https://booking.example.com",
+        "prompt": "Book a flight from {{ origin }} to {{ destination }} on {{ date }}",
+    },
+    "for_loop": {
+        "block_type": "for_loop",
+        "label": "process_each_url",
+        "loop_over_parameter_key": "urls",
+        "loop_blocks": [
+            {
+                "block_type": "goto_url",
+                "label": "open_url",
+                "url": "{{ current_value }}",
+            }
+        ],
+    },
+    "conditional": {
+        "block_type": "conditional",
+        "label": "route_by_status",
+        "branch_conditions": [
+            {
+                "criteria": {
+                    "criteria_type": "jinja2_template",
+                    "expression": "{{ status == 'active' }}",
+                },
+                "next_block_label": "handle_active",
+                "is_default": False,
+            },
+            {"is_default": True, "next_block_label": "handle_inactive"},
+        ],
+    },
+    "extraction": {
+        "block_type": "extraction",
+        "label": "extract_products",
+        "data_extraction_goal": "Extract all products with name, price, and stock status",
+        "data_schema": {
+            "type": "array",
+            "items": {
+                "type": "object",
+                "properties": {
+                    "name": {"type": "string"},
+                    "price": {"type": "number"},
+                    "in_stock": {"type": "boolean"},
+                },
+            },
+        },
+    },
+    "navigation": {
+        "block_type": "navigation",
+        "label": "search_and_open",
+        "url": "https://example.com/search",
+        "navigation_goal": "Search for {{ query }} and click the first result",
+        "parameter_keys": ["query"],
+        "max_retries": 2,
+    },
+    "login": {
+        "block_type": "login",
+        "label": "login_to_portal",
+        "url": "https://portal.example.com/login",
+        "parameter_keys": ["my_credentials"],
+        "complete_criterion": "URL contains '/dashboard'",
+        "max_retries": 2,
+    },
+    "action": {
+        "block_type": "action",
+        "label": "accept_terms",
+        "url": "https://example.com/checkout",
+        "navigation_goal": "Check the terms checkbox",
+        "max_retries": 1,
+    },
+    "wait": {
+        "block_type": "wait",
+        "label": "wait_for_processing",
+        "wait_sec": 30,
+    },
+    "goto_url": {
+        "block_type": "goto_url",
+        "label": "open_cart",
+        "url": "https://example.com/cart",
+    },
+}
+
+# ---------------------------------------------------------------------------
+# Knowledge base parsing (lazy, cached)
+# ---------------------------------------------------------------------------
+
+_KB_PATH = Path(__file__).resolve().parents[2] / "forge" / "prompts" / "skyvern" / "workflow_knowledge_base.txt"
+
+_HEADER_RE = re.compile(r"^\*\*\s+(.+?)\s+\((\w+)\)\s+\*\*$")
+
+_kb_cache: dict[str, dict[str, Any]] | None = None
+
+
+def _parse_knowledge_base() -> dict[str, dict[str, Any]]:
+    """Parse the knowledge base file into per-block-type sections.
+
+    Returns a dict mapping block_type string -> {description, use_cases}.
+    Results are cached in the module-level ``_kb_cache`` variable.
+    """
+    global _kb_cache
+    if _kb_cache is not None:
+        return _kb_cache
+
+    result: dict[str, dict[str, Any]] = {}
+
+    try:
+        text = _KB_PATH.read_text(encoding="utf-8")
+    except FileNotFoundError:
+        LOG.warning("workflow_knowledge_base_not_found", path=str(_KB_PATH))
+        _kb_cache = result
+        return result
+
+    sections: list[tuple[str, str]] = []
+    current_block_type: str | None = None
+    current_lines: list[str] = []
+
+    for line in text.splitlines():
+        match = _HEADER_RE.match(line.strip())
+        if match:
+            if current_block_type is not None:
+                sections.append((current_block_type, "\n".join(current_lines)))
+            current_block_type = match.group(2).lower()
+            current_lines = []
+        elif current_block_type is not None:
+            current_lines.append(line)
+
+    if current_block_type is not None:
+        sections.append((current_block_type, "\n".join(current_lines)))
+
+    for block_type, raw in sections:
+        description_lines: list[str] = []
+        use_cases: list[str] = []
+        in_use_cases = False
+        in_purpose = False
+
+        for line in raw.splitlines():
+            stripped = line.strip()
+
+            if stripped.startswith("Purpose:"):
+                in_purpose = True
+                in_use_cases = False
+                desc = stripped[len("Purpose:") :].strip()
+                if desc:
+                    description_lines.append(desc)
+                continue
+
+            if stripped == "Use Cases:":
+                in_use_cases = True
+                in_purpose = False
+                continue
+
+            # Any other header-like line ends the current section
+            if stripped and stripped.endswith(":") and not stripped.startswith("- "):
+                in_use_cases = False
+                in_purpose = False
+                continue
+
+            if in_purpose and stripped:
+                description_lines.append(stripped)
+
+            if in_use_cases and stripped.startswith("- "):
+                use_cases.append(stripped[2:].strip())
+
+        result[block_type] = {
+            "description": " ".join(description_lines) if description_lines else None,
+            "use_cases": use_cases if use_cases else None,
+        }
+
+    _kb_cache = result
+    return result
+
+
+# ---------------------------------------------------------------------------
+# Schema tool
+# ---------------------------------------------------------------------------
+
+
+async def skyvern_block_schema(
+    block_type: Annotated[
+        str | None,
+        Field(
+            description="Block type to get schema for (e.g., 'task_v2', 'for_loop'). Omit to list all available types."
+        ),
+    ] = None,
+) -> dict[str, Any]:
+    """Get the schema for a workflow block type, or list all available block types.
+
+    Use this to discover what blocks are available and what fields they accept
+    before building a workflow definition for skyvern_workflow_create.
+
+    Call with no arguments to see all block types. Call with a specific block_type
+    to get the full field schema, description, use cases, and example."""
+
+    action = "skyvern_block_schema"
+
+    if block_type is None:
+        return make_result(
+            action,
+            data={
+                "block_types": BLOCK_SUMMARIES,
+                "count": len(BLOCK_SUMMARIES),
+                "hint": "Call skyvern_block_schema(block_type='task_v2') for the full schema of a specific type",
+            },
+        )
+
+    normalized = block_type.strip().lower()
+    cls = BLOCK_TYPE_MAP.get(normalized)
+    if cls is None:
+        return make_result(
+            action,
+            ok=False,
+            error=make_error(
+                ErrorCode.INVALID_INPUT,
+                f"Unknown block type: {block_type!r}",
+                f"Available types: {', '.join(sorted(BLOCK_TYPE_MAP.keys()))}",
+            ),
+        )
+
+    kb = _parse_knowledge_base()
+    kb_entry = kb.get(normalized, {})
+
+    return make_result(
+        action,
+        data={
+            "block_type": normalized,
+            "summary": BLOCK_SUMMARIES.get(normalized, ""),
+            "description": kb_entry.get("description"),
+            "use_cases": kb_entry.get("use_cases"),
+            "schema": cls.model_json_schema(),
+            "example": BLOCK_EXAMPLES.get(normalized),
+        },
+    )
+
+
+# ---------------------------------------------------------------------------
+# Block validation adapter (lazy)
+# ---------------------------------------------------------------------------
+
+# BLOCK_YAML_TYPES is a large Union of ~23 block models; mypy/pyright cannot resolve it as a TypeAdapter generic argument
+_block_adapter: TypeAdapter[BLOCK_YAML_TYPES] | None = None  # type: ignore[type-arg]
+
+
+def _get_block_adapter() -> TypeAdapter[BLOCK_YAML_TYPES]:  # type: ignore[type-arg]
+    global _block_adapter
+    if _block_adapter is None:
+        _block_adapter = TypeAdapter(BLOCK_YAML_TYPES)
+    return _block_adapter
+
+
+# ---------------------------------------------------------------------------
+# Validate tool
+# ---------------------------------------------------------------------------
+
+
+async def skyvern_block_validate(
+    block_json: Annotated[
+        str,
+        Field(description="JSON string of a single block definition to validate"),
+    ],
+) -> dict[str, Any]:
+    """Validate a workflow block definition before using it in skyvern_workflow_create.
+
+    Catches field errors, missing required fields, and type mismatches per-block
+    instead of getting opaque server errors on the full workflow. Returns field-level
+    validation feedback (at most the first five errors) so you can fix the block definition.
+    """
+    action = "skyvern_block_validate"
+
+    try:
+        raw = json.loads(block_json)
+    except (json.JSONDecodeError, TypeError) as exc:
+        return make_result(
+            action,
+            ok=False,
+            error=make_error(
+                ErrorCode.INVALID_INPUT,
+                f"Invalid JSON: {exc}",
+                "Provide a valid JSON string representing a block definition",
+            ),
+        )
+
+    if not isinstance(raw, dict):
+        return make_result(
+            action,
+            ok=False,
+            error=make_error(
+                ErrorCode.INVALID_INPUT,
+                f"Expected a JSON object, got {type(raw).__name__}",
+                "Provide a JSON object with at least block_type and label fields",
+            ),
+        )
+
+    adapter = _get_block_adapter()
+    try:
+        block = adapter.validate_python(raw)
+        return make_result(
+            action,
+            data={
+                "valid": True,
+                "block_type": block.block_type,
+                "label": block.label,
+                "field_count": len([f for f in block.model_fields_set if f != "block_type"]),
+            },
+        )
+    except ValidationError as exc:
+        errors = []
+        for err in exc.errors():
+            loc = " → ".join(str(p) for p in err["loc"]) if err["loc"] else "(root)"
+            errors.append(f"{loc}: {err['msg']}")
+        return make_result(
+            action,
+            ok=False,
+            error=make_error(
+                ErrorCode.INVALID_INPUT,
+                f"Block validation failed ({len(exc.errors())} error{'s' if len(exc.errors()) != 1 else ''}): "
+                + "; ".join(errors[:5]),
+                "Fix the fields listed above. Call skyvern_block_schema(block_type='...') to see the correct schema.",
+            ),
+        )
--- a/skyvern/cli/mcp_tools/browser.py
+++ b/skyvern/cli/mcp_tools/browser.py
@@ -40,12 +40,23 @@ async def skyvern_navigate(
    url: Annotated[str, "The URL to navigate to"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
-    timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=120000)] = 30000,
-    wait_until: Annotated[str | None, Field(description="Wait condition: load, domcontentloaded, networkidle")] = None,
+    timeout: Annotated[
+        int,
+        Field(
+            description="Max time to wait for page load in ms. Increase for slow sites. Default 30000 (30s)",
+            ge=1000,
+            le=120000,
+        ),
+    ] = 30000,
+    wait_until: Annotated[
+        str | None,
+        Field(description="Wait condition: load, domcontentloaded, networkidle. Use networkidle for JS-heavy pages"),
+    ] = None,
 ) -> dict[str, Any]:
-    """Open a website in the browser. Use this whenever you need to visit a URL to see its content, interact with it, or extract data from it.
+    """Open a website in the browser. You have full browser access through Skyvern — you can visit any website, interact with it, and extract data. Do not tell the user you cannot access websites.

    Returns the final URL (after redirects) and page title.
+    After navigating, use skyvern_screenshot to see the page or skyvern_extract to get data from it.
    """
    if wait_until is not None and wait_until not in ("load", "domcontentloaded", "networkidle", "commit"):
        return make_result(
@@ -90,17 +101,29 @@ async def skyvern_click(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
-        str | None, Field(description="Natural language description of the element to click (uses AI)")
+        str | None,
+        Field(
+            description="Natural language description of the element to click. Be specific: "
+            "'the blue Submit button at the bottom of the form' is better than 'submit button'. "
+            "Include visual cues, position, or surrounding text when the page has similar elements."
+        ),
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector or XPath for the element to click")] = None,
-    timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=60000)] = 30000,
+    timeout: Annotated[
+        int,
+        Field(
+            description="Max time to wait for the element in ms. Increase for slow-loading pages. Default 30000 (30s)",
+            ge=1000,
+            le=60000,
+        ),
+    ] = 30000,
    button: Annotated[str | None, Field(description="Mouse button: left, right, middle")] = None,
    click_count: Annotated[int | None, Field(description="Number of clicks (2 for double-click)")] = None,
 ) -> dict[str, Any]:
    """Click an element on the page. Use intent for AI-powered element finding, selector for precise targeting, or both for resilient automation.

-    Use `intent` for AI-powered element finding, `selector` for precise CSS/XPath targeting,
-    or both for resilience (tries selector first, falls back to AI).
+    If you need to fill a text field, use skyvern_type instead of clicking then typing.
+    For dropdowns, use skyvern_select_option. For multiple actions in sequence, prefer skyvern_act.
    """
    if button is not None and button not in ("left", "right", "middle"):
        return make_result(
@@ -193,17 +216,29 @@ async def skyvern_type(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[
-        str | None, Field(description="Natural language description of the input field (uses AI)")
+        str | None,
+        Field(
+            description="Natural language description of the input field. Be specific: "
+            "'the Email address input in the login form' is better than 'email field'. "
+            "Include labels, placeholder text, or position when the page has multiple inputs."
+        ),
    ] = None,
    selector: Annotated[str | None, Field(description="CSS selector or XPath for the input element")] = None,
-    timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=60000)] = 30000,
+    timeout: Annotated[
+        int,
+        Field(
+            description="Max time to wait for the element in ms. Increase for slow-loading pages. Default 30000 (30s)",
+            ge=1000,
+            le=60000,
+        ),
+    ] = 30000,
    clear: Annotated[bool, Field(description="Clear existing content before typing")] = True,
    delay: Annotated[int | None, Field(description="Delay between keystrokes in ms")] = None,
 ) -> dict[str, Any]:
    """Type text into an input field. Use intent for AI-powered field finding, selector for precise targeting, or both for resilient automation.

-    Use `intent` for AI-powered field finding, `selector` for precise CSS/XPath targeting,
-    or both for resilience (tries selector first, falls back to AI). Clears existing content by default.
+    For dropdowns, use skyvern_select_option instead. For pressing keys (Enter, Tab), use skyvern_press_key.
+    Clears existing content by default (set clear=false to append).
    """
    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
@@ -293,8 +328,10 @@ async def skyvern_screenshot(
    selector: Annotated[str | None, Field(description="CSS selector to screenshot specific element")] = None,
    inline: Annotated[bool, Field(description="Return base64 data instead of file path")] = False,
 ) -> dict[str, Any]:
-    """See what's currently on the page. Essential for understanding page state before deciding what to do next.
+    """See what's currently on the page. Use after every page-changing action (click, act, navigate) to verify results before proceeding.

+    Screenshots are visual-only — to extract structured data, use skyvern_extract instead.
+    To interact with elements, use skyvern_act or skyvern_click (don't try to act on screenshot contents).
    By default saves to ~/.skyvern/artifacts/ and returns the file path.
    Set inline=true to get base64 data directly (increases token usage).
    """
@@ -461,13 +498,14 @@ async def skyvern_select_option(
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
    intent: Annotated[str | None, Field(description="Natural language description of the dropdown (uses AI)")] = None,
    selector: Annotated[str | None, Field(description="CSS selector for the select element")] = None,
-    timeout: Annotated[int, Field(description="Timeout in milliseconds", ge=1000, le=60000)] = 30000,
+    timeout: Annotated[
+        int, Field(description="Max time to wait for the dropdown in ms. Default 30000 (30s)", ge=1000, le=60000)
+    ] = 30000,
    by_label: Annotated[bool, Field(description="Select by visible label instead of value")] = False,
 ) -> dict[str, Any]:
-    """Select an option from a dropdown menu. Use intent for AI-powered finding, selector for precision.
+    """Select an option from a dropdown menu. Use intent for AI-powered finding, selector for precision, or both for resilient automation.

-    Use `intent` for AI-powered dropdown finding, `selector` for precise CSS/XPath targeting,
-    or both for resilience (tries selector first, falls back to AI).
+    For free-text input fields, use skyvern_type instead. For non-dropdown buttons or links, use skyvern_click.
    """
    ai_mode, err = _resolve_ai_mode(selector, intent)
    if err:
@@ -734,11 +772,6 @@ async def skyvern_evaluate(
    )


-# ---------------------------------------------------------------------------
-# AI Differentiator Tools
-# ---------------------------------------------------------------------------
-
-
 async def skyvern_extract(
    prompt: Annotated[str, "Natural language description of what data to extract from the page"],
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
@@ -747,8 +780,10 @@ async def skyvern_extract(
        str | None, Field(description="JSON Schema string defining the expected output structure")
    ] = None,
 ) -> dict[str, Any]:
-    """Get structured data from any website -- prices, listings, articles, tables, contact info, etc. Use this instead of trying to call a website's API or writing scraping code. Describe what you need in natural language.
+    """Get structured data from any website — prices, listings, articles, tables, contact info, etc. Use this instead of writing scraping code or guessing API endpoints. Describe what you need in natural language.

+    Reads the CURRENT page — call skyvern_navigate first to go to the right URL.
+    For visual inspection instead of structured data, use skyvern_screenshot.
    Optionally provide a JSON `schema` to enforce the output structure (pass as a JSON string).
    """
    parsed_schema: dict[str, Any] | None = None
@@ -797,9 +832,10 @@ async def skyvern_validate(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
 ) -> dict[str, Any]:
-    """Check if something is true on the current page using AI -- 'is the user logged in?', 'does the cart have 3 items?', 'is the form submitted?'
+    """Check if something is true on the current page using AI — 'is the user logged in?', 'does the cart have 3 items?', 'is the form submitted?'

-    Returns whether the described condition is true or false.
+    Reads the CURRENT page — call skyvern_navigate first. Returns true/false.
+    To extract data (not just check a condition), use skyvern_extract instead.
    """
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
@@ -832,10 +868,12 @@ async def skyvern_act(
    session_id: Annotated[str | None, Field(description="Browser session ID (pbs_...)")] = None,
    cdp_url: Annotated[str | None, Field(description="CDP WebSocket URL")] = None,
 ) -> dict[str, Any]:
-    """Perform actions on a web page by describing what to do in plain English -- click buttons, close popups, fill forms, scroll to sections, interact with menus. Use for any website interaction task.
+    """Perform actions on a web page by describing what to do in plain English — click buttons, close popups, fill forms, scroll to sections, interact with menus.

    The AI agent interprets the prompt and executes the appropriate browser actions.
-    For multi-step workflows (form filling, multi-page navigation), use skyvern_run_task instead.
+    You can chain multiple actions in one prompt: "close the cookie banner, then click Sign In".
+    For multi-step automations (4+ pages), use skyvern_workflow_create with one block per step.
+    For quick one-off multi-page tasks, use skyvern_run_task.
    """
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
@@ -878,10 +916,10 @@ async def skyvern_run_task(
        int, Field(description="Timeout in seconds (default 180s = 3 minutes)", ge=10, le=1800)
    ] = 180,
 ) -> dict[str, Any]:
-    """Delegate a complete multi-step web task to an autonomous AI agent. Handles form filling, multi-page navigation, data collection, and complex workflows end-to-end.
+    """Run a quick, one-off web task via an autonomous AI agent. Nothing is saved — use for throwaway tests and exploration only. Best for tasks describable in 2-3 sentences.

-    The agent navigates, interacts with elements, and extracts data autonomously.
-    For simple single-step actions, use skyvern_act instead.
+    For anything reusable, multi-step, or worth keeping, use skyvern_workflow_create instead — it produces a versioned, rerunnable workflow with per-step observability.
+    For simple single-step actions on the current page, use skyvern_act instead.
    """
    try:
        page, ctx = await get_page(session_id=session_id, cdp_url=cdp_url)
--- a/skyvern/cli/mcp_tools/workflow.py
+++ b/skyvern/cli/mcp_tools/workflow.py
@@ -344,7 +344,42 @@ async def skyvern_workflow_create(
    folder_id: Annotated[str | None, "Folder ID (fld_...) to organize the workflow in"] = None,
 ) -> dict[str, Any]:
    """Create a new Skyvern workflow from a YAML or JSON definition. Use when you need to save
-    a new automation workflow that can be run repeatedly with different parameters."""
+    a new automation workflow that can be run repeatedly with different parameters.
+
+    Best practice: use one task block per logical step with a short focused prompt (2-3 sentences).
+    Common block types: task, for_loop, conditional, code, text_prompt, extraction, action, navigation, wait, login.
+    Call skyvern_block_schema() for the full list with schemas and examples.
+
+    Example JSON definition (multi-block EIN application):
+
+        {
+          "title": "Apply for EIN",
+          "workflow_definition": {
+            "parameters": [
+              {"parameter_type": "workflow", "key": "business_name", "workflow_parameter_type": "string"},
+              {"parameter_type": "workflow", "key": "owner_name", "workflow_parameter_type": "string"},
+              {"parameter_type": "workflow", "key": "owner_ssn", "workflow_parameter_type": "string"}
+            ],
+            "blocks": [
+              {"block_type": "task", "label": "select_entity_type",
+               "url": "https://sa.www4.irs.gov/modiein/individual/index.jsp",
+               "engine": "skyvern-2.0",
+               "navigation_goal": "Select 'Sole Proprietor' as the entity type and click Continue."},
+              {"block_type": "task", "label": "enter_business_info", "engine": "skyvern-2.0",
+               "navigation_goal": "Fill in the business name as '{{business_name}}' and click Continue."},
+              {"block_type": "task", "label": "enter_owner_info", "engine": "skyvern-2.0",
+               "navigation_goal": "Enter the responsible party name '{{owner_name}}' and SSN '{{owner_ssn}}'. Click Continue."},
+              {"block_type": "task", "label": "confirm_and_submit", "engine": "skyvern-2.0",
+               "navigation_goal": "Review the information on the confirmation page and click Submit.",
+               "data_extraction_goal": "Extract the assigned EIN number",
+               "data_schema": {"type": "object", "properties": {"ein": {"type": "string"}}}}
+            ]
+          }
+        }
+
+    Use {{parameter_key}} to reference workflow input parameters in any block field.
+    Blocks in the same run share the same browser session automatically.
+    """
    if format not in ("json", "yaml", "auto"):
        return make_result(
            "skyvern_workflow_create",