From 4062139edc4e47f851ca3c87e7ade857be7312c5 Mon Sep 17 00:00:00 2001 From: Marc Kelechava Date: Wed, 11 Feb 2026 16:13:08 -0800 Subject: [PATCH] =?UTF-8?q?Deprecate=20'task'=20block=20type=20in=20MCP=20?= =?UTF-8?q?tools=20=E2=80=94=20steer=20toward=20navigation/extraction=20(#?= =?UTF-8?q?4706)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- skyvern/cli/mcp_tools/__init__.py | 40 +++++---- skyvern/cli/mcp_tools/blocks.py | 89 +++++++++++-------- skyvern/cli/mcp_tools/workflow.py | 28 +++--- .../forge/prompts/skyvern/workflow-copilot.j2 | 9 +- 4 files changed, 96 insertions(+), 70 deletions(-) diff --git a/skyvern/cli/mcp_tools/__init__.py b/skyvern/cli/mcp_tools/__init__.py index 10f8c396..a66da910 100644 --- a/skyvern/cli/mcp_tools/__init__.py +++ b/skyvern/cli/mcp_tools/__init__.py @@ -71,11 +71,12 @@ mcp = FastMCP( Split into separate blocks — one per logical step. Each block should have a prompt of 2-3 sentences. ## Critical Rules -1. ALWAYS create a session (skyvern_session_create) before using browser tools. -2. NEVER scrape by guessing API endpoints or writing HTTP requests — use skyvern_navigate + skyvern_extract. -3. NEVER create single-block workflows with long prompts — split into multiple blocks. -4. NEVER import from skyvern.cli.mcp_tools — use `from skyvern import Skyvern` for SDK scripts. -5. After page-changing actions (skyvern_click, skyvern_act), use skyvern_screenshot to verify the result. +1. ALWAYS use MCP tools directly — do NOT write Python scripts unless the user explicitly asks for a script. The tools ARE the interface. +2. Create a session (skyvern_session_create) before using browser tools (navigate, click, extract, etc.). Workflow and block tools do NOT need a session. +3. NEVER scrape by guessing API endpoints or writing HTTP requests — use skyvern_navigate + skyvern_extract. +4. NEVER create single-block workflows with long prompts — split into multiple blocks. +5. NEVER import from skyvern.cli.mcp_tools — use `from skyvern import Skyvern` for SDK scripts. +6. After page-changing actions (skyvern_click, skyvern_act), use skyvern_screenshot to verify the result. ## Cross-Tool Dependencies - Workflow tools (list, create, run, status) do NOT need a browser session @@ -110,7 +111,8 @@ support three modes. When unsure, use `intent`. For multiple actions in sequence | "Create a workflow that monitors prices" | skyvern_workflow_create | | "Run the login workflow" | skyvern_workflow_run | | "Is my workflow done?" | skyvern_workflow_status | -| "Write a script to do this" | Skyvern SDK (see below) | +| "Automate this process" | skyvern_workflow_create (always prefer MCP tools over scripts) | +| "Write a Python script to do this" | Skyvern SDK (ONLY when user explicitly asks for a script) | ## Getting Started @@ -120,7 +122,7 @@ support three modes. When unsure, use `intent`. For multiple actions in sequence 3. Close with skyvern_session_close when done **Automating a multi-page form** (the most common use case): -1. Create a workflow with skyvern_workflow_create — one task block per form page +1. Create a workflow with skyvern_workflow_create — one navigation/extraction block per form page 2. Each block gets a short, focused prompt (2-3 sentences max) 3. All blocks in a run share the same browser automatically 4. Run with skyvern_workflow_run @@ -139,13 +141,15 @@ skyvern_workflow_list, skyvern_workflow_run, skyvern_workflow_status, etc. Before creating a workflow, call skyvern_block_schema() to discover available block types and their JSON schemas. Validate blocks with skyvern_block_validate() before submitting. -ALWAYS split workflows into multiple blocks — one task block per logical step: +ALWAYS split workflows into multiple blocks — one block per logical step. +Use **navigation** blocks for actions (filling forms, clicking buttons) and **extraction** blocks for pulling data. +Do NOT use the deprecated "task" block type — use "navigation" or "extraction" instead. GOOD (4 blocks, each with clear single responsibility): - Block 1: "Select Sole Proprietor and click Continue" - Block 2: "Fill in the business name and click Continue" - Block 3: "Enter owner info and SSN, click Continue" - Block 4: "Review and submit. Extract the confirmation number." + Block 1 (navigation): "Select Sole Proprietor and click Continue" + Block 2 (navigation): "Fill in the business name and click Continue" + Block 3 (navigation): "Enter owner info and SSN, click Continue" + Block 4 (extraction): "Extract the confirmation number from the results page" BAD (1 giant block trying to do everything): Block 1: "Go to the IRS site, select sole proprietor, fill in name, enter SSN, review, submit, and extract the EIN" @@ -159,21 +163,23 @@ Use {{parameter_key}} to reference workflow input parameters in any block field. ## Block Types Reference Common block types for workflow definitions: -- **task** — AI agent interacts with a page (the most common block type) +- **navigation** — take actions on a page: fill forms, click buttons, navigate multi-step flows (most common) +- **extraction** — extract structured data from the current page +- **task_v2** — complex tasks via natural language prompt (handles both actions and extraction) - **for_loop** — iterate over a list of items - **conditional** — branch based on conditions - **code** — run Python code for data transformation - **text_prompt** — LLM text generation (no browser) -- **extraction** — extract data from current page -- **action** — single AI action on current page -- **navigation** — navigate to a URL +- **action** — single focused action on the current page +- **goto_url** — navigate directly to a URL - **wait** — pause for a condition or time - **login** — log into a site using stored credentials - **validation** — assert a condition on the page - **http_request** — call an external API - **send_email** — send a notification email - **file_download** / **file_upload** — download or upload files -- **goto_url** — navigate to a specific URL within a workflow + +IMPORTANT: Do NOT use "task" block type — it is deprecated. Use "navigation" for actions and "extraction" for data extraction. For full schemas and descriptions, call skyvern_block_schema(). diff --git a/skyvern/cli/mcp_tools/blocks.py b/skyvern/cli/mcp_tools/blocks.py index 4c00f99b..07a116e8 100644 --- a/skyvern/cli/mcp_tools/blocks.py +++ b/skyvern/cli/mcp_tools/blocks.py @@ -83,15 +83,14 @@ BLOCK_TYPE_MAP: dict[str, type[BlockYAML]] = { # --------------------------------------------------------------------------- BLOCK_SUMMARIES: dict[str, str] = { - "task": "AI agent navigates a page, fills forms, clicks buttons (v1 engine)", - "task_v2": "AI agent with natural language prompt (v2 engine, recommended for complex tasks)", + "navigation": "Take actions on a page: fill forms, click buttons, navigate multi-step flows (most common)", + "extraction": "Extract structured data from the current page", + "task_v2": "Complex tasks via natural language prompt — handles both actions and extraction", "for_loop": "Iterate over a list, executing nested blocks for each item", "conditional": "Branch based on Jinja2 expressions or AI prompts", "code": "Run Python code for data transformation", "text_prompt": "LLM text generation without a browser", - "extraction": "Extract structured data from the current page", "action": "Perform a single focused action on the current page", - "navigation": "Navigate to a goal on the current page (Browser Task in UI)", "login": "Handle authentication flows including username/password and TOTP/2FA", "wait": "Pause workflow execution for a specified duration", "validation": "Validate page state with complete/terminate criteria", @@ -113,14 +112,32 @@ BLOCK_SUMMARIES: dict[str, str] = { # --------------------------------------------------------------------------- BLOCK_EXAMPLES: dict[str, dict[str, Any]] = { - "task": { - "block_type": "task", - "label": "fill_form", - "url": "https://example.com/form", - "navigation_goal": "Fill out the form with the provided data and click Submit", - "parameter_keys": ["form_data"], + "navigation": { + "block_type": "navigation", + "label": "search_and_open", + "url": "https://example.com/search", + "title": "Search and Open Result", + "navigation_goal": "Search for {{ query }} and click the first result", + "parameter_keys": ["query"], "max_retries": 2, }, + "extraction": { + "block_type": "extraction", + "label": "extract_products", + "title": "Extract Product List", + "data_extraction_goal": "Extract all products with name, price, and stock status", + "data_schema": { + "type": "array", + "items": { + "type": "object", + "properties": { + "name": {"type": "string"}, + "price": {"type": "number"}, + "in_stock": {"type": "boolean"}, + }, + }, + }, + }, "task_v2": { "block_type": "task_v2", "label": "book_flight", @@ -154,30 +171,6 @@ BLOCK_EXAMPLES: dict[str, dict[str, Any]] = { {"is_default": True, "next_block_label": "handle_inactive"}, ], }, - "extraction": { - "block_type": "extraction", - "label": "extract_products", - "data_extraction_goal": "Extract all products with name, price, and stock status", - "data_schema": { - "type": "array", - "items": { - "type": "object", - "properties": { - "name": {"type": "string"}, - "price": {"type": "number"}, - "in_stock": {"type": "boolean"}, - }, - }, - }, - }, - "navigation": { - "block_type": "navigation", - "label": "search_and_open", - "url": "https://example.com/search", - "navigation_goal": "Search for {{ query }} and click the first result", - "parameter_keys": ["query"], - "max_retries": 2, - }, "login": { "block_type": "login", "label": "login_to_portal", @@ -304,7 +297,7 @@ async def skyvern_block_schema( block_type: Annotated[ str | None, Field( - description="Block type to get schema for (e.g., 'task_v2', 'for_loop'). Omit to list all available types." + description="Block type to get schema for (e.g., 'navigation', 'extraction', 'task_v2'). Omit to list all available types." ), ] = None, ) -> dict[str, Any]: @@ -324,11 +317,16 @@ async def skyvern_block_schema( data={ "block_types": BLOCK_SUMMARIES, "count": len(BLOCK_SUMMARIES), - "hint": "Call skyvern_block_schema(block_type='task_v2') for the full schema of a specific type", + "hint": "Call skyvern_block_schema(block_type='navigation') for the full schema of a specific type", }, ) normalized = block_type.strip().lower() + + task_redirect = normalized == "task" + if task_redirect: + normalized = "navigation" + cls = BLOCK_TYPE_MAP.get(normalized) if cls is None: return make_result( @@ -337,13 +335,21 @@ async def skyvern_block_schema( error=make_error( ErrorCode.INVALID_INPUT, f"Unknown block type: {block_type!r}", - f"Available types: {', '.join(sorted(BLOCK_TYPE_MAP.keys()))}", + f"Available types: {', '.join(sorted(BLOCK_SUMMARIES.keys()))}. Note: 'task' is also accepted (deprecated alias for 'navigation')", ), ) kb = _parse_knowledge_base() kb_entry = kb.get(normalized, {}) + warnings = ( + [ + "'task' is deprecated. Showing 'navigation' schema instead. Use 'navigation' for actions (requires navigation_goal) and 'extraction' for data extraction (requires data_extraction_goal + data_schema)." + ] + if task_redirect + else [] + ) + return make_result( action, data={ @@ -354,6 +360,7 @@ async def skyvern_block_schema( "schema": cls.model_json_schema(), "example": BLOCK_EXAMPLES.get(normalized), }, + warnings=warnings, ) @@ -418,6 +425,11 @@ async def skyvern_block_validate( adapter = _get_block_adapter() try: block = adapter.validate_python(raw) + warnings = [] + if block.block_type == "task": + warnings.append( + "'task' block type is deprecated. Use 'navigation' for actions and 'extraction' for data extraction." + ) return make_result( action, data={ @@ -426,6 +438,7 @@ async def skyvern_block_validate( "label": block.label, "field_count": len([f for f in block.model_fields_set if f != "block_type"]), }, + warnings=warnings, ) except ValidationError as exc: errors = [] @@ -439,6 +452,6 @@ async def skyvern_block_validate( ErrorCode.INVALID_INPUT, f"Block validation failed ({len(exc.errors())} error{'s' if len(exc.errors()) != 1 else ''}): " + "; ".join(errors[:5]), - "Fix the fields listed above. Call skyvern_block_schema(block_type='...') to see the correct schema.", + "Fix the fields listed above. Call skyvern_block_schema(block_type='navigation') to see the correct schema. Use 'navigation' for actions and 'extraction' for data extraction — do NOT use the deprecated 'task' type.", ), ) diff --git a/skyvern/cli/mcp_tools/workflow.py b/skyvern/cli/mcp_tools/workflow.py index 66848ea4..548c89ab 100644 --- a/skyvern/cli/mcp_tools/workflow.py +++ b/skyvern/cli/mcp_tools/workflow.py @@ -346,8 +346,10 @@ async def skyvern_workflow_create( """Create a new Skyvern workflow from a YAML or JSON definition. Use when you need to save a new automation workflow that can be run repeatedly with different parameters. - Best practice: use one task block per logical step with a short focused prompt (2-3 sentences). - Common block types: task, for_loop, conditional, code, text_prompt, extraction, action, navigation, wait, login. + Best practice: use one block per logical step with a short focused prompt (2-3 sentences). + Use "navigation" blocks for actions (filling forms, clicking) and "extraction" blocks for pulling data. + Do NOT use the deprecated "task" block type. + Common block types: navigation, extraction, task_v2, for_loop, conditional, code, text_prompt, action, wait, login. Call skyvern_block_schema() for the full list with schemas and examples. Example JSON definition (multi-block EIN application): @@ -361,17 +363,21 @@ async def skyvern_workflow_create( {"parameter_type": "workflow", "key": "owner_ssn", "workflow_parameter_type": "string"} ], "blocks": [ - {"block_type": "task", "label": "select_entity_type", + {"block_type": "navigation", "label": "select_entity_type", "url": "https://sa.www4.irs.gov/modiein/individual/index.jsp", - "engine": "skyvern-2.0", + "title": "Select Entity Type", "navigation_goal": "Select 'Sole Proprietor' as the entity type and click Continue."}, - {"block_type": "task", "label": "enter_business_info", "engine": "skyvern-2.0", - "navigation_goal": "Fill in the business name as '{{business_name}}' and click Continue."}, - {"block_type": "task", "label": "enter_owner_info", "engine": "skyvern-2.0", - "navigation_goal": "Enter the responsible party name '{{owner_name}}' and SSN '{{owner_ssn}}'. Click Continue."}, - {"block_type": "task", "label": "confirm_and_submit", "engine": "skyvern-2.0", - "navigation_goal": "Review the information on the confirmation page and click Submit.", - "data_extraction_goal": "Extract the assigned EIN number", + {"block_type": "navigation", "label": "enter_business_info", + "title": "Enter Business Info", + "navigation_goal": "Fill in the business name as '{{business_name}}' and click Continue.", + "parameter_keys": ["business_name"]}, + {"block_type": "navigation", "label": "enter_owner_info", + "title": "Enter Owner Info", + "navigation_goal": "Enter the responsible party name '{{owner_name}}' and SSN '{{owner_ssn}}'. Click Continue.", + "parameter_keys": ["owner_name", "owner_ssn"]}, + {"block_type": "extraction", "label": "extract_ein", + "title": "Extract EIN", + "data_extraction_goal": "Extract the assigned EIN number from the confirmation page", "data_schema": {"type": "object", "properties": {"ein": {"type": "string"}}}} ] } diff --git a/skyvern/forge/prompts/skyvern/workflow-copilot.j2 b/skyvern/forge/prompts/skyvern/workflow-copilot.j2 index 0071c9b2..5069164c 100644 --- a/skyvern/forge/prompts/skyvern/workflow-copilot.j2 +++ b/skyvern/forge/prompts/skyvern/workflow-copilot.j2 @@ -78,11 +78,12 @@ IMPORTANT RULES: * Always generate valid YAML that conforms to the Skyvern workflow schema * Preserve existing blocks unless the user explicitly asks to modify or remove them * Use appropriate block types based on the user's intent: - - Use "task_v2" blocks for complex, multi-step workflows (may be slightly slower) - - Use "task" blocks for combined navigation and extraction (faster, but less flexible) - - Use "goto_url" blocks for pure navigation without data extraction - - Use "extraction" blocks for data extraction from the current page + - Use "navigation" blocks for actions: filling forms, clicking buttons, navigating flows (most common) + - Use "extraction" blocks for extracting structured data from the current page + - Use "task_v2" blocks for complex tasks requiring deep thinking and natural language prompts + - Use "goto_url" blocks for navigating directly to a URL without additional instructions - Use "login" blocks for authentication flows + - Do NOT use "task" blocks — they are deprecated. Use "navigation" instead. * Include all required fields for each block type (label, next_block_label, block_type, etc.) * Use descriptive, unique labels for blocks (snake_case format) * Reference parameters using Jinja2 syntax: {% raw %}{{ parameters.param_key }}{% endraw %}