From 957e6203c26edd0634890665c47a8f545659448d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Celal=20Zamano=C4=9Flu?= <95054566+celalzamanoglu@users.noreply.github.com> Date: Tue, 17 Feb 2026 19:54:25 +0300 Subject: [PATCH] Default data schema output to recommend snake_case (#SKY-7446) (#4764) --- .../src/routes/workflows/editor/nodes/types.ts | 2 +- .../prompts/skyvern/build-workflow-from-pdf.j2 | 2 +- .../prompts/skyvern/suggest-data-schema.j2 | 17 ++++++++++++----- .../skyvern/task_v2_generate_extraction_task.j2 | 2 +- .../skyvern/task_v2_generate_task_block.j2 | 2 +- .../prompts/skyvern/workflow_knowledge_base.txt | 1 + 6 files changed, 17 insertions(+), 9 deletions(-) diff --git a/skyvern-frontend/src/routes/workflows/editor/nodes/types.ts b/skyvern-frontend/src/routes/workflows/editor/nodes/types.ts index 1917c967..1807e70d 100644 --- a/skyvern-frontend/src/routes/workflows/editor/nodes/types.ts +++ b/skyvern-frontend/src/routes/workflows/editor/nodes/types.ts @@ -27,7 +27,7 @@ export const errorMappingExampleValue = { export const dataSchemaExampleValue = { type: "object", properties: { - sample: { type: "string" }, + sample_field: { type: "string" }, }, } as const; diff --git a/skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2 b/skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2 index 97f2cf7e..4c2e17d2 100644 --- a/skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2 +++ b/skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2 @@ -67,7 +67,7 @@ BLOCK STRUCTURE REQUIREMENTS: 1. Each block MUST have: label, block_type, continue_on_failure 2. Navigation blocks need: url (can be empty string ""), navigation_goal, engine (set to "skyvern-1.0") 3. Login blocks need: url, navigation_goal, parameter_keys (empty array if no credentials), engine (set to "skyvern-1.0") -4. Extraction blocks need: url (can be empty string ""), data_extraction_goal, data_schema, engine (set to "skyvern-1.0") +4. Extraction blocks need: url (can be empty string ""), data_extraction_goal, data_schema (use snake_case for all field names, e.g., product_name, order_date), engine (set to "skyvern-1.0") 5. Action blocks need: url (can be empty string ""), navigation_goal, engine (set to "skyvern-1.0") 6. Validation blocks need: complete_criterion OR terminate_criterion (at least one must be set), parameter_keys (empty array if none) 7. For_loop blocks need: loop_blocks, loop_variable_reference diff --git a/skyvern/forge/prompts/skyvern/suggest-data-schema.j2 b/skyvern/forge/prompts/skyvern/suggest-data-schema.j2 index edc0938b..cf16a364 100644 --- a/skyvern/forge/prompts/skyvern/suggest-data-schema.j2 +++ b/skyvern/forge/prompts/skyvern/suggest-data-schema.j2 @@ -2,23 +2,25 @@ We are developing an interface for AI agent tasks that use JSON schemas to descr You are given an input prompt from a user, and some additional context. Your goal is to generate a JSON schema given the user prompt and the context. +IMPORTANT: All field names in the schema MUST use snake_case naming convention (e.g., "first_name", "total_price", "order_date"). Do not use camelCase, PascalCase, or spaces in field names. + If additional context is given, try to use the it for further clues about the data that needs to be extracted. For example, the user might provide some detail about product information to be extracted in a "data_extraction_goal" inside the context, but maybe not necessarily pass it in the input prompt. In these cases, you should use the context. Here is an example: -User prompt: Generate a data schema that extracts the title and link for the posts as a list -Additional context: +User prompt: Generate a data schema that extracts the title, link, and author name for the posts as a list +Additional context: ```json { "url": "https://news.ycombinator.com", - "data_extraction_goal": "Extract the title and link of the top 5 posts", + "data_extraction_goal": "Extract the title, link, and author name of the top 5 posts", "existing_schema": "null" } ``` -Suggested Data Schema: +Suggested Data Schema: ```json { "posts" : { @@ -33,11 +35,16 @@ Suggested Data Schema: "link": { "type": "string", "description": "Link to the post" + }, + "author_name": { + "type": "string", + "description": "Name of the post author" } }, "required": [ "title", - "link" + "link", + "author_name" ] } } diff --git a/skyvern/forge/prompts/skyvern/task_v2_generate_extraction_task.j2 b/skyvern/forge/prompts/skyvern/task_v2_generate_extraction_task.j2 index 1f8b40a3..93a4d6a9 100644 --- a/skyvern/forge/prompts/skyvern/task_v2_generate_extraction_task.j2 +++ b/skyvern/forge/prompts/skyvern/task_v2_generate_extraction_task.j2 @@ -4,7 +4,7 @@ MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing comma Reply in JSON format with the following keys: { - "schema": JSON, // the schema of the output data to extract. Use JSON Schema specification style. + "schema": JSON, // the schema of the output data to extract. Use JSON Schema specification style. All field names MUST use snake_case (e.g., product_name, order_date). } The URL of the page you're on right now is `{{ current_url }}`. diff --git a/skyvern/forge/prompts/skyvern/task_v2_generate_task_block.j2 b/skyvern/forge/prompts/skyvern/task_v2_generate_task_block.j2 index d7213380..95dfcc19 100644 --- a/skyvern/forge/prompts/skyvern/task_v2_generate_task_block.j2 +++ b/skyvern/forge/prompts/skyvern/task_v2_generate_task_block.j2 @@ -17,7 +17,7 @@ Reply in JSON format with the following keys: "thoughts": str, // Think step by step. What would the use do to achieve the goal. "navigation_goal": str, // What kind things the user needs to do in the web achieve the plan and finally get the data to extract. Include all the data needed to complete the goal here.{% if is_link %} The user already has the link to go to the target page first, in order to start executing the plan. So state the navigation goal from the perspective of already being on the target page. What does the user need to do from there?{% endif %} "data_extraction_goal": str, // If the user needs to extract/retrieve data from the site after navigation goal is achieved, define that extraction goal here. null if no data needs to be extracted. - "data_schema": json, // the schema of the output data. use JSON schema specification style. All fields should be optional as we optimize to extract as much data as possible + "data_schema": json, // the schema of the output data. use JSON schema specification style. All field names MUST use snake_case (e.g., product_name, order_date). All fields should be optional as we optimize to extract as much data as possible } Current datetime, ISO format: diff --git a/skyvern/forge/prompts/skyvern/workflow_knowledge_base.txt b/skyvern/forge/prompts/skyvern/workflow_knowledge_base.txt index df5fafa2..44f2aed3 100644 --- a/skyvern/forge/prompts/skyvern/workflow_knowledge_base.txt +++ b/skyvern/forge/prompts/skyvern/workflow_knowledge_base.txt @@ -825,6 +825,7 @@ blocks: * Naming Conventions: - Use descriptive labels: "login_to_portal" not "step1" - Use snake_case for labels and parameter keys + - Use snake_case for data schema field names (e.g., product_name, total_price) - Make labels unique and meaningful * Goal Writing: