From 10d207d01ac51346083d9c45c746887b32ff98db Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Wed, 9 Jul 2025 11:26:32 +0800 Subject: [PATCH] parameterize data schema in workflow (#2903) --- .../workflows/editor/workflowEditorUtils.ts | 37 ++++++++++++------- .../routes/workflows/types/workflowTypes.ts | 4 +- .../workflows/types/workflowYamlTypes.ts | 4 +- skyvern/forge/sdk/workflow/models/block.py | 7 +++- skyvern/forge/sdk/workflow/models/yaml.py | 4 +- 5 files changed, 35 insertions(+), 21 deletions(-) diff --git a/skyvern-frontend/src/routes/workflows/editor/workflowEditorUtils.ts b/skyvern-frontend/src/routes/workflows/editor/workflowEditorUtils.ts index a991c8d3..ea989cef 100644 --- a/skyvern-frontend/src/routes/workflows/editor/workflowEditorUtils.ts +++ b/skyvern-frontend/src/routes/workflows/editor/workflowEditorUtils.ts @@ -226,7 +226,10 @@ function convertToNode( url: block.url ?? "", navigationGoal: block.navigation_goal ?? "", dataExtractionGoal: block.data_extraction_goal ?? "", - dataSchema: JSON.stringify(block.data_schema, null, 2), + dataSchema: + typeof block.data_schema === "string" + ? block.data_schema + : JSON.stringify(block.data_schema, null, 2), errorCodeMapping: JSON.stringify(block.error_code_mapping, null, 2), allowDownloads: block.complete_on_download ?? false, downloadSuffix: block.download_suffix ?? null, @@ -330,7 +333,10 @@ function convertToNode( ...commonData, url: block.url ?? "", dataExtractionGoal: block.data_extraction_goal ?? "", - dataSchema: JSON.stringify(block.data_schema, null, 2), + dataSchema: + typeof block.data_schema === "string" + ? block.data_schema + : JSON.stringify(block.data_schema, null, 2), parameterKeys: block.parameters.map((p) => p.key), maxRetries: block.max_retries ?? null, maxStepsOverride: block.max_steps_per_run ?? null, @@ -992,6 +998,19 @@ function JSONParseSafe(json: string): Record | null { } } +function JSONSafeOrString( + json: string, +): Record | string | null { + if (!json) { + return null; + } + try { + return JSON.parse(json); + } catch { + return json; + } +} + function getWorkflowBlock(node: WorkflowBlockNode): BlockYAML { const base = { label: node.data.label, @@ -1009,7 +1028,7 @@ function getWorkflowBlock(node: WorkflowBlockNode): BlockYAML { data_extraction_goal: node.data.dataExtractionGoal, complete_criterion: node.data.completeCriterion, terminate_criterion: node.data.terminateCriterion, - data_schema: JSONParseSafe(node.data.dataSchema), + data_schema: JSONSafeOrString(node.data.dataSchema), error_code_mapping: JSONParseSafe(node.data.errorCodeMapping) as Record< string, string @@ -1111,7 +1130,7 @@ function getWorkflowBlock(node: WorkflowBlockNode): BlockYAML { url: node.data.url, title: node.data.label, data_extraction_goal: node.data.dataExtractionGoal, - data_schema: JSONParseSafe(node.data.dataSchema), + data_schema: JSONSafeOrString(node.data.dataSchema), ...(node.data.maxRetries !== null && { max_retries: node.data.maxRetries, }), @@ -2112,11 +2131,6 @@ function getWorkflowErrors(nodes: Array): Array { // check task node json fields const taskNodes = nodes.filter(isTaskNode); taskNodes.forEach((node) => { - try { - JSON.parse(node.data.dataSchema); - } catch { - errors.push(`${node.data.label}: Data schema is not valid JSON.`); - } try { JSON.parse(node.data.errorCodeMapping); } catch { @@ -2153,11 +2167,6 @@ function getWorkflowErrors(nodes: Array): Array { if (node.data.dataExtractionGoal.length === 0) { errors.push(`${node.data.label}: Data extraction goal is required.`); } - try { - JSON.parse(node.data.dataSchema); - } catch { - errors.push(`${node.data.label}: Data schema is not valid JSON.`); - } }); const textPromptNodes = nodes.filter(isTextPromptNode); diff --git a/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts b/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts index b924ab50..b4ed76a5 100644 --- a/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts +++ b/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts @@ -271,7 +271,7 @@ export type TaskBlock = WorkflowBlockBase & { title: string; navigation_goal: string | null; data_extraction_goal: string | null; - data_schema: Record | null; + data_schema: Record | string | null; complete_criterion: string | null; terminate_criterion: string | null; error_code_mapping: Record | null; @@ -407,7 +407,7 @@ export type ExtractionBlock = WorkflowBlockBase & { data_extraction_goal: string | null; url: string | null; title: string; - data_schema: Record | null; + data_schema: Record | string | null; max_retries?: number; max_steps_per_run?: number | null; parameters: Array; diff --git a/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts b/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts index 21e53589..a3202666 100644 --- a/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts +++ b/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts @@ -135,7 +135,7 @@ export type TaskBlockYAML = BlockYAMLBase & { title?: string; navigation_goal: string | null; data_extraction_goal: string | null; - data_schema: Record | null; + data_schema: Record | string | null; error_code_mapping: Record | null; max_retries?: number; max_steps_per_run?: number | null; @@ -210,7 +210,7 @@ export type ExtractionBlockYAML = BlockYAMLBase & { url: string | null; title?: string; data_extraction_goal: string | null; - data_schema: Record | null; + data_schema: Record | string | null; max_retries?: number; max_steps_per_run?: number | null; parameter_keys?: Array | null; diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py index f575686e..2bfe8ed3 100644 --- a/skyvern/forge/sdk/workflow/models/block.py +++ b/skyvern/forge/sdk/workflow/models/block.py @@ -392,7 +392,7 @@ class BaseTaskBlock(Block): terminate_criterion: str | None = None navigation_goal: str | None = None data_extraction_goal: str | None = None - data_schema: dict[str, Any] | list | None = None + data_schema: dict[str, Any] | list | str | None = None # error code to error description for the LLM error_code_mapping: dict[str, str] | None = None max_retries: int = 0 @@ -454,6 +454,11 @@ class BaseTaskBlock(Block): self.data_extraction_goal, workflow_run_context ) + if isinstance(self.data_schema, str): + self.data_schema = self.format_block_parameter_template_from_workflow_run_context( + self.data_schema, workflow_run_context + ) + if self.complete_criterion: self.complete_criterion = self.format_block_parameter_template_from_workflow_run_context( self.complete_criterion, workflow_run_context diff --git a/skyvern/forge/sdk/workflow/models/yaml.py b/skyvern/forge/sdk/workflow/models/yaml.py index 1ece3ca3..ef0c7e4e 100644 --- a/skyvern/forge/sdk/workflow/models/yaml.py +++ b/skyvern/forge/sdk/workflow/models/yaml.py @@ -138,7 +138,7 @@ class TaskBlockYAML(BlockYAML): engine: RunEngine = RunEngine.skyvern_v1 navigation_goal: str | None = None data_extraction_goal: str | None = None - data_schema: dict[str, Any] | list | None = None + data_schema: dict[str, Any] | list | str | None = None error_code_mapping: dict[str, str] | None = None max_retries: int = 0 max_steps_per_run: int | None = None @@ -308,7 +308,7 @@ class ExtractionBlockYAML(BlockYAML): url: str | None = None title: str = "" engine: RunEngine = RunEngine.skyvern_v1 - data_schema: dict[str, Any] | list | None = None + data_schema: dict[str, Any] | list | str | None = None max_retries: int = 0 max_steps_per_run: int | None = None parameter_keys: list[str] | None = None