parameterize data schema in workflow (#2903)

This commit is contained in:
LawyZheng
2025-07-09 11:26:32 +08:00
committed by GitHub
parent 6c47a8709f
commit 10d207d01a
5 changed files with 35 additions and 21 deletions

View File

@@ -226,7 +226,10 @@ function convertToNode(
url: block.url ?? "",
navigationGoal: block.navigation_goal ?? "",
dataExtractionGoal: block.data_extraction_goal ?? "",
dataSchema: JSON.stringify(block.data_schema, null, 2),
dataSchema:
typeof block.data_schema === "string"
? block.data_schema
: JSON.stringify(block.data_schema, null, 2),
errorCodeMapping: JSON.stringify(block.error_code_mapping, null, 2),
allowDownloads: block.complete_on_download ?? false,
downloadSuffix: block.download_suffix ?? null,
@@ -330,7 +333,10 @@ function convertToNode(
...commonData,
url: block.url ?? "",
dataExtractionGoal: block.data_extraction_goal ?? "",
dataSchema: JSON.stringify(block.data_schema, null, 2),
dataSchema:
typeof block.data_schema === "string"
? block.data_schema
: JSON.stringify(block.data_schema, null, 2),
parameterKeys: block.parameters.map((p) => p.key),
maxRetries: block.max_retries ?? null,
maxStepsOverride: block.max_steps_per_run ?? null,
@@ -992,6 +998,19 @@ function JSONParseSafe(json: string): Record<string, unknown> | null {
}
}
/**
 * Parses `json` when it is valid JSON, otherwise hands the text back unchanged.
 *
 * Unlike a strict parse, this never throws: text that is not valid JSON
 * (for example a template placeholder) is returned verbatim so the caller can
 * keep it as a plain string.
 *
 * @param json - Raw text to interpret.
 * @returns
 *  - `null` when `json` is empty or is the JSON literal `null`;
 *  - the parsed value when it is an object, an array, or a JSON string;
 *  - the original text when it is not valid JSON, or when it parses to a
 *    number or boolean (those do not fit the declared return type).
 */
function JSONSafeOrString(
  json: string,
): Record<string, unknown> | string | null {
  if (!json) {
    return null;
  }

  let parsed: unknown;
  try {
    parsed = JSON.parse(json);
  } catch {
    // Not JSON at all: keep the raw text.
    return json;
  }

  if (parsed === null) {
    return null;
  }
  if (typeof parsed === "string") {
    return parsed;
  }
  if (typeof parsed === "object") {
    // Objects and arrays are both passed through as structured data.
    return parsed as Record<string, unknown>;
  }

  // JSON.parse accepts bare numbers and booleans ("123", "true"). Returning
  // them would break the declared return type, so fall back to the raw text.
  return json;
}
function getWorkflowBlock(node: WorkflowBlockNode): BlockYAML {
const base = {
label: node.data.label,
@@ -1009,7 +1028,7 @@ function getWorkflowBlock(node: WorkflowBlockNode): BlockYAML {
data_extraction_goal: node.data.dataExtractionGoal,
complete_criterion: node.data.completeCriterion,
terminate_criterion: node.data.terminateCriterion,
data_schema: JSONParseSafe(node.data.dataSchema),
data_schema: JSONSafeOrString(node.data.dataSchema),
error_code_mapping: JSONParseSafe(node.data.errorCodeMapping) as Record<
string,
string
@@ -1111,7 +1130,7 @@ function getWorkflowBlock(node: WorkflowBlockNode): BlockYAML {
url: node.data.url,
title: node.data.label,
data_extraction_goal: node.data.dataExtractionGoal,
data_schema: JSONParseSafe(node.data.dataSchema),
data_schema: JSONSafeOrString(node.data.dataSchema),
...(node.data.maxRetries !== null && {
max_retries: node.data.maxRetries,
}),
@@ -2112,11 +2131,6 @@ function getWorkflowErrors(nodes: Array<AppNode>): Array<string> {
// check task node json fields
const taskNodes = nodes.filter(isTaskNode);
taskNodes.forEach((node) => {
try {
JSON.parse(node.data.dataSchema);
} catch {
errors.push(`${node.data.label}: Data schema is not valid JSON.`);
}
try {
JSON.parse(node.data.errorCodeMapping);
} catch {
@@ -2153,11 +2167,6 @@ function getWorkflowErrors(nodes: Array<AppNode>): Array<string> {
if (node.data.dataExtractionGoal.length === 0) {
errors.push(`${node.data.label}: Data extraction goal is required.`);
}
try {
JSON.parse(node.data.dataSchema);
} catch {
errors.push(`${node.data.label}: Data schema is not valid JSON.`);
}
});
const textPromptNodes = nodes.filter(isTextPromptNode);

View File

@@ -271,7 +271,7 @@ export type TaskBlock = WorkflowBlockBase & {
title: string;
navigation_goal: string | null;
data_extraction_goal: string | null;
data_schema: Record<string, unknown> | null;
data_schema: Record<string, unknown> | string | null;
complete_criterion: string | null;
terminate_criterion: string | null;
error_code_mapping: Record<string, string> | null;
@@ -407,7 +407,7 @@ export type ExtractionBlock = WorkflowBlockBase & {
data_extraction_goal: string | null;
url: string | null;
title: string;
data_schema: Record<string, unknown> | null;
data_schema: Record<string, unknown> | string | null;
max_retries?: number;
max_steps_per_run?: number | null;
parameters: Array<WorkflowParameter>;

View File

@@ -135,7 +135,7 @@ export type TaskBlockYAML = BlockYAMLBase & {
title?: string;
navigation_goal: string | null;
data_extraction_goal: string | null;
data_schema: Record<string, unknown> | null;
data_schema: Record<string, unknown> | string | null;
error_code_mapping: Record<string, string> | null;
max_retries?: number;
max_steps_per_run?: number | null;
@@ -210,7 +210,7 @@ export type ExtractionBlockYAML = BlockYAMLBase & {
url: string | null;
title?: string;
data_extraction_goal: string | null;
data_schema: Record<string, unknown> | null;
data_schema: Record<string, unknown> | string | null;
max_retries?: number;
max_steps_per_run?: number | null;
parameter_keys?: Array<string> | null;

View File

@@ -392,7 +392,7 @@ class BaseTaskBlock(Block):
terminate_criterion: str | None = None
navigation_goal: str | None = None
data_extraction_goal: str | None = None
data_schema: dict[str, Any] | list | None = None
data_schema: dict[str, Any] | list | str | None = None
# error code to error description for the LLM
error_code_mapping: dict[str, str] | None = None
max_retries: int = 0
@@ -454,6 +454,11 @@ class BaseTaskBlock(Block):
self.data_extraction_goal, workflow_run_context
)
if isinstance(self.data_schema, str):
self.data_schema = self.format_block_parameter_template_from_workflow_run_context(
self.data_schema, workflow_run_context
)
if self.complete_criterion:
self.complete_criterion = self.format_block_parameter_template_from_workflow_run_context(
self.complete_criterion, workflow_run_context

View File

@@ -138,7 +138,7 @@ class TaskBlockYAML(BlockYAML):
engine: RunEngine = RunEngine.skyvern_v1
navigation_goal: str | None = None
data_extraction_goal: str | None = None
data_schema: dict[str, Any] | list | None = None
data_schema: dict[str, Any] | list | str | None = None
error_code_mapping: dict[str, str] | None = None
max_retries: int = 0
max_steps_per_run: int | None = None
@@ -308,7 +308,7 @@ class ExtractionBlockYAML(BlockYAML):
url: str | None = None
title: str = ""
engine: RunEngine = RunEngine.skyvern_v1
data_schema: dict[str, Any] | list | None = None
data_schema: dict[str, Any] | list | str | None = None
max_retries: int = 0
max_steps_per_run: int | None = None
parameter_keys: list[str] | None = None