Refactor script gen with block level code cache (#3910)

This commit is contained in:
Shuchang Zheng
2025-11-05 19:57:11 +08:00
committed by GitHub
parent 524513dd93
commit 2fa4d933cc
3 changed files with 250 additions and 83 deletions

View File

@@ -10,6 +10,7 @@ import asyncio
import hashlib
import keyword
import re
from dataclasses import dataclass
from typing import Any
import libcst as cst
@@ -31,6 +32,15 @@ GENERATE_CODE_AI_MODE_PROACTIVE = "proactive"
GENERATE_CODE_AI_MODE_FALLBACK = "fallback"
@dataclass
class ScriptBlockSource:
label: str
code: str
run_signature: str | None
workflow_run_id: str | None
workflow_run_block_id: str | None
# --------------------------------------------------------------------- #
# 1. helpers #
# --------------------------------------------------------------------- #
@@ -104,6 +114,7 @@ ACTIONS_WITH_XPATH = [
"upload_file",
"select_option",
]
ACTIONS_OPT_OUT_INTENTION_FOR_PROMPT = ["extract"]
INDENT = " " * 4
DOUBLE_INDENT = " " * 8
@@ -421,7 +432,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
)
)
intention = act.get("intention") or act.get("reasoning") or ""
if intention:
if intention and method not in ACTIONS_OPT_OUT_INTENTION_FOR_PROMPT:
args.extend(
[
cst.Arg(
@@ -432,6 +443,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
),
]
)
_mark_last_arg_as_comma(args)
# Only use indented parentheses if we have arguments
if args:
@@ -1694,10 +1706,25 @@ async def generate_workflow_script_python_code(
script_id: str | None = None,
script_revision_id: str | None = None,
pending: bool = False,
cached_blocks: dict[str, ScriptBlockSource] | None = None,
updated_block_labels: set[str] | None = None,
) -> str:
"""
Build a LibCST Module and emit .code (PEP-8-formatted source).
Cached script blocks can be reused by providing them via `cached_blocks`. Any labels present in
`updated_block_labels` will be regenerated from the latest workflow run execution data.
"""
cached_blocks = cached_blocks or {}
updated_block_labels = set(updated_block_labels or [])
# Drop cached entries that do not have usable source
cached_blocks = {label: source for label, source in cached_blocks.items() if source.code}
# Always regenerate the orchestrator block so it stays aligned with the workflow definition
cached_blocks.pop(settings.WORKFLOW_START_BLOCK_LABEL, None)
if task_v2_child_blocks is None:
task_v2_child_blocks = {}
# --- imports --------------------------------------------------------
imports: list[cst.BaseStatement] = [
cst.SimpleStatementLine([cst.Import(names=[cst.ImportAlias(cst.Name("asyncio"))])]),
@@ -1746,33 +1773,47 @@ async def generate_workflow_script_python_code(
generated_model_cls = _build_generated_model_from_schema(generated_schema)
# --- blocks ---------------------------------------------------------
block_fns = []
block_fns: list[cst.CSTNode] = []
task_v1_blocks = [block for block in blocks if block["block_type"] in SCRIPT_TASK_BLOCKS]
task_v2_blocks = [block for block in blocks if block["block_type"] == "task_v2"]
if task_v2_child_blocks is None:
task_v2_child_blocks = {}
def append_block_code(block_code: str) -> None:
    """Parse ``block_code`` and append its top-level statements to ``block_fns``.

    When ``block_fns`` already holds earlier blocks, two ``cst.EmptyLine``
    nodes are inserted first so consecutive blocks are separated in the
    emitted module.

    ``block_fns`` is the enclosing function's list. It is only mutated in
    place here (never rebound), so no ``nonlocal`` declaration is required.
    """
    parsed_module = cst.parse_module(block_code)
    if block_fns:
        # Separator between the previous block and the one being appended.
        block_fns.extend([cst.EmptyLine(), cst.EmptyLine()])
    block_fns.extend(parsed_module.body)
# Handle task v1 blocks (excluding child blocks of task_v2)
for idx, task in enumerate(task_v1_blocks):
# Skip if this is a child block of a task_v2 block
if task.get("parent_task_v2_label"):
continue
block_fn_def = _build_block_fn(task, actions_by_task.get(task.get("task_id", ""), []))
block_name = task.get("label") or task.get("title") or task.get("task_id") or f"task_{idx}"
cached_source = cached_blocks.get(block_name)
use_cached = cached_source is not None and block_name not in updated_block_labels
if use_cached:
assert cached_source is not None
block_code = cached_source.code
run_signature = cached_source.run_signature
block_workflow_run_id = cached_source.workflow_run_id
block_workflow_run_block_id = cached_source.workflow_run_block_id
else:
block_fn_def = _build_block_fn(task, actions_by_task.get(task.get("task_id", ""), []))
temp_module = cst.Module(body=[block_fn_def])
block_code = temp_module.code
block_stmt = _build_block_statement(task)
run_signature_module = cst.Module(body=[block_stmt])
run_signature = run_signature_module.code.strip()
block_workflow_run_id = task.get("workflow_run_id") or run_id
block_workflow_run_block_id = task.get("workflow_run_block_id")
# Create script block if we have script context
if script_id and script_revision_id and organization_id:
try:
block_name = task.get("label") or task.get("title") or task.get("task_id") or f"task_{idx}"
temp_module = cst.Module(body=[block_fn_def])
block_code = temp_module.code
# Extract the run signature (the statement that calls skyvern.action/extract/etc)
block_stmt = _build_block_statement(task)
run_signature_module = cst.Module(body=[block_stmt])
run_signature = run_signature_module.code.strip()
await create_or_update_script_block(
block_code=block_code,
script_revision_id=script_revision_id,
@@ -1781,84 +1822,67 @@ async def generate_workflow_script_python_code(
block_label=block_name,
update=pending,
run_signature=run_signature,
workflow_run_id=task.get("workflow_run_id"),
workflow_run_block_id=task.get("workflow_run_block_id"),
workflow_run_id=block_workflow_run_id,
workflow_run_block_id=block_workflow_run_block_id,
)
except Exception as e:
LOG.error("Failed to create script block", error=str(e), exc_info=True)
# Continue without script block creation if it fails
block_fns.append(block_fn_def)
if idx < len(task_v1_blocks) - 1:
block_fns.append(cst.EmptyLine())
block_fns.append(cst.EmptyLine())
append_block_code(block_code)
# Handle task_v2 blocks
for idx, task_v2 in enumerate(task_v2_blocks):
for task_v2 in task_v2_blocks:
task_v2_label = task_v2.get("label") or f"task_v2_{task_v2.get('workflow_run_block_id')}"
child_blocks = task_v2_child_blocks.get(task_v2_label, [])
# Create the task_v2 function
task_v2_fn_def = _build_task_v2_block_fn(task_v2, child_blocks)
cached_source = cached_blocks.get(task_v2_label)
use_cached = cached_source is not None and task_v2_label not in updated_block_labels
block_code = ""
run_signature = None
block_workflow_run_id = task_v2.get("workflow_run_id") or run_id
block_workflow_run_block_id = task_v2.get("workflow_run_block_id")
if use_cached:
assert cached_source is not None
block_code = cached_source.code
run_signature = cached_source.run_signature
block_workflow_run_id = cached_source.workflow_run_id
block_workflow_run_block_id = cached_source.workflow_run_block_id
else:
task_v2_fn_def = _build_task_v2_block_fn(task_v2, child_blocks)
task_v2_block_body: list[cst.CSTNode] = [task_v2_fn_def]
for child_block in child_blocks:
if child_block.get("block_type") in SCRIPT_TASK_BLOCKS and child_block.get("block_type") != "task_v2":
child_fn_def = _build_block_fn(child_block, actions_by_task.get(child_block.get("task_id", ""), []))
task_v2_block_body.append(cst.EmptyLine())
task_v2_block_body.append(cst.EmptyLine())
task_v2_block_body.append(child_fn_def)
temp_module = cst.Module(body=task_v2_block_body)
block_code = temp_module.code
task_v2_stmt = _build_block_statement(task_v2)
run_signature = cst.Module(body=[task_v2_stmt]).code.strip()
# Create script block for task_v2 that includes both the main function and child functions
if script_id and script_revision_id and organization_id:
try:
# Build the complete module for this task_v2 block
task_v2_block_body = [task_v2_fn_def]
# Add child block functions
for child_block in child_blocks:
if (
child_block.get("block_type") in SCRIPT_TASK_BLOCKS
and child_block.get("block_type") != "task_v2"
):
child_fn_def = _build_block_fn(
child_block, actions_by_task.get(child_block.get("task_id", ""), [])
)
task_v2_block_body.append(cst.EmptyLine())
task_v2_block_body.append(cst.EmptyLine())
task_v2_block_body.append(child_fn_def)
# Create the complete module for this task_v2 block
temp_module = cst.Module(body=task_v2_block_body)
task_v2_block_code = temp_module.code
block_name = task_v2.get("label") or task_v2.get("title") or f"task_v2_{idx}"
# Extract the run signature for task_v2 block
task_v2_stmt = _build_block_statement(task_v2)
run_signature_module = cst.Module(body=[task_v2_stmt])
run_signature = run_signature_module.code.strip()
await create_or_update_script_block(
block_code=task_v2_block_code,
block_code=block_code,
script_revision_id=script_revision_id,
script_id=script_id,
organization_id=organization_id,
block_label=block_name,
block_label=task_v2_label,
update=pending,
run_signature=run_signature,
workflow_run_id=task_v2.get("workflow_run_id"),
workflow_run_block_id=task_v2.get("workflow_run_block_id"),
workflow_run_id=block_workflow_run_id,
workflow_run_block_id=block_workflow_run_block_id,
)
except Exception as e:
LOG.error("Failed to create task_v2 script block", error=str(e), exc_info=True)
# Continue without script block creation if it fails
block_fns.append(task_v2_fn_def)
# Create individual functions for child blocks
for child_block in child_blocks:
if child_block.get("block_type") in SCRIPT_TASK_BLOCKS and child_block.get("block_type") != "task_v2":
child_fn_def = _build_block_fn(child_block, actions_by_task.get(child_block.get("task_id", ""), []))
block_fns.append(cst.EmptyLine())
block_fns.append(cst.EmptyLine())
block_fns.append(child_fn_def)
if idx < len(task_v2_blocks) - 1:
block_fns.append(cst.EmptyLine())
block_fns.append(cst.EmptyLine())
append_block_code(block_code)
# --- runner ---------------------------------------------------------
run_fn = _build_run_fn(blocks, workflow_run_request)