Refactor script gen with block level code cache (#3910)
This commit is contained in:
@@ -10,6 +10,7 @@ import asyncio
|
||||
import hashlib
|
||||
import keyword
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
import libcst as cst
|
||||
@@ -31,6 +32,15 @@ GENERATE_CODE_AI_MODE_PROACTIVE = "proactive"
|
||||
GENERATE_CODE_AI_MODE_FALLBACK = "fallback"
|
||||
|
||||
|
||||
@dataclass
|
||||
class ScriptBlockSource:
|
||||
label: str
|
||||
code: str
|
||||
run_signature: str | None
|
||||
workflow_run_id: str | None
|
||||
workflow_run_block_id: str | None
|
||||
|
||||
|
||||
# --------------------------------------------------------------------- #
|
||||
# 1. helpers #
|
||||
# --------------------------------------------------------------------- #
|
||||
@@ -104,6 +114,7 @@ ACTIONS_WITH_XPATH = [
|
||||
"upload_file",
|
||||
"select_option",
|
||||
]
|
||||
ACTIONS_OPT_OUT_INTENTION_FOR_PROMPT = ["extract"]
|
||||
|
||||
INDENT = " " * 4
|
||||
DOUBLE_INDENT = " " * 8
|
||||
@@ -421,7 +432,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
||||
)
|
||||
)
|
||||
intention = act.get("intention") or act.get("reasoning") or ""
|
||||
if intention:
|
||||
if intention and method not in ACTIONS_OPT_OUT_INTENTION_FOR_PROMPT:
|
||||
args.extend(
|
||||
[
|
||||
cst.Arg(
|
||||
@@ -432,6 +443,7 @@ def _action_to_stmt(act: dict[str, Any], task: dict[str, Any], assign_to_output:
|
||||
),
|
||||
]
|
||||
)
|
||||
_mark_last_arg_as_comma(args)
|
||||
|
||||
# Only use indented parentheses if we have arguments
|
||||
if args:
|
||||
@@ -1694,10 +1706,25 @@ async def generate_workflow_script_python_code(
|
||||
script_id: str | None = None,
|
||||
script_revision_id: str | None = None,
|
||||
pending: bool = False,
|
||||
cached_blocks: dict[str, ScriptBlockSource] | None = None,
|
||||
updated_block_labels: set[str] | None = None,
|
||||
) -> str:
|
||||
"""
|
||||
Build a LibCST Module and emit .code (PEP-8-formatted source).
|
||||
|
||||
Cached script blocks can be reused by providing them via `cached_blocks`. Any labels present in
|
||||
`updated_block_labels` will be regenerated from the latest workflow run execution data.
|
||||
"""
|
||||
cached_blocks = cached_blocks or {}
|
||||
updated_block_labels = set(updated_block_labels or [])
|
||||
|
||||
# Drop cached entries that do not have usable source
|
||||
cached_blocks = {label: source for label, source in cached_blocks.items() if source.code}
|
||||
# Always regenerate the orchestrator block so it stays aligned with the workflow definition
|
||||
cached_blocks.pop(settings.WORKFLOW_START_BLOCK_LABEL, None)
|
||||
|
||||
if task_v2_child_blocks is None:
|
||||
task_v2_child_blocks = {}
|
||||
# --- imports --------------------------------------------------------
|
||||
imports: list[cst.BaseStatement] = [
|
||||
cst.SimpleStatementLine([cst.Import(names=[cst.ImportAlias(cst.Name("asyncio"))])]),
|
||||
@@ -1746,33 +1773,47 @@ async def generate_workflow_script_python_code(
|
||||
generated_model_cls = _build_generated_model_from_schema(generated_schema)
|
||||
|
||||
# --- blocks ---------------------------------------------------------
|
||||
block_fns = []
|
||||
block_fns: list[cst.CSTNode] = []
|
||||
task_v1_blocks = [block for block in blocks if block["block_type"] in SCRIPT_TASK_BLOCKS]
|
||||
task_v2_blocks = [block for block in blocks if block["block_type"] == "task_v2"]
|
||||
|
||||
if task_v2_child_blocks is None:
|
||||
task_v2_child_blocks = {}
|
||||
def append_block_code(block_code: str) -> None:
    """Parse one block's source text and append its statements to ``block_fns``.

    ``block_fns`` is the list owned by the enclosing function. It is only
    mutated here, never rebound, so no ``nonlocal`` declaration is needed.

    Args:
        block_code: Python source for a single block, either freshly
            generated or taken from a cached block.

    Raises:
        Whatever ``cst.parse_module`` raises when ``block_code`` is not
        valid Python; the error is deliberately not swallowed here.
    """
    parsed = cst.parse_module(block_code)
    if not parsed.body:
        # Source that parses to no statements (e.g. whitespace-only cached
        # code) contributes nothing; skip it so no dangling blank-line
        # separators are left in the output.
        return
    if block_fns:
        # Two blank lines between consecutive blocks, none before the first.
        block_fns.extend((cst.EmptyLine(), cst.EmptyLine()))
    block_fns.extend(parsed.body)
|
||||
|
||||
# Handle task v1 blocks (excluding child blocks of task_v2)
|
||||
for idx, task in enumerate(task_v1_blocks):
|
||||
# Skip if this is a child block of a task_v2 block
|
||||
if task.get("parent_task_v2_label"):
|
||||
continue
|
||||
|
||||
block_fn_def = _build_block_fn(task, actions_by_task.get(task.get("task_id", ""), []))
|
||||
block_name = task.get("label") or task.get("title") or task.get("task_id") or f"task_{idx}"
|
||||
cached_source = cached_blocks.get(block_name)
|
||||
use_cached = cached_source is not None and block_name not in updated_block_labels
|
||||
|
||||
if use_cached:
|
||||
assert cached_source is not None
|
||||
block_code = cached_source.code
|
||||
run_signature = cached_source.run_signature
|
||||
block_workflow_run_id = cached_source.workflow_run_id
|
||||
block_workflow_run_block_id = cached_source.workflow_run_block_id
|
||||
else:
|
||||
block_fn_def = _build_block_fn(task, actions_by_task.get(task.get("task_id", ""), []))
|
||||
temp_module = cst.Module(body=[block_fn_def])
|
||||
block_code = temp_module.code
|
||||
|
||||
block_stmt = _build_block_statement(task)
|
||||
run_signature_module = cst.Module(body=[block_stmt])
|
||||
run_signature = run_signature_module.code.strip()
|
||||
|
||||
block_workflow_run_id = task.get("workflow_run_id") or run_id
|
||||
block_workflow_run_block_id = task.get("workflow_run_block_id")
|
||||
|
||||
# Create script block if we have script context
|
||||
if script_id and script_revision_id and organization_id:
|
||||
try:
|
||||
block_name = task.get("label") or task.get("title") or task.get("task_id") or f"task_{idx}"
|
||||
temp_module = cst.Module(body=[block_fn_def])
|
||||
block_code = temp_module.code
|
||||
|
||||
# Extract the run signature (the statement that calls skyvern.action/extract/etc)
|
||||
block_stmt = _build_block_statement(task)
|
||||
run_signature_module = cst.Module(body=[block_stmt])
|
||||
run_signature = run_signature_module.code.strip()
|
||||
|
||||
await create_or_update_script_block(
|
||||
block_code=block_code,
|
||||
script_revision_id=script_revision_id,
|
||||
@@ -1781,84 +1822,67 @@ async def generate_workflow_script_python_code(
|
||||
block_label=block_name,
|
||||
update=pending,
|
||||
run_signature=run_signature,
|
||||
workflow_run_id=task.get("workflow_run_id"),
|
||||
workflow_run_block_id=task.get("workflow_run_block_id"),
|
||||
workflow_run_id=block_workflow_run_id,
|
||||
workflow_run_block_id=block_workflow_run_block_id,
|
||||
)
|
||||
except Exception as e:
|
||||
LOG.error("Failed to create script block", error=str(e), exc_info=True)
|
||||
# Continue without script block creation if it fails
|
||||
|
||||
block_fns.append(block_fn_def)
|
||||
if idx < len(task_v1_blocks) - 1:
|
||||
block_fns.append(cst.EmptyLine())
|
||||
block_fns.append(cst.EmptyLine())
|
||||
append_block_code(block_code)
|
||||
|
||||
# Handle task_v2 blocks
|
||||
for idx, task_v2 in enumerate(task_v2_blocks):
|
||||
for task_v2 in task_v2_blocks:
|
||||
task_v2_label = task_v2.get("label") or f"task_v2_{task_v2.get('workflow_run_block_id')}"
|
||||
child_blocks = task_v2_child_blocks.get(task_v2_label, [])
|
||||
|
||||
# Create the task_v2 function
|
||||
task_v2_fn_def = _build_task_v2_block_fn(task_v2, child_blocks)
|
||||
cached_source = cached_blocks.get(task_v2_label)
|
||||
use_cached = cached_source is not None and task_v2_label not in updated_block_labels
|
||||
|
||||
block_code = ""
|
||||
run_signature = None
|
||||
block_workflow_run_id = task_v2.get("workflow_run_id") or run_id
|
||||
block_workflow_run_block_id = task_v2.get("workflow_run_block_id")
|
||||
|
||||
if use_cached:
|
||||
assert cached_source is not None
|
||||
block_code = cached_source.code
|
||||
run_signature = cached_source.run_signature
|
||||
block_workflow_run_id = cached_source.workflow_run_id
|
||||
block_workflow_run_block_id = cached_source.workflow_run_block_id
|
||||
else:
|
||||
task_v2_fn_def = _build_task_v2_block_fn(task_v2, child_blocks)
|
||||
task_v2_block_body: list[cst.CSTNode] = [task_v2_fn_def]
|
||||
|
||||
for child_block in child_blocks:
|
||||
if child_block.get("block_type") in SCRIPT_TASK_BLOCKS and child_block.get("block_type") != "task_v2":
|
||||
child_fn_def = _build_block_fn(child_block, actions_by_task.get(child_block.get("task_id", ""), []))
|
||||
task_v2_block_body.append(cst.EmptyLine())
|
||||
task_v2_block_body.append(cst.EmptyLine())
|
||||
task_v2_block_body.append(child_fn_def)
|
||||
|
||||
temp_module = cst.Module(body=task_v2_block_body)
|
||||
block_code = temp_module.code
|
||||
|
||||
task_v2_stmt = _build_block_statement(task_v2)
|
||||
run_signature = cst.Module(body=[task_v2_stmt]).code.strip()
|
||||
|
||||
# Create script block for task_v2 that includes both the main function and child functions
|
||||
if script_id and script_revision_id and organization_id:
|
||||
try:
|
||||
# Build the complete module for this task_v2 block
|
||||
task_v2_block_body = [task_v2_fn_def]
|
||||
|
||||
# Add child block functions
|
||||
for child_block in child_blocks:
|
||||
if (
|
||||
child_block.get("block_type") in SCRIPT_TASK_BLOCKS
|
||||
and child_block.get("block_type") != "task_v2"
|
||||
):
|
||||
child_fn_def = _build_block_fn(
|
||||
child_block, actions_by_task.get(child_block.get("task_id", ""), [])
|
||||
)
|
||||
task_v2_block_body.append(cst.EmptyLine())
|
||||
task_v2_block_body.append(cst.EmptyLine())
|
||||
task_v2_block_body.append(child_fn_def)
|
||||
|
||||
# Create the complete module for this task_v2 block
|
||||
temp_module = cst.Module(body=task_v2_block_body)
|
||||
task_v2_block_code = temp_module.code
|
||||
|
||||
block_name = task_v2.get("label") or task_v2.get("title") or f"task_v2_{idx}"
|
||||
|
||||
# Extract the run signature for task_v2 block
|
||||
task_v2_stmt = _build_block_statement(task_v2)
|
||||
run_signature_module = cst.Module(body=[task_v2_stmt])
|
||||
run_signature = run_signature_module.code.strip()
|
||||
|
||||
await create_or_update_script_block(
|
||||
block_code=task_v2_block_code,
|
||||
block_code=block_code,
|
||||
script_revision_id=script_revision_id,
|
||||
script_id=script_id,
|
||||
organization_id=organization_id,
|
||||
block_label=block_name,
|
||||
block_label=task_v2_label,
|
||||
update=pending,
|
||||
run_signature=run_signature,
|
||||
workflow_run_id=task_v2.get("workflow_run_id"),
|
||||
workflow_run_block_id=task_v2.get("workflow_run_block_id"),
|
||||
workflow_run_id=block_workflow_run_id,
|
||||
workflow_run_block_id=block_workflow_run_block_id,
|
||||
)
|
||||
except Exception as e:
|
||||
LOG.error("Failed to create task_v2 script block", error=str(e), exc_info=True)
|
||||
# Continue without script block creation if it fails
|
||||
|
||||
block_fns.append(task_v2_fn_def)
|
||||
|
||||
# Create individual functions for child blocks
|
||||
for child_block in child_blocks:
|
||||
if child_block.get("block_type") in SCRIPT_TASK_BLOCKS and child_block.get("block_type") != "task_v2":
|
||||
child_fn_def = _build_block_fn(child_block, actions_by_task.get(child_block.get("task_id", ""), []))
|
||||
block_fns.append(cst.EmptyLine())
|
||||
block_fns.append(cst.EmptyLine())
|
||||
block_fns.append(child_fn_def)
|
||||
|
||||
if idx < len(task_v2_blocks) - 1:
|
||||
block_fns.append(cst.EmptyLine())
|
||||
block_fns.append(cst.EmptyLine())
|
||||
append_block_code(block_code)
|
||||
|
||||
# --- runner ---------------------------------------------------------
|
||||
run_fn = _build_run_fn(blocks, workflow_run_request)
|
||||
|
||||
Reference in New Issue
Block a user