# skyvern/services/workflow_script_service.py
import base64
import structlog
from cachetools import TTLCache
from jinja2.sandbox import SandboxedEnvironment
from skyvern.config import settings
from skyvern.core.script_generations.generate_script import ScriptBlockSource, generate_workflow_script_python_code
from skyvern.core.script_generations.transform_workflow_run import transform_workflow_run_to_code_gen_input
from skyvern.forge import app
from skyvern.forge.sdk.core import skyvern_context
from skyvern.forge.sdk.workflow.models.block import get_all_blocks
from skyvern.forge.sdk.workflow.models.workflow import Workflow, WorkflowRun
from skyvern.schemas.scripts import FileEncoding, Script, ScriptFileCreate, ScriptStatus
from skyvern.schemas.workflows import BlockType
from skyvern.services import script_service
LOG = structlog.get_logger()
jinja_sandbox_env = SandboxedEnvironment()
def workflow_has_conditionals(workflow: Workflow) -> bool:
"""
Check if a workflow contains any conditional blocks.
This is used to determine whether "missing" blocks in the cache should trigger
regeneration. For workflows with conditionals, blocks in unexecuted branches
are legitimately missing and should NOT trigger regeneration.
"""
try:
all_blocks = get_all_blocks(workflow.workflow_definition.blocks)
return any(block.block_type == BlockType.CONDITIONAL for block in all_blocks)
except Exception:
LOG.warning(
"Failed to check workflow for conditional blocks",
workflow_id=workflow.workflow_id,
exc_info=True,
)
return False
# Cache for workflow scripts - only stores non-None results
_workflow_script_cache: TTLCache[tuple, "Script"] = TTLCache(maxsize=128, ttl=60 * 60)
def _make_workflow_script_cache_key(
organization_id: str,
workflow_permanent_id: str,
cache_key_value: str,
workflow_run_id: str | None = None,
cache_key: str | None = None,
statuses: list[ScriptStatus] | None = None,
) -> tuple:
"""Create a hashable cache key from the function arguments."""
# Convert list to tuple for hashability
statuses_key = tuple(statuses) if statuses else None
return (organization_id, workflow_permanent_id, cache_key_value, workflow_run_id, cache_key, statuses_key)
async def generate_or_update_pending_workflow_script(
2025-09-19 08:50:21 -07:00
workflow_run: WorkflowRun,
workflow: Workflow,
) -> None:
organization_id = workflow.organization_id
context = skyvern_context.current()
if not context:
return
script_id = context.script_id
script = None
if script_id:
script = await app.DATABASE.get_script(script_id=script_id, organization_id=organization_id)
if not script:
script = await app.DATABASE.create_script(organization_id=organization_id, run_id=workflow_run.workflow_run_id)
if context:
context.script_id = script.script_id
context.script_revision_id = script.script_revision_id
_, rendered_cache_key_value = await get_workflow_script(
workflow=workflow,
workflow_run=workflow_run,
status=ScriptStatus.pending,
)
await generate_workflow_script(
workflow_run=workflow_run,
workflow=workflow,
script=script,
rendered_cache_key_value=rendered_cache_key_value,
2025-09-19 09:00:48 -07:00
pending=True,
cached_script=script,
2025-09-19 08:50:21 -07:00
)
async def get_workflow_script(
workflow: Workflow,
workflow_run: WorkflowRun,
block_labels: list[str] | None = None,
status: ScriptStatus = ScriptStatus.published,
) -> tuple[Script | None, str]:
"""
Check if there's a related workflow script that should be used instead of running the workflow.
Returns the tuple of (script, rendered_cache_key_value).
"""
cache_key = workflow.cache_key or ""
rendered_cache_key_value = ""
try:
parameter_tuples = await app.DATABASE.get_workflow_run_parameters(
2025-09-19 08:50:21 -07:00
workflow_run_id=workflow_run.workflow_run_id,
)
parameters = {wf_param.key: run_param.value for wf_param, run_param in parameter_tuples}
rendered_cache_key_value = jinja_sandbox_env.from_string(cache_key).render(parameters)
if block_labels:
# Do not generate script or run script if block_labels is provided
return None, rendered_cache_key_value
2025-09-19 08:50:21 -07:00
# Check if there are existing cached scripts for this workflow + cache_key_value
existing_script = await get_workflow_script_by_cache_key_value(
2025-09-19 08:50:21 -07:00
organization_id=workflow.organization_id,
workflow_permanent_id=workflow.workflow_permanent_id,
cache_key_value=rendered_cache_key_value,
statuses=[status],
use_cache=True,
2025-09-19 08:50:21 -07:00
)
if existing_script:
2025-09-19 08:50:21 -07:00
LOG.info(
"Found cached script for workflow",
workflow_id=workflow.workflow_id,
cache_key_value=rendered_cache_key_value,
workflow_run_id=workflow_run.workflow_run_id,
)
return existing_script, rendered_cache_key_value
2025-09-19 08:50:21 -07:00
return None, rendered_cache_key_value
except Exception as e:
LOG.warning(
"Failed to check for workflow script, proceeding with normal workflow execution",
workflow_id=workflow.workflow_id,
workflow_run_id=workflow_run.workflow_run_id,
error=str(e),
exc_info=True,
)
return None, rendered_cache_key_value
async def get_workflow_script_by_cache_key_value(
organization_id: str,
workflow_permanent_id: str,
cache_key_value: str,
workflow_run_id: str | None = None,
cache_key: str | None = None,
statuses: list[ScriptStatus] | None = None,
use_cache: bool = False,
) -> Script | None:
if use_cache:
cache_key_tuple = _make_workflow_script_cache_key(
organization_id=organization_id,
workflow_permanent_id=workflow_permanent_id,
cache_key_value=cache_key_value,
workflow_run_id=workflow_run_id,
cache_key=cache_key,
statuses=statuses,
)
# Check cache first
if cache_key_tuple in _workflow_script_cache:
return _workflow_script_cache[cache_key_tuple]
# Cache miss - fetch from database
result = await app.DATABASE.get_workflow_script_by_cache_key_value(
organization_id=organization_id,
workflow_permanent_id=workflow_permanent_id,
cache_key_value=cache_key_value,
workflow_run_id=workflow_run_id,
cache_key=cache_key,
statuses=statuses,
)
# Only cache non-None results
if result is not None:
_workflow_script_cache[cache_key_tuple] = result
return result
return await app.DATABASE.get_workflow_script_by_cache_key_value(
organization_id=organization_id,
workflow_permanent_id=workflow_permanent_id,
cache_key_value=cache_key_value,
workflow_run_id=workflow_run_id,
cache_key=cache_key,
statuses=statuses,
)
async def _load_cached_script_block_sources(
script: Script,
organization_id: str,
) -> dict[str, ScriptBlockSource]:
"""
Load existing script block sources (code + metadata) for a script revision so they can be reused.
"""
cached_blocks: dict[str, ScriptBlockSource] = {}
script_blocks = await app.DATABASE.get_script_blocks_by_script_revision_id(
script_revision_id=script.script_revision_id,
organization_id=organization_id,
)
for script_block in script_blocks:
if not script_block.script_block_label:
continue
code_str: str | None = None
if script_block.script_file_id:
script_file = await app.DATABASE.get_script_file_by_id(
script_revision_id=script.script_revision_id,
file_id=script_block.script_file_id,
organization_id=organization_id,
)
if script_file and script_file.artifact_id:
artifact = await app.DATABASE.get_artifact_by_id(script_file.artifact_id, organization_id)
if artifact:
file_content = await app.ARTIFACT_MANAGER.retrieve_artifact(artifact)
if isinstance(file_content, bytes):
code_str = file_content.decode("utf-8")
elif isinstance(file_content, str):
code_str = file_content
if not code_str:
continue
cached_blocks[script_block.script_block_label] = ScriptBlockSource(
label=script_block.script_block_label,
code=code_str,
run_signature=script_block.run_signature,
workflow_run_id=script_block.workflow_run_id,
workflow_run_block_id=script_block.workflow_run_block_id,
input_fields=script_block.input_fields,
)
return cached_blocks
async def generate_workflow_script(
workflow_run: WorkflowRun,
workflow: Workflow,
script: Script,
rendered_cache_key_value: str,
2025-09-19 09:00:48 -07:00
pending: bool = False,
cached_script: Script | None = None,
updated_block_labels: set[str] | None = None,
2025-09-19 08:50:21 -07:00
) -> None:
# Note: Workflows with conditional blocks ARE supported. The conditional block itself
# is not cached (it's evaluated at runtime), but cacheable blocks in branches are
# cached progressively as they execute. See workflow_has_conditionals() for the
# regeneration logic that prevents unnecessary regeneration for unexecuted branches.
2025-09-19 08:50:21 -07:00
try:
LOG.info(
"Generating script for workflow",
workflow_run_id=workflow_run.workflow_run_id,
workflow_id=workflow.workflow_id,
workflow_name=workflow.title,
cache_key_value=rendered_cache_key_value,
)
cached_block_sources: dict[str, ScriptBlockSource] = {}
if cached_script:
cached_block_sources = await _load_cached_script_block_sources(cached_script, workflow.organization_id)
2025-09-19 08:50:21 -07:00
codegen_input = await transform_workflow_run_to_code_gen_input(
workflow_run_id=workflow_run.workflow_run_id,
organization_id=workflow.organization_id,
)
block_labels = [block.get("label") for block in codegen_input.workflow_blocks if block.get("label")]
if updated_block_labels is None:
updated_block_labels = {label for label in block_labels if label}
else:
updated_block_labels = set(updated_block_labels)
missing_labels = {label for label in block_labels if label and label not in cached_block_sources}
updated_block_labels.update(missing_labels)
updated_block_labels.add(settings.WORKFLOW_START_BLOCK_LABEL)
2025-09-19 08:50:21 -07:00
python_src = await generate_workflow_script_python_code(
file_name=codegen_input.file_name,
workflow_run_request=codegen_input.workflow_run,
workflow=codegen_input.workflow,
blocks=codegen_input.workflow_blocks,
actions_by_task=codegen_input.actions_by_task,
task_v2_child_blocks=codegen_input.task_v2_child_blocks,
organization_id=workflow.organization_id,
script_id=script.script_id,
script_revision_id=script.script_revision_id,
2025-09-19 09:00:48 -07:00
pending=pending,
cached_blocks=cached_block_sources,
updated_block_labels=updated_block_labels,
2025-09-19 08:50:21 -07:00
)
except Exception:
LOG.error("Failed to generate workflow script source", exc_info=True)
return
# 4) Persist script and files, then record mapping
content_bytes = python_src.encode("utf-8")
content_b64 = base64.b64encode(content_bytes).decode("utf-8")
files = [
ScriptFileCreate(
path="main.py",
content=content_b64,
encoding=FileEncoding.BASE64,
mime_type="text/x-python",
)
]
# Upload script file(s) as artifacts and create rows
await script_service.build_file_tree(
files=files,
organization_id=workflow.organization_id,
script_id=script.script_id,
script_version=script.version,
script_revision_id=script.script_revision_id,
2025-09-19 09:00:48 -07:00
pending=pending,
2025-09-19 08:50:21 -07:00
)
# check if an existing drfat workflow script exists for this workflow run
2025-09-19 09:00:48 -07:00
existing_pending_workflow_script = None
2025-09-19 08:50:21 -07:00
status = ScriptStatus.published
2025-09-19 09:00:48 -07:00
if pending:
2025-09-19 08:50:21 -07:00
status = ScriptStatus.pending
2025-09-19 09:00:48 -07:00
existing_pending_workflow_script = await app.DATABASE.get_workflow_script(
2025-09-19 08:50:21 -07:00
organization_id=workflow.organization_id,
workflow_permanent_id=workflow.workflow_permanent_id,
workflow_run_id=workflow_run.workflow_run_id,
statuses=[status],
)
2025-09-19 09:00:48 -07:00
if not existing_pending_workflow_script:
2025-09-19 08:50:21 -07:00
# Record the workflow->script mapping for cache lookup
await app.DATABASE.create_workflow_script(
organization_id=workflow.organization_id,
script_id=script.script_id,
workflow_permanent_id=workflow.workflow_permanent_id,
cache_key=workflow.cache_key or "",
cache_key_value=rendered_cache_key_value,
workflow_id=workflow.workflow_id,
workflow_run_id=workflow_run.workflow_run_id,
status=status,
)