script gen: extract action support (#3238)

This commit is contained in:
Shuchang Zheng
2025-08-19 21:21:05 -07:00
committed by GitHub
parent 7823ff9c46
commit 2392252c66
5 changed files with 161 additions and 43 deletions

View File

@@ -151,11 +151,15 @@ def _make_decorator(block_label: str, block: dict[str, Any]) -> cst.Decorator:
)
def _action_to_stmt(act: dict[str, Any]) -> cst.BaseStatement:
def _action_to_stmt(act: dict[str, Any], assign_to_output: bool = False) -> cst.BaseStatement:
"""
Turn one Action dict into:
await page.<method>(xpath=..., intention=..., data=context.parameters)
Or if assign_to_output is True for extract actions:
output = await page.extract(...)
"""
method = ACTION_MAP[act["action_type"]]
@@ -248,13 +252,23 @@ def _action_to_stmt(act: dict[str, Any]) -> cst.BaseStatement:
# await page.method(...)
await_expr = cst.Await(call)
# Wrap in a statement line: await ...
return cst.SimpleStatementLine([cst.Expr(await_expr)])
# If this is an extract action and we want to assign to output
if assign_to_output and method == "extract":
# output = await page.extract(...)
assign = cst.Assign(
targets=[cst.AssignTarget(cst.Name("output"))],
value=await_expr,
)
return cst.SimpleStatementLine([assign])
else:
# Wrap in a statement line: await ...
return cst.SimpleStatementLine([cst.Expr(await_expr)])
def _build_block_fn(block: dict[str, Any], actions: list[dict[str, Any]]) -> FunctionDef:
name = block.get("label") or _safe_name(block.get("title") or f"block_{block.get('workflow_run_block_id')}")
body_stmts: list[cst.BaseStatement] = []
is_extraction_block = block.get("block_type") == "extraction"
if block.get("url"):
body_stmts.append(cst.parse_statement(f"await page.goto({repr(block['url'])})"))
@@ -262,9 +276,19 @@ def _build_block_fn(block: dict[str, Any], actions: list[dict[str, Any]]) -> Fun
for act in actions:
if act["action_type"] in [ActionType.COMPLETE, ActionType.TERMINATE, ActionType.NULL_ACTION]:
continue
body_stmts.append(_action_to_stmt(act))
if not body_stmts:
# For extraction blocks, assign extract action results to output variable
assign_to_output = is_extraction_block and act["action_type"] == "extract"
body_stmts.append(_action_to_stmt(act, assign_to_output=assign_to_output))
# For extraction blocks, add return output statement if we have actions
if is_extraction_block and any(
act["action_type"] == "extract"
for act in actions
if act["action_type"] not in [ActionType.COMPLETE, ActionType.TERMINATE, ActionType.NULL_ACTION]
):
body_stmts.append(cst.parse_statement("return output"))
elif not body_stmts:
body_stmts.append(cst.parse_statement("return None"))
return FunctionDef(

View File

@@ -3,7 +3,7 @@ from __future__ import annotations
import asyncio
import json
from dataclasses import dataclass
from datetime import datetime
from datetime import datetime, timezone
from enum import StrEnum
from typing import Any, Callable, Literal
@@ -16,6 +16,7 @@ from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.files import download_file
from skyvern.forge.sdk.artifact.models import ArtifactType
from skyvern.forge.sdk.core import skyvern_context
from skyvern.utils.prompt_engine import load_prompt_with_elements
from skyvern.webeye.actions import handler_utils
from skyvern.webeye.actions.action_types import ActionType
from skyvern.webeye.actions.actions import Action, ActionStatus
@@ -156,7 +157,7 @@ class SkyvernPage:
raise
finally:
skyvern_page._record(call)
# Auto-create action before execution
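# Auto-create action after execution (NOTE(review): the helper is still named `_create_action_before_execution` — consider renaming it to match)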
await skyvern_page._create_action_before_execution(
action_type=action,
intention=intention,
@@ -418,10 +419,43 @@ class SkyvernPage:
@action_wrap(ActionType.EXTRACT)
async def extract(
    self,
    data_extraction_goal: str,
    data_schema: dict[str, Any] | list | str | None = None,
    error_code_mapping: dict[str, str] | None = None,
    intention: str | None = None,
    data: str | dict[str, Any] | None = None,
) -> dict[str, Any] | list | str | None:
    """Extract information from the current page using the extraction LLM handler.

    The scraped page is refreshed, an "extract-information" prompt is rendered
    from it, and the prompt is sent to ``app.EXTRACTION_LLM_API_HANDLER``
    together with the refreshed page's screenshots.

    Args:
        data_extraction_goal: Description of what should be extracted; passed
            to the prompt template.
        data_schema: Optional schema passed to the prompt template as
            ``extracted_information_schema``.
        error_code_mapping: Optional mapping that is JSON-encoded into the
            prompt; an empty or missing mapping is passed as ``None``.
        intention: Not read by this method body.
            NOTE(review): presumably consumed by ``action_wrap`` — confirm.
        data: Not read by this method body.
            NOTE(review): presumably consumed by ``action_wrap`` — confirm.

    Returns:
        Whatever ``app.EXTRACTION_LLM_API_HANDLER`` returns for the prompt.
    """
    scraped_page_refreshed = await self.scraped_page.refresh()
    context = skyvern_context.current()

    # Use the context's timezone when one is set; otherwise fall back to UTC.
    tz_info = context.tz_info if context and context.tz_info else timezone.utc

    extract_information_prompt = load_prompt_with_elements(
        element_tree_builder=scraped_page_refreshed,
        prompt_engine=prompt_engine,
        template_name="extract-information",
        html_need_skyvern_attrs=False,
        data_extraction_goal=data_extraction_goal,
        extracted_information_schema=data_schema,
        current_url=scraped_page_refreshed.url,
        extracted_text=scraped_page_refreshed.extracted_text,
        error_code_mapping_str=(json.dumps(error_code_mapping) if error_code_mapping else None),
        local_datetime=datetime.now(tz_info).isoformat(),
    )

    # The step is only looked up when the context carries all three ids;
    # otherwise the handler is called with step=None.
    step = None
    if context and context.organization_id and context.task_id and context.step_id:
        step = await app.DATABASE.get_step(
            task_id=context.task_id,
            step_id=context.step_id,
            organization_id=context.organization_id,
        )

    return await app.EXTRACTION_LLM_API_HANDLER(
        prompt=extract_information_prompt,
        step=step,
        screenshots=scraped_page_refreshed.screenshots,
        prompt_name="extract-information",
    )
@action_wrap(ActionType.VERIFICATION_CODE)
async def verification_code(