Fix extraction prompt templating (#3335)

This commit is contained in:
Shuchang Zheng
2025-09-01 15:41:40 +08:00
committed by GitHub
parent ae8fda4ed5
commit 9a699e70f8
5 changed files with 65 additions and 3 deletions

View File

@@ -294,13 +294,25 @@ def _action_to_stmt(act: dict[str, Any], assign_to_output: bool = False) -> cst.
args.append(
cst.Arg(
keyword=cst.Name("prompt"),
value=_value(act["data_extraction_goal"]),
value=_render_value(act["data_extraction_goal"]),
whitespace_after_arg=cst.ParenthesizedWhitespace(
indent=True,
last_line=cst.SimpleWhitespace(INDENT),
),
)
)
if act.get("data_extraction_schema"):
args.append(
cst.Arg(
keyword=cst.Name("schema"),
value=_value(act["data_extraction_schema"]),
whitespace_after_arg=cst.ParenthesizedWhitespace(
indent=True,
last_line=cst.SimpleWhitespace(INDENT),
),
comma=cst.Comma(),
)
)
args.extend(
[
@@ -565,6 +577,14 @@ def _build_extract_statement(block_title: str, block: dict[str, Any]) -> cst.Sim
last_line=cst.SimpleWhitespace(INDENT),
),
),
cst.Arg(
keyword=cst.Name("schema"),
value=_value(block.get("data_schema", "")),
whitespace_after_arg=cst.ParenthesizedWhitespace(
indent=True,
last_line=cst.SimpleWhitespace(INDENT),
),
),
cst.Arg(
keyword=cst.Name("cache_key"),
value=_value(block_title),

View File

@@ -20,7 +20,7 @@ from skyvern.forge.sdk.core import skyvern_context
from skyvern.utils.prompt_engine import load_prompt_with_elements
from skyvern.webeye.actions import handler_utils
from skyvern.webeye.actions.action_types import ActionType
from skyvern.webeye.actions.actions import Action, ActionStatus, SelectOption
from skyvern.webeye.actions.actions import Action, ActionStatus, ExtractAction, SelectOption
from skyvern.webeye.browser_factory import BrowserState
from skyvern.webeye.scraper.scraper import ScrapedPage, scrape_website
@@ -224,6 +224,25 @@ class SkyvernPage:
response=response,
created_by="script",
)
if action_type == ActionType.EXTRACT:
action = ExtractAction(
element_id="",
action_type=action_type,
status=status,
organization_id=context.organization_id,
workflow_run_id=context.workflow_run_id,
task_id=context.task_id,
step_id=context.step_id,
step_order=0,
action_order=0,
intention=intention,
reasoning=f"Auto-generated action for {action_type.value}",
data_extraction_goal=kwargs.get("prompt"),
data_extraction_schema=kwargs.get("schema"),
option=select_option,
response=response,
created_by="script",
)
created_action = await app.DATABASE.create_action(action)
return created_action

View File

@@ -7,6 +7,7 @@ from skyvern.core.script_generations.constants import SCRIPT_TASK_BLOCKS
from skyvern.forge import app
from skyvern.schemas.workflows import BlockType
from skyvern.services import workflow_service
from skyvern.webeye.actions.action_types import ActionType
LOG = structlog.get_logger(__name__)
@@ -100,6 +101,19 @@ async def transform_workflow_run_to_code_gen_input(workflow_run_id: str, organiz
for action in actions:
action_dump = action.model_dump()
action_dump["xpath"] = action.get_xpath()
if (
"data_extraction_goal" in final_dump
and final_dump["data_extraction_goal"]
and action.action_type == ActionType.EXTRACT
):
# for EXTRACT actions, override the action's own data_extraction_goal with the non-empty one from final_dump
action_dump["data_extraction_goal"] = final_dump["data_extraction_goal"]
if (
"extracted_information_schema" in final_dump
and final_dump["extracted_information_schema"]
and action.action_type == ActionType.EXTRACT
):
action_dump["data_extraction_schema"] = final_dump["extracted_information_schema"]
action_dumps.append(action_dump)
actions_by_task[run_block.task_id] = action_dumps
else: