Support the PDF parser block to fix a script generation error (#3646)
This commit is contained in:
@@ -38,6 +38,7 @@ from skyvern.services.script_service import ( # noqa: E402
|
|||||||
login, # noqa: E402
|
login, # noqa: E402
|
||||||
loop, # noqa: E402
|
loop, # noqa: E402
|
||||||
parse_file, # noqa: E402
|
parse_file, # noqa: E402
|
||||||
|
parse_pdf, # noqa: E402
|
||||||
prompt, # noqa: E402
|
prompt, # noqa: E402
|
||||||
render_list, # noqa: E402
|
render_list, # noqa: E402
|
||||||
render_template, # noqa: E402
|
render_template, # noqa: E402
|
||||||
@@ -64,6 +65,7 @@ __all__ = [
|
|||||||
"login",
|
"login",
|
||||||
"loop",
|
"loop",
|
||||||
"parse_file",
|
"parse_file",
|
||||||
|
"parse_pdf",
|
||||||
"prompt",
|
"prompt",
|
||||||
"render_list",
|
"render_list",
|
||||||
"render_template",
|
"render_template",
|
||||||
|
|||||||
@@ -1044,6 +1044,55 @@ def _build_file_upload_statement(block: dict[str, Any]) -> cst.SimpleStatementLi
|
|||||||
return cst.SimpleStatementLine([cst.Expr(cst.Await(call))])
|
return cst.SimpleStatementLine([cst.Expr(cst.Await(call))])
|
||||||
|
|
||||||
|
|
||||||
|
def _build_pdf_parser_statement(block: dict[str, Any]) -> cst.SimpleStatementLine:
    """Build an ``await skyvern.parse_pdf(...)`` statement from a block dict.

    ``file_url`` is always emitted (an empty string is used when the key is
    absent). ``schema`` (read from ``block["json_schema"]``) and ``label`` are
    emitted only when their values are not ``None``.
    """

    def _indented_break() -> cst.ParenthesizedWhitespace:
        # Fresh whitespace node per use: a line break followed by INDENT.
        return cst.ParenthesizedWhitespace(
            indent=True,
            last_line=cst.SimpleWhitespace(INDENT),
        )

    def _keyword_arg(name: str, raw_value: Any) -> cst.Arg:
        # One ``name=value`` argument placed on its own line.
        return cst.Arg(
            keyword=cst.Name(name),
            value=_value(raw_value),
            whitespace_after_arg=_indented_break(),
        )

    call_args = [_keyword_arg("file_url", block.get("file_url", ""))]

    # (key in the block dict, keyword name in the generated call)
    optional_fields = (
        ("json_schema", "schema"),
        ("label", "label"),
    )
    for block_key, keyword_name in optional_fields:
        field_value = block.get(block_key)
        if field_value is None:
            continue
        call_args.append(_keyword_arg(keyword_name, field_value))

    _mark_last_arg_as_comma(call_args)

    parse_pdf_call = cst.Call(
        func=cst.Attribute(value=cst.Name("skyvern"), attr=cst.Name("parse_pdf")),
        args=call_args,
        whitespace_before_args=_indented_break(),
    )
    awaited_call = cst.Await(parse_pdf_call)
    return cst.SimpleStatementLine([cst.Expr(awaited_call)])
|
||||||
|
|
||||||
|
|
||||||
def _build_file_url_parser_statement(block: dict[str, Any]) -> cst.SimpleStatementLine:
|
def _build_file_url_parser_statement(block: dict[str, Any]) -> cst.SimpleStatementLine:
|
||||||
"""Build a skyvern.parse_file statement."""
|
"""Build a skyvern.parse_file statement."""
|
||||||
args = [
|
args = [
|
||||||
@@ -1531,6 +1580,8 @@ def _build_block_statement(block: dict[str, Any], data_variable_name: str | None
|
|||||||
stmt = _build_file_url_parser_statement(block)
|
stmt = _build_file_url_parser_statement(block)
|
||||||
elif block_type == "http_request":
|
elif block_type == "http_request":
|
||||||
stmt = _build_http_request_statement(block)
|
stmt = _build_http_request_statement(block)
|
||||||
|
elif block_type == "pdf_parser":
|
||||||
|
stmt = _build_pdf_parser_statement(block)
|
||||||
else:
|
else:
|
||||||
# Default case for unknown block types
|
# Default case for unknown block types
|
||||||
stmt = cst.SimpleStatementLine([cst.Expr(cst.SimpleString(f"# Unknown block type: {block_type}"))])
|
stmt = cst.SimpleStatementLine([cst.Expr(cst.SimpleString(f"# Unknown block type: {block_type}"))])
|
||||||
|
|||||||
@@ -37,6 +37,7 @@ from skyvern.forge.sdk.workflow.models.block import (
|
|||||||
ForLoopBlock,
|
ForLoopBlock,
|
||||||
HttpRequestBlock,
|
HttpRequestBlock,
|
||||||
LoginBlock,
|
LoginBlock,
|
||||||
|
PDFParserBlock,
|
||||||
SendEmailBlock,
|
SendEmailBlock,
|
||||||
TaskBlock,
|
TaskBlock,
|
||||||
TextPromptBlock,
|
TextPromptBlock,
|
||||||
@@ -1822,6 +1823,29 @@ async def send_email(
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
async def parse_pdf(
    file_url: str,
    schema: dict[str, Any] | None = None,
    label: str | None = None,
    parameters: list[PARAMETER_TYPE] | None = None,
) -> None:
    """Construct a ``PDFParserBlock`` for ``file_url`` and execute it.

    Args:
        file_url: Location of the PDF; passed through
            ``_render_template_with_label`` together with ``label`` before use.
        schema: Optional value forwarded to the block as ``json_schema``.
        label: Optional block label; handed to
            ``_validate_and_get_output_parameter``, whose result supplies the
            label and output parameter actually set on the block.
        parameters: Optional block parameters; a falsy value becomes ``[]``.
    """
    validated = await _validate_and_get_output_parameter(label)
    rendered_file_url = _render_template_with_label(file_url, label)

    block = PDFParserBlock(
        file_url=rendered_file_url,
        json_schema=schema,
        label=validated.label,
        output_parameter=validated.output_parameter,
        parameters=parameters if parameters else [],
    )

    # Run-scoped identifiers all come from the validation result.
    run_kwargs: dict[str, Any] = {
        "workflow_run_id": validated.workflow_run_id,
        "parent_workflow_run_block_id": validated.context.parent_workflow_run_block_id,
        "organization_id": validated.organization_id,
        "browser_session_id": validated.browser_session_id,
    }
    await block.execute_safe(**run_kwargs)
|
||||||
|
|
||||||
|
|
||||||
async def parse_file(
|
async def parse_file(
|
||||||
file_url: str,
|
file_url: str,
|
||||||
file_type: FileType,
|
file_type: FileType,
|
||||||
|
|||||||
Reference in New Issue
Block a user