Pedro/add sop workflow builder (#3685)
This commit is contained in:
@@ -67,16 +67,55 @@ function ImportWorkflowButton() {
|
||||
<input
|
||||
id={inputId}
|
||||
type="file"
|
||||
accept=".yaml,.yml,.json"
|
||||
accept=".yaml,.yml,.json,.pdf"
|
||||
className="hidden"
|
||||
onChange={async (event) => {
|
||||
if (event.target.files && event.target.files[0]) {
|
||||
const fileTextContent = await event.target.files[0].text();
|
||||
const isJson = isJsonString(fileTextContent);
|
||||
const content = isJson
|
||||
? convertToYAML(JSON.parse(fileTextContent))
|
||||
: fileTextContent;
|
||||
createWorkflowFromYamlMutation.mutate(content);
|
||||
const file = event.target.files[0];
|
||||
const fileName = file.name.toLowerCase();
|
||||
|
||||
if (fileName.endsWith(".pdf")) {
|
||||
// Handle PDF file - send as FormData to new endpoint
|
||||
const formData = new FormData();
|
||||
formData.append("file", file);
|
||||
|
||||
const client = await getClient(credentialGetter);
|
||||
try {
|
||||
const response = await client.post<WorkflowApiResponse>(
|
||||
"/workflows/import-pdf",
|
||||
formData,
|
||||
{
|
||||
headers: {
|
||||
"Content-Type": "multipart/form-data",
|
||||
},
|
||||
},
|
||||
);
|
||||
|
||||
queryClient.invalidateQueries({
|
||||
queryKey: ["workflows"],
|
||||
});
|
||||
navigate(
|
||||
`/workflows/${response.data.workflow_permanent_id}/debug`,
|
||||
);
|
||||
} catch (error) {
|
||||
toast({
|
||||
title: "Import Failed",
|
||||
description:
|
||||
error instanceof Error
|
||||
? error.message
|
||||
: "Failed to import PDF",
|
||||
variant: "destructive",
|
||||
});
|
||||
}
|
||||
} else {
|
||||
// Non-pdf files like yaml, json
|
||||
const fileTextContent = await file.text();
|
||||
const isJson = isJsonString(fileTextContent);
|
||||
const content = isJson
|
||||
? convertToYAML(JSON.parse(fileTextContent))
|
||||
: fileTextContent;
|
||||
createWorkflowFromYamlMutation.mutate(content);
|
||||
}
|
||||
}
|
||||
}}
|
||||
/>
|
||||
@@ -91,7 +130,7 @@ function ImportWorkflowButton() {
|
||||
</Label>
|
||||
</TooltipTrigger>
|
||||
<TooltipContent>
|
||||
Import a workflow from a YAML or JSON file
|
||||
Import a workflow from a YAML, JSON, or PDF file
|
||||
</TooltipContent>
|
||||
</Tooltip>
|
||||
</TooltipProvider>
|
||||
|
||||
84
skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2
Normal file
84
skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2
Normal file
@@ -0,0 +1,84 @@
|
||||
You are an AI assistant that converts Standard Operating Procedures (SOP) from text into a Skyvern workflow definition in JSON format.
|
||||
|
||||
REQUIRED OUTPUT FORMAT:
|
||||
Return a JSON object with this structure:
|
||||
{
|
||||
"title": "workflow_name",
|
||||
"workflow_definition": {
|
||||
"parameters": [
|
||||
{
|
||||
"key": "parameter_name",
|
||||
"parameter_type": "workflow",
|
||||
"workflow_parameter_type": "string",
|
||||
"description": "Parameter description",
|
||||
"default_value": null
|
||||
}
|
||||
],
|
||||
"blocks": [
|
||||
{
|
||||
"label": "block_name",
|
||||
"block_type": "block_type_name",
|
||||
"continue_on_failure": false,
|
||||
// ... other required fields for each block type
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
PARAMETER STRUCTURE:
|
||||
- Each parameter MUST have: "key", "parameter_type", "workflow_parameter_type"
|
||||
- "parameter_type" should always be "workflow"
|
||||
- "workflow_parameter_type" can be: "string", "json", "credential_id", "file_url"
|
||||
- Use "credential_id" for passwords/credentials
|
||||
- "description" and "default_value" are optional
|
||||
|
||||
AVAILABLE BLOCK TYPES (use these exact names):
|
||||
- "login": For user authentication with credentials
|
||||
- "navigation": For navigating to pages and filling forms
|
||||
- "action": For clicking buttons or simple actions
|
||||
- "extraction": For extracting data from pages
|
||||
- "task": For complex tasks with both navigation and extraction
|
||||
- "file_download": For downloading files
|
||||
- "for_loop": For repeating actions over a list
|
||||
- "validation": For validating extracted data
|
||||
- "wait": For waiting/pausing
|
||||
- "code": For custom code execution
|
||||
- "text_prompt": For LLM text generation
|
||||
- "http_request": For API calls
|
||||
|
||||
BLOCK STRUCTURE REQUIREMENTS:
|
||||
1. Each block MUST have: label, block_type, continue_on_failure
|
||||
2. Navigation blocks need: url (can be empty string ""), navigation_goal, engine (set to "skyvern-1.0")
|
||||
3. Login blocks need: url, navigation_goal, parameter_keys (empty array if no credentials), engine (set to "skyvern-1.0")
|
||||
4. Extraction blocks need: url (can be empty string ""), data_extraction_goal, data_schema, engine (set to "skyvern-1.0")
|
||||
5. Action blocks need: url (can be empty string ""), navigation_goal, engine (set to "skyvern-1.0")
|
||||
6. Validation blocks need: complete_criterion OR terminate_criterion (at least one must be set), parameter_keys (empty array if none)
|
||||
7. For_loop blocks need: loop_blocks, loop_variable_reference
|
||||
8. File_download blocks need: url (can be empty string ""), navigation_goal, engine (set to "skyvern-1.0")
|
||||
|
||||
CRITICAL INSTRUCTIONS - READ CAREFULLY:
|
||||
1. **BE THOROUGH**: Convert EVERY automatable step from the SOP into a block. Do not skip or combine steps.
|
||||
2. **PRESERVE SPECIFICITY**: If the SOP gives specific instructions (e.g., "click X then Y", "fill field A with value B"), create separate blocks for each action. DO NOT generalize or simplify.
|
||||
3. **ONE ACTION PER BLOCK**: For "action" blocks, each should do ONE specific thing (one click, one navigation). Create multiple blocks if needed.
|
||||
4. **DETAILED navigation_goal**: Copy the exact instructions from the SOP into the navigation_goal field. Be as specific as the original SOP.
|
||||
5. **MAINTAIN ORDER**: Keep the exact order of steps from the SOP. Do not reorganize or optimize.
|
||||
6. **INCLUDE ALL CONDITIONS**: If the SOP says "if X then Y", create conditional blocks or separate blocks for each scenario.
|
||||
7. **IGNORE ONLY**: Skip only steps requiring human judgment, creativity, or physical actions. Include everything else.
|
||||
8. **URL FIELD**: Most blocks need a "url" field. Use empty string "" if no specific URL is needed (browser stays on current page).
|
||||
9. **AVOID VALIDATION BLOCKS**: Use "extraction" blocks for data extraction. Only use "validation" if explicitly validating previous extracted data, and always include complete_criterion.
|
||||
10. Set continue_on_failure to false for critical steps, true for optional ones
|
||||
11. Set engine to "skyvern-1.0" for all blocks that need it
|
||||
12. Use clear, descriptive labels that match the SOP terminology
|
||||
|
||||
EXAMPLES OF THOROUGHNESS:
|
||||
- If SOP says "Navigate to page X, then click button Y, then fill form Z" → Create 3 separate blocks
|
||||
- If SOP says "Click the 'Products' link in the top left" → Use that EXACT wording in navigation_goal
|
||||
- If SOP has 20 steps → Your workflow should have ~20 blocks (one per step)
|
||||
|
||||
Standard Operating Procedure:
|
||||
```
|
||||
{{ sop_text }}
|
||||
```
|
||||
|
||||
Return ONLY a valid JSON object following the structure above. Create a comprehensive workflow that captures EVERY automatable step from the SOP with full specificity.
|
||||
|
||||
@@ -98,6 +98,7 @@ from skyvern.schemas.runs import (
|
||||
)
|
||||
from skyvern.schemas.workflows import BlockType, WorkflowCreateYAMLRequest, WorkflowRequest, WorkflowStatus
|
||||
from skyvern.services import block_service, run_service, task_v1_service, task_v2_service, workflow_service
|
||||
from skyvern.services.pdf_import_service import pdf_import_service
|
||||
from skyvern.webeye.actions.actions import Action
|
||||
|
||||
LOG = structlog.get_logger()
|
||||
@@ -588,6 +589,47 @@ async def create_workflow_from_prompt(
|
||||
return workflow.model_dump(by_alias=True)
|
||||
|
||||
|
||||
@legacy_base_router.post(
|
||||
"/workflows/import-pdf",
|
||||
response_model=dict[str, Any],
|
||||
tags=["agent"],
|
||||
openapi_extra={
|
||||
"x-fern-sdk-method-name": "import_workflow_from_pdf",
|
||||
"x-fern-examples": [
|
||||
{
|
||||
"code-samples": [
|
||||
{
|
||||
"sdk": "curl",
|
||||
"code": 'curl -X POST "https://api.skyvern.com/workflows/import-pdf" \\\n -H "Authorization: Bearer YOUR_API_KEY" \\\n -F "file=@sop_document.pdf"',
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
},
|
||||
description="Import a workflow from a PDF containing Standard Operating Procedures",
|
||||
summary="Import workflow from PDF",
|
||||
responses={
|
||||
200: {"description": "Successfully imported workflow from PDF"},
|
||||
400: {"description": "Invalid PDF file or no content found"},
|
||||
422: {"description": "Failed to convert SOP to workflow"},
|
||||
500: {"description": "Internal server error during processing"},
|
||||
},
|
||||
)
|
||||
@legacy_base_router.post(
|
||||
"/workflows/import-pdf/",
|
||||
response_model=dict[str, Any],
|
||||
include_in_schema=False,
|
||||
)
|
||||
async def import_workflow_from_pdf(
|
||||
file: UploadFile,
|
||||
current_org: Organization = Depends(org_auth_service.get_current_org),
|
||||
) -> dict[str, Any]:
|
||||
"""Import a workflow from a PDF file containing Standard Operating Procedures."""
|
||||
analytics.capture("skyvern-oss-workflow-import-pdf")
|
||||
|
||||
return await pdf_import_service.import_workflow_from_pdf(file, current_org)
|
||||
|
||||
|
||||
@legacy_base_router.put(
|
||||
"/workflows/{workflow_id}",
|
||||
openapi_extra={
|
||||
|
||||
288
skyvern/services/pdf_import_service.py
Normal file
288
skyvern/services/pdf_import_service.py
Normal file
@@ -0,0 +1,288 @@
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
from typing import Any
|
||||
|
||||
import structlog
|
||||
from fastapi import HTTPException, UploadFile
|
||||
from pypdf import PdfReader
|
||||
|
||||
from skyvern.config import settings
|
||||
from skyvern.forge import app
|
||||
from skyvern.forge.prompts import prompt_engine
|
||||
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
|
||||
from skyvern.forge.sdk.schemas.organizations import Organization
|
||||
from skyvern.schemas.workflows import WorkflowCreateYAMLRequest
|
||||
|
||||
LOG = structlog.get_logger(__name__)
|
||||
|
||||
|
||||
class PDFImportService:
|
||||
@staticmethod
|
||||
def _sanitize_workflow_json(raw: dict[str, Any]) -> dict[str, Any]:
|
||||
"""Clean LLM JSON to match Skyvern schema conventions and avoid Jinja errors.
|
||||
|
||||
- Replace Jinja refs like {{workflow.foo}} or {{parameters.foo}} with {{foo}}
|
||||
- Auto-populate block.parameter_keys with any referenced parameter keys
|
||||
- Ensure all block labels are unique by appending indices to duplicates
|
||||
"""
|
||||
|
||||
def strip_prefixes(text: str) -> tuple[str, set[str]]:
|
||||
# Replace {{ workflow.xxx }} and {{ parameters.xxx }} with {{ xxx }}
|
||||
cleaned = text
|
||||
cleaned = re.sub(r"\{\{\s*workflow\.([a-zA-Z0-9_\.]+)\s*\}\}", r"{{ \1 }}", cleaned)
|
||||
cleaned = re.sub(r"\{\{\s*parameters\.([a-zA-Z0-9_\.]+)\s*\}\}", r"{{ \1 }}", cleaned)
|
||||
|
||||
# Collect jinja variable names (take first segment before any dot)
|
||||
used: set[str] = set()
|
||||
for match in re.finditer(r"\{\{\s*([^\}\s\|]+)\s*[^}]*\}\}", cleaned):
|
||||
var = match.group(1)
|
||||
# Use base segment before dot to match parameter keys
|
||||
base = var.split(".")[0]
|
||||
used.add(base)
|
||||
return cleaned, used
|
||||
|
||||
workflow_def = raw.get("workflow_definition", {})
|
||||
param_defs = workflow_def.get("parameters", []) or []
|
||||
param_keys = {p.get("key") for p in param_defs if isinstance(p, dict) and p.get("key")}
|
||||
|
||||
blocks = workflow_def.get("blocks", []) or []
|
||||
|
||||
# First pass: deduplicate block labels
|
||||
seen_labels: dict[str, int] = {}
|
||||
deduplicated_count = 0
|
||||
for blk in blocks:
|
||||
if not isinstance(blk, dict):
|
||||
continue
|
||||
label = blk.get("label", "")
|
||||
if not label:
|
||||
continue
|
||||
|
||||
if label in seen_labels:
|
||||
# This label has been seen before, append index
|
||||
seen_labels[label] += 1
|
||||
new_label = f"{label}_{seen_labels[label]}"
|
||||
LOG.info(
|
||||
"Deduplicating block label",
|
||||
original_label=label,
|
||||
new_label=new_label,
|
||||
occurrence=seen_labels[label],
|
||||
)
|
||||
blk["label"] = new_label
|
||||
deduplicated_count += 1
|
||||
else:
|
||||
# First time seeing this label
|
||||
seen_labels[label] = 1
|
||||
|
||||
if deduplicated_count > 0:
|
||||
LOG.info(
|
||||
"Deduplicated block labels",
|
||||
total_deduplicated=deduplicated_count,
|
||||
duplicate_labels=sorted([label for label, count in seen_labels.items() if count > 1]),
|
||||
)
|
||||
for blk in blocks:
|
||||
if not isinstance(blk, dict):
|
||||
continue
|
||||
referenced: set[str] = set()
|
||||
# Fields that commonly contain Jinja
|
||||
for field in [
|
||||
"url",
|
||||
"navigation_goal",
|
||||
"data_extraction_goal",
|
||||
"complete_criterion",
|
||||
"terminate_criterion",
|
||||
"title",
|
||||
]:
|
||||
val = blk.get(field)
|
||||
if isinstance(val, str):
|
||||
cleaned, used = strip_prefixes(val)
|
||||
blk[field] = cleaned
|
||||
referenced.update(used)
|
||||
|
||||
# Ensure required fields for text_prompt blocks
|
||||
if blk.get("block_type") == "text_prompt":
|
||||
if not blk.get("prompt"):
|
||||
# Prefer an instruction-bearing field if present
|
||||
blk["prompt"] = (
|
||||
blk.get("navigation_goal")
|
||||
or blk.get("title")
|
||||
or blk.get("label")
|
||||
or "Provide the requested text response."
|
||||
)
|
||||
# Track jinja usage within the prompt
|
||||
prompt_val = blk.get("prompt")
|
||||
if isinstance(prompt_val, str):
|
||||
cleaned, used = strip_prefixes(prompt_val)
|
||||
blk["prompt"] = cleaned
|
||||
referenced.update(used)
|
||||
|
||||
# parameter_keys should include only known parameter keys
|
||||
if param_keys:
|
||||
keys_to_include = sorted(k for k in referenced if k in param_keys)
|
||||
if keys_to_include:
|
||||
blk["parameter_keys"] = keys_to_include
|
||||
|
||||
# Ensure engine where needed
|
||||
if blk.get("block_type") in {"navigation", "action", "extraction", "login", "file_download"}:
|
||||
blk.setdefault("engine", "skyvern-1.0")
|
||||
|
||||
# Ensure url exists (can be empty string)
|
||||
if blk.get("block_type") in {"navigation", "action", "extraction", "file_download"}:
|
||||
if blk.get("url") is None:
|
||||
blk["url"] = ""
|
||||
|
||||
return raw
|
||||
|
||||
async def import_workflow_from_pdf(self, file: UploadFile, organization: Organization) -> dict[str, Any]:
|
||||
LOG.info("Starting PDF import", filename=file.filename, organization_id=organization.organization_id)
|
||||
|
||||
if not file.filename.lower().endswith(".pdf"):
|
||||
raise HTTPException(status_code=400, detail="Only PDF files are supported.")
|
||||
|
||||
# Save the uploaded file to a temporary location
|
||||
LOG.info("Saving PDF to temporary file", filename=file.filename)
|
||||
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
|
||||
temp_file.write(await file.read())
|
||||
temp_file_path = temp_file.name
|
||||
|
||||
try:
|
||||
# Extract text from PDF
|
||||
LOG.info("Extracting text from PDF", filename=file.filename, temp_file=temp_file_path)
|
||||
reader = PdfReader(temp_file_path)
|
||||
sop_text = ""
|
||||
for page_num, page in enumerate(reader.pages, 1):
|
||||
page_text = page.extract_text()
|
||||
sop_text += page_text + "\n"
|
||||
LOG.debug("Extracted text from page", page=page_num, text_length=len(page_text))
|
||||
|
||||
LOG.info(
|
||||
"PDF text extraction complete",
|
||||
total_text_length=len(sop_text),
|
||||
organization_id=organization.organization_id,
|
||||
)
|
||||
|
||||
if not sop_text.strip():
|
||||
raise HTTPException(status_code=400, detail="No readable content found in the PDF.")
|
||||
|
||||
# Load and render the prompt template
|
||||
prompt = prompt_engine.load_prompt(
|
||||
"build-workflow-from-pdf",
|
||||
sop_text=sop_text,
|
||||
)
|
||||
|
||||
# Use the LLM to convert SOP to workflow
|
||||
llm_key = settings.LLM_KEY or "gpt-4o-mini"
|
||||
LOG.info(
|
||||
"Calling LLM to convert SOP to workflow",
|
||||
llm_key=llm_key,
|
||||
prompt_length=len(prompt),
|
||||
sop_text_length=len(sop_text),
|
||||
sop_chars_sent=len(sop_text),
|
||||
organization_id=organization.organization_id,
|
||||
)
|
||||
|
||||
llm_api_handler = LLMAPIHandlerFactory.get_llm_api_handler(llm_key)
|
||||
|
||||
response = await llm_api_handler(
|
||||
prompt=prompt,
|
||||
prompt_name="sop_to_workflow_conversion",
|
||||
organization_id=organization.organization_id,
|
||||
parameters={"max_completion_tokens": 32768}, # Override the default 4096 limit for PDF conversion
|
||||
)
|
||||
|
||||
LOG.info(
|
||||
"LLM response received",
|
||||
response_type=type(response),
|
||||
response_keys=list(response.keys()) if isinstance(response, dict) else None,
|
||||
organization_id=organization.organization_id,
|
||||
)
|
||||
|
||||
# The LLM API handler automatically parses JSON responses
|
||||
# The response should be a dict with the workflow structure
|
||||
if not isinstance(response, dict):
|
||||
LOG.error(
|
||||
"LLM returned non-dict response",
|
||||
response_type=type(response),
|
||||
response=str(response)[:500],
|
||||
organization_id=organization.organization_id,
|
||||
)
|
||||
raise HTTPException(
|
||||
status_code=422, detail="LLM returned invalid response format - expected JSON object"
|
||||
)
|
||||
|
||||
# Validate that it has the required structure
|
||||
if "workflow_definition" not in response:
|
||||
LOG.error(
|
||||
"LLM response missing workflow_definition",
|
||||
response_keys=list(response.keys()),
|
||||
organization_id=organization.organization_id,
|
||||
)
|
||||
raise HTTPException(status_code=422, detail="LLM response missing 'workflow_definition' field")
|
||||
|
||||
if "blocks" not in response.get("workflow_definition", {}):
|
||||
LOG.error(
|
||||
"LLM workflow_definition missing blocks",
|
||||
workflow_def_keys=list(response.get("workflow_definition", {}).keys()),
|
||||
organization_id=organization.organization_id,
|
||||
)
|
||||
raise HTTPException(status_code=422, detail="LLM workflow definition missing 'blocks' field")
|
||||
|
||||
LOG.info(
|
||||
"Workflow JSON validated",
|
||||
title=response.get("title"),
|
||||
block_count=len(response.get("workflow_definition", {}).get("blocks", [])),
|
||||
organization_id=organization.organization_id,
|
||||
)
|
||||
|
||||
LOG.info(
|
||||
"Creating workflow from JSON",
|
||||
response_keys=list(response.keys()),
|
||||
organization_id=organization.organization_id,
|
||||
)
|
||||
|
||||
try:
|
||||
# Sanitize LLM output for Jinja and required fields before validation
|
||||
response = self._sanitize_workflow_json(response)
|
||||
workflow_create_request = WorkflowCreateYAMLRequest.model_validate(response)
|
||||
except Exception as e:
|
||||
LOG.error(
|
||||
"Failed to validate workflow request",
|
||||
error=str(e),
|
||||
error_type=type(e).__name__,
|
||||
response_sample=str(response)[:1000],
|
||||
organization_id=organization.organization_id,
|
||||
exc_info=True,
|
||||
)
|
||||
raise HTTPException(status_code=422, detail=f"Failed to validate workflow structure: {str(e)}")
|
||||
|
||||
try:
|
||||
workflow = await app.WORKFLOW_SERVICE.create_workflow_from_request(
|
||||
organization=organization,
|
||||
request=workflow_create_request,
|
||||
)
|
||||
except Exception as e:
|
||||
LOG.error(
|
||||
"Failed to create workflow",
|
||||
error=str(e),
|
||||
error_type=type(e).__name__,
|
||||
organization_id=organization.organization_id,
|
||||
exc_info=True,
|
||||
)
|
||||
raise HTTPException(status_code=422, detail=f"Failed to create workflow: {str(e)}")
|
||||
|
||||
workflow_dict = workflow.model_dump(by_alias=True)
|
||||
LOG.info(
|
||||
"PDF import completed successfully",
|
||||
workflow_id=workflow.workflow_permanent_id,
|
||||
workflow_permanent_id_in_dict=workflow_dict.get("workflow_permanent_id"),
|
||||
dict_keys=list(workflow_dict.keys()),
|
||||
organization_id=organization.organization_id,
|
||||
)
|
||||
return workflow_dict
|
||||
|
||||
finally:
|
||||
# Clean up the temporary file
|
||||
os.unlink(temp_file_path)
|
||||
|
||||
|
||||
pdf_import_service = PDFImportService()
|
||||
Reference in New Issue
Block a user