From cb2a72775dfb8efe1f3d0c139c279c74799c3fcc Mon Sep 17 00:00:00 2001
From: Celal Zamanoglu <95054566+celalzamanoglu@users.noreply.github.com>
Date: Tue, 27 Jan 2026 18:16:05 +0300
Subject: [PATCH] Add SOP-to-blocks endpoint for workflow editor - backend (#4556)

---
Review notes (text between the "---" line and the diffstat is ignored by
git am / git apply and is not part of the commit message):

 * The line structure of this patch was reconstructed from a copy whose
   newlines and indentation had been collapsed. Every hunk matches the line
   counts in its "@@" header, but leading whitespace is inferred from the
   Python structure. Validate with "git apply --check"; if a context line
   does not match, retry with "--ignore-space-change".
 * convert_sop_to_blocks now treats a null "workflow_definition" or "blocks"
   value as empty and skips non-dict block entries, matching the guards
   that _sanitize_workflow_json applies to the same data.
 * Label sanitization only runs on string labels, so a non-string label can
   no longer make re.sub raise TypeError.
 * Both fixes replace added lines one-for-one, so hunk sizes and the diffstat
   are unchanged. The blob ids on the "index" lines describe the original
   post-image, not the adjusted one.

 .../skyvern/build-workflow-from-pdf.j2     | 19 ++++-
 skyvern/forge/sdk/routes/agent_protocol.py | 70 +++++++++++++++++++
 skyvern/services/pdf_import_service.py     | 36 +++++++++-
 3 files changed, 123 insertions(+), 2 deletions(-)

diff --git a/skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2 b/skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2
index 71c30faa..97f2cf7e 100644
--- a/skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2
+++ b/skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2
@@ -1,5 +1,22 @@
 You are an AI assistant that converts Standard Operating Procedures (SOP) from text into a Skyvern workflow definition in JSON format.
 
+FIRST: Evaluate if this content contains procedural instructions.
+
+A valid SOP MUST contain:
+- Step-by-step instructions telling someone what to DO
+- Action verbs like: go to, click, navigate, enter, fill, submit, download, extract, etc.
+- Sequential steps for completing a task
+
+Note: A document may include reference materials (like sample invoices, screenshots, or examples) alongside instructions - this is still a valid SOP as long as it contains procedural steps.
+
+If the content has NO procedural instructions (just data, information, or documents without steps), return ONLY this JSON:
+{
+  "error": "not_sop",
+  "reason": "This document does not contain procedural instructions or steps to automate."
+}
+
+If the content DOES contain procedural instructions, proceed with the conversion below.
+
 REQUIRED OUTPUT FORMAT:
 Return a JSON object with this structure:
 {
@@ -68,7 +85,7 @@ CRITICAL INSTRUCTIONS - READ CAREFULLY:
 9. **AVOID VALIDATION BLOCKS**: Use "extraction" blocks for data extraction. Only use "validation" if explicitly validating previous extracted data, and always include complete_criterion.
 10. Set continue_on_failure to false for critical steps, true for optional ones
 11. Set engine to "skyvern-1.0" for all blocks that need it
-12. Use clear, descriptive labels that match the SOP terminology
+12. Use SHORT descriptive labels (1-5 words, snake_case): e.g., "login", "extract_date_data", "submit_google_login_form". Avoid long labels.
 
 EXAMPLES OF THOROUGHNESS:
 - If SOP says "Navigate to page X, then click button Y, then fill form Z" → Create 3 separate blocks
diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py
index 693b180e..8760dc8f 100644
--- a/skyvern/forge/sdk/routes/agent_protocol.py
+++ b/skyvern/forge/sdk/routes/agent_protocol.py
@@ -664,6 +664,76 @@ async def _validate_file_size(file: UploadFile) -> UploadFile:
     return file
 
 
+@legacy_base_router.post(
+    "/workflows/sop-to-blocks",
+    response_model=dict[str, Any],
+    include_in_schema=False,
+)
+@legacy_base_router.post(
+    "/workflows/sop-to-blocks/",
+    response_model=dict[str, Any],
+    include_in_schema=False,
+)
+async def convert_sop_to_blocks(
+    file: UploadFile = Depends(_validate_file_size),
+    current_org: Organization = Depends(org_auth_service.get_current_org),
+) -> dict[str, Any]:
+    """Convert a PDF SOP to workflow blocks without creating a workflow."""
+    analytics.capture(
+        "skyvern-oss-workflow-sop-to-blocks",
+        data={"organization_id": current_org.organization_id},
+    )
+
+    # Validate PDF
+    if not file.filename or not file.filename.lower().endswith(".pdf"):
+        raise HTTPException(status_code=400, detail="Only PDF files are supported.")
+
+    try:
+        file_contents = await file.read()
+        file_name = file.filename
+    finally:
+        await file.close()
+
+    # Extract text from PDF
+    sop_text = await asyncio.to_thread(
+        pdf_import_service.extract_text_from_pdf,
+        file_contents,
+        file_name,
+    )
+
+    # Convert to workflow definition via LLM
+    try:
+        result = await pdf_import_service.create_workflow_from_sop_text(sop_text, current_org)
+    except HTTPException:
+        raise
+    except Exception as e:
+        LOG.exception(
+            "Failed to convert SOP to blocks",
+            organization_id=current_org.organization_id,
+            filename=file_name,
+        )
+        raise HTTPException(
+            status_code=422,
+            detail="Failed to convert SOP to workflow blocks. Please verify the PDF content and try again.",
+        ) from e
+
+    workflow_def = result.get("workflow_definition") or {}
+
+    # Transform blocks: convert parameter_keys (backend format) to parameters (frontend format)
+    # This is done here rather than in _sanitize_workflow_json because the import-pdf endpoint
+    # needs the backend format for WorkflowCreateYAMLRequest validation
+    # Create shallow copies to avoid mutating shared data structures
+    blocks = [dict(block) for block in (workflow_def.get("blocks") or []) if isinstance(block, dict)]
+    for block in blocks:
+        parameter_keys = block.pop("parameter_keys", None) or []
+        block["parameters"] = [{"key": key} for key in parameter_keys]
+
+    return {
+        "blocks": blocks,
+        "parameters": workflow_def.get("parameters", []),
+    }
+
+
 @legacy_base_router.post(
     "/workflows/import-pdf",
     response_model=dict[str, Any],
diff --git a/skyvern/services/pdf_import_service.py b/skyvern/services/pdf_import_service.py
index 08cf5acb..70762b45 100644
--- a/skyvern/services/pdf_import_service.py
+++ b/skyvern/services/pdf_import_service.py
@@ -21,6 +21,7 @@ class PDFImportService:
     def _sanitize_workflow_json(raw: dict[str, Any]) -> dict[str, Any]:
         """Clean LLM JSON to match Skyvern schema conventions and avoid Jinja errors.
 
+        - Replace whitespace in block labels with underscores
         - Replace Jinja refs like {{workflow.foo}} or {{parameters.foo}} with {{foo}}
         - Auto-populate block.parameter_keys with any referenced parameter keys
         - Ensure all block labels are unique by appending indices to duplicates
@@ -47,7 +48,23 @@ class PDFImportService:
 
         blocks = workflow_def.get("blocks", []) or []
 
-        # First pass: deduplicate block labels
+        # First pass: sanitize block labels (replace whitespace with underscores)
+        for blk in blocks:
+            if not isinstance(blk, dict):
+                continue
+            label = blk.get("label", "")
+            if isinstance(label, str) and label:
+                # Replace any whitespace with underscores (same as frontend behavior)
+                sanitized_label = re.sub(r"\s+", "_", label)
+                if sanitized_label != label:
+                    LOG.info(
+                        "Sanitizing block label",
+                        original_label=label,
+                        sanitized_label=sanitized_label,
+                    )
+                    blk["label"] = sanitized_label
+
+        # Second pass: deduplicate block labels
         seen_labels: dict[str, int] = {}
         deduplicated_count = 0
         for blk in blocks:
@@ -130,6 +147,9 @@ class PDFImportService:
                 if blk.get("url") is None:
                     blk["url"] = ""
 
+        # Note: parameter_keys is kept in backend format for WorkflowCreateYAMLRequest validation
+        # The sop-to-blocks endpoint transforms to frontend format separately
+
         return raw
 
     def extract_text_from_pdf(self, file_contents: bytes, file_name: str) -> str:
@@ -212,6 +232,20 @@ class PDFImportService:
             )
             raise HTTPException(status_code=422, detail="LLM returned invalid response format - expected JSON object")
 
+        # Check if LLM detected non-SOP content
+        if response.get("error") == "not_sop":
+            LOG.info(
+                "LLM detected non-SOP content",
+                reason=response.get("reason"),
+                organization_id=organization.organization_id,
+            )
+            raise HTTPException(
+                status_code=422,
+                detail=response.get(
+                    "reason", "The uploaded PDF does not appear to contain a Standard Operating Procedure."
+                ),
+            )
+
         # Validate that it has the required structure
         if "workflow_definition" not in response:
             LOG.error(