diff --git a/skyvern-frontend/src/routes/workflows/ImportWorkflowButton.tsx b/skyvern-frontend/src/routes/workflows/ImportWorkflowButton.tsx
index b618752f..9e7c40e9 100644
--- a/skyvern-frontend/src/routes/workflows/ImportWorkflowButton.tsx
+++ b/skyvern-frontend/src/routes/workflows/ImportWorkflowButton.tsx
@@ -67,16 +67,55 @@ function ImportWorkflowButton() {
{
if (event.target.files && event.target.files[0]) {
- const fileTextContent = await event.target.files[0].text();
- const isJson = isJsonString(fileTextContent);
- const content = isJson
- ? convertToYAML(JSON.parse(fileTextContent))
- : fileTextContent;
- createWorkflowFromYamlMutation.mutate(content);
+ const file = event.target.files[0];
+ const fileName = file.name.toLowerCase();
+
+ if (fileName.endsWith(".pdf")) {
+ // Handle PDF file - send as FormData to new endpoint
+ const formData = new FormData();
+ formData.append("file", file);
+
+ const client = await getClient(credentialGetter);
+ try {
+ const response = await client.post(
+ "/workflows/import-pdf",
+ formData,
+ {
+ headers: {
+ "Content-Type": "multipart/form-data",
+ },
+ },
+ );
+
+ queryClient.invalidateQueries({
+ queryKey: ["workflows"],
+ });
+ navigate(
+ `/workflows/${response.data.workflow_permanent_id}/debug`,
+ );
+ } catch (error) {
+ toast({
+ title: "Import Failed",
+ description:
+ error instanceof Error
+ ? error.message
+ : "Failed to import PDF",
+ variant: "destructive",
+ });
+ }
+ } else {
+ // Non-pdf files like yaml, json
+ const fileTextContent = await file.text();
+ const isJson = isJsonString(fileTextContent);
+ const content = isJson
+ ? convertToYAML(JSON.parse(fileTextContent))
+ : fileTextContent;
+ createWorkflowFromYamlMutation.mutate(content);
+ }
}
}}
/>
@@ -91,7 +130,7 @@ function ImportWorkflowButton() {
- Import a workflow from a YAML or JSON file
+ Import a workflow from a YAML, JSON, or PDF file
diff --git a/skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2 b/skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2
new file mode 100644
index 00000000..71c30faa
--- /dev/null
+++ b/skyvern/forge/prompts/skyvern/build-workflow-from-pdf.j2
@@ -0,0 +1,84 @@
+You are an AI assistant that converts Standard Operating Procedures (SOPs) from text into a Skyvern workflow definition in JSON format.
+
+REQUIRED OUTPUT FORMAT:
+Return a JSON object with this structure:
+{
+ "title": "workflow_name",
+ "workflow_definition": {
+ "parameters": [
+ {
+ "key": "parameter_name",
+ "parameter_type": "workflow",
+ "workflow_parameter_type": "string",
+ "description": "Parameter description",
+ "default_value": null
+ }
+ ],
+ "blocks": [
+ {
+ "label": "block_name",
+ "block_type": "block_type_name",
+ "continue_on_failure": false,
+ // ... other required fields for each block type
+ }
+ ]
+ }
+}
+
+PARAMETER STRUCTURE:
+- Each parameter MUST have: "key", "parameter_type", "workflow_parameter_type"
+- "parameter_type" should always be "workflow"
+- "workflow_parameter_type" can be: "string", "json", "credential_id", "file_url"
+- Use "credential_id" for passwords/credentials
+- "description" and "default_value" are optional
+
+AVAILABLE BLOCK TYPES (use these exact names):
+- "login": For user authentication with credentials
+- "navigation": For navigating to pages and filling forms
+- "action": For clicking buttons or simple actions
+- "extraction": For extracting data from pages
+- "task": For complex tasks with both navigation and extraction
+- "file_download": For downloading files
+- "for_loop": For repeating actions over a list
+- "validation": For validating extracted data
+- "wait": For waiting/pausing
+- "code": For custom code execution
+- "text_prompt": For LLM text generation
+- "http_request": For API calls
+
+BLOCK STRUCTURE REQUIREMENTS:
+1. Each block MUST have: label, block_type, continue_on_failure
+2. Navigation blocks need: url (can be empty string ""), navigation_goal, engine (set to "skyvern-1.0")
+3. Login blocks need: url, navigation_goal, parameter_keys (empty array if no credentials), engine (set to "skyvern-1.0")
+4. Extraction blocks need: url (can be empty string ""), data_extraction_goal, data_schema, engine (set to "skyvern-1.0")
+5. Action blocks need: url (can be empty string ""), navigation_goal, engine (set to "skyvern-1.0")
+6. Validation blocks need: complete_criterion OR terminate_criterion (at least one must be set), parameter_keys (empty array if none)
+7. For_loop blocks need: loop_blocks, loop_variable_reference
+8. File_download blocks need: url (can be empty string ""), navigation_goal, engine (set to "skyvern-1.0")
+
+CRITICAL INSTRUCTIONS - READ CAREFULLY:
+1. **BE THOROUGH**: Convert EVERY automatable step from the SOP into a block. Do not skip or combine steps.
+2. **PRESERVE SPECIFICITY**: If the SOP gives specific instructions (e.g., "click X then Y", "fill field A with value B"), create separate blocks for each action. DO NOT generalize or simplify.
+3. **ONE ACTION PER BLOCK**: For "action" blocks, each should do ONE specific thing (one click, one navigation). Create multiple blocks if needed.
+4. **DETAILED navigation_goal**: Copy the exact instructions from the SOP into the navigation_goal field. Be as specific as the original SOP.
+5. **MAINTAIN ORDER**: Keep the exact order of steps from the SOP. Do not reorganize or optimize.
+6. **INCLUDE ALL CONDITIONS**: If the SOP says "if X then Y", create conditional blocks or separate blocks for each scenario.
+7. **IGNORE ONLY**: Skip only steps requiring human judgment, creativity, or physical actions. Include everything else.
+8. **URL FIELD**: Most blocks need a "url" field. Use empty string "" if no specific URL is needed (browser stays on current page).
+9. **AVOID VALIDATION BLOCKS**: Use "extraction" blocks for data extraction. Only use "validation" if explicitly validating previously extracted data, and always include complete_criterion.
+10. Set continue_on_failure to false for critical steps, true for optional ones
+11. Set engine to "skyvern-1.0" for all blocks that need it
+12. Use clear, descriptive labels that match the SOP terminology
+
+EXAMPLES OF THOROUGHNESS:
+- If SOP says "Navigate to page X, then click button Y, then fill form Z" → Create 3 separate blocks
+- If SOP says "Click the 'Products' link in the top left" → Use that EXACT wording in navigation_goal
+- If SOP has 20 steps → Your workflow should have ~20 blocks (one per step)
+
+Standard Operating Procedure:
+```
+{{ sop_text }}
+```
+
+Return ONLY a valid JSON object following the structure above. Create a comprehensive workflow that captures EVERY automatable step from the SOP with full specificity.
+
diff --git a/skyvern/forge/sdk/routes/agent_protocol.py b/skyvern/forge/sdk/routes/agent_protocol.py
index 374d2be9..ed9c4eb4 100644
--- a/skyvern/forge/sdk/routes/agent_protocol.py
+++ b/skyvern/forge/sdk/routes/agent_protocol.py
@@ -98,6 +98,7 @@ from skyvern.schemas.runs import (
)
from skyvern.schemas.workflows import BlockType, WorkflowCreateYAMLRequest, WorkflowRequest, WorkflowStatus
from skyvern.services import block_service, run_service, task_v1_service, task_v2_service, workflow_service
+from skyvern.services.pdf_import_service import pdf_import_service
from skyvern.webeye.actions.actions import Action
LOG = structlog.get_logger()
@@ -588,6 +589,47 @@ async def create_workflow_from_prompt(
return workflow.model_dump(by_alias=True)
# Public route: import a workflow from an uploaded PDF SOP document.
@legacy_base_router.post(
    "/workflows/import-pdf",
    response_model=dict[str, Any],
    tags=["agent"],
    openapi_extra={
        "x-fern-sdk-method-name": "import_workflow_from_pdf",
        "x-fern-examples": [
            {
                "code-samples": [
                    {
                        "sdk": "curl",
                        "code": 'curl -X POST "https://api.skyvern.com/workflows/import-pdf" \\\n -H "Authorization: Bearer YOUR_API_KEY" \\\n -F "file=@sop_document.pdf"',
                    }
                ]
            }
        ],
    },
    description="Import a workflow from a PDF containing Standard Operating Procedures",
    summary="Import workflow from PDF",
    responses={
        200: {"description": "Successfully imported workflow from PDF"},
        400: {"description": "Invalid PDF file or no content found"},
        422: {"description": "Failed to convert SOP to workflow"},
        500: {"description": "Internal server error during processing"},
    },
)
# Duplicate registration (hidden from the schema) so trailing-slash requests
# resolve to the same handler instead of a 404/redirect.
@legacy_base_router.post(
    "/workflows/import-pdf/",
    response_model=dict[str, Any],
    include_in_schema=False,
)
async def import_workflow_from_pdf(
    file: UploadFile,
    current_org: Organization = Depends(org_auth_service.get_current_org),
) -> dict[str, Any]:
    """Import a workflow from a PDF file containing Standard Operating Procedures.

    Thin wrapper: records an analytics event, then delegates PDF text
    extraction, LLM conversion, validation, and persistence to
    pdf_import_service. Returns the created workflow serialized by alias.
    """
    analytics.capture("skyvern-oss-workflow-import-pdf")

    return await pdf_import_service.import_workflow_from_pdf(file, current_org)
+
+
@legacy_base_router.put(
"/workflows/{workflow_id}",
openapi_extra={
diff --git a/skyvern/services/pdf_import_service.py b/skyvern/services/pdf_import_service.py
new file mode 100644
index 00000000..b8defd64
--- /dev/null
+++ b/skyvern/services/pdf_import_service.py
@@ -0,0 +1,288 @@
+import os
+import re
+import tempfile
+from typing import Any
+
+import structlog
+from fastapi import HTTPException, UploadFile
+from pypdf import PdfReader
+
+from skyvern.config import settings
+from skyvern.forge import app
+from skyvern.forge.prompts import prompt_engine
+from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
+from skyvern.forge.sdk.schemas.organizations import Organization
+from skyvern.schemas.workflows import WorkflowCreateYAMLRequest
+
+LOG = structlog.get_logger(__name__)
+
+
class PDFImportService:
    """Converts an uploaded PDF SOP document into a persisted Skyvern workflow.

    Pipeline: extract text from the PDF with pypdf, prompt an LLM to turn the
    SOP text into a workflow-definition JSON, sanitize that JSON, validate it
    against WorkflowCreateYAMLRequest, then create the workflow for the
    organization via the workflow service.
    """

    @staticmethod
    def _sanitize_workflow_json(raw: dict[str, Any]) -> dict[str, Any]:
        """Clean LLM JSON to match Skyvern schema conventions and avoid Jinja errors.

        - Replace Jinja refs like {{workflow.foo}} or {{parameters.foo}} with {{foo}}
        - Auto-populate block.parameter_keys with any referenced parameter keys
        - Ensure all block labels are unique by appending indices to duplicates

        Mutates ``raw`` in place and returns the same dict.
        """

        def strip_prefixes(text: str) -> tuple[str, set[str]]:
            """Normalize Jinja refs in text; return (cleaned text, referenced variable bases)."""
            # Replace {{ workflow.xxx }} and {{ parameters.xxx }} with {{ xxx }}
            cleaned = text
            cleaned = re.sub(r"\{\{\s*workflow\.([a-zA-Z0-9_\.]+)\s*\}\}", r"{{ \1 }}", cleaned)
            cleaned = re.sub(r"\{\{\s*parameters\.([a-zA-Z0-9_\.]+)\s*\}\}", r"{{ \1 }}", cleaned)

            # Collect jinja variable names (take first segment before any dot)
            used: set[str] = set()
            for match in re.finditer(r"\{\{\s*([^\}\s\|]+)\s*[^}]*\}\}", cleaned):
                var = match.group(1)
                # Use base segment before dot to match parameter keys
                base = var.split(".")[0]
                used.add(base)
            return cleaned, used

        workflow_def = raw.get("workflow_definition", {})
        param_defs = workflow_def.get("parameters", []) or []
        param_keys = {p.get("key") for p in param_defs if isinstance(p, dict) and p.get("key")}

        blocks = workflow_def.get("blocks", []) or []

        # First pass: deduplicate block labels so downstream references stay unambiguous.
        seen_labels: dict[str, int] = {}
        deduplicated_count = 0
        for blk in blocks:
            if not isinstance(blk, dict):
                continue
            label = blk.get("label", "")
            if not label:
                continue

            if label in seen_labels:
                # This label has been seen before, append index
                seen_labels[label] += 1
                new_label = f"{label}_{seen_labels[label]}"
                LOG.info(
                    "Deduplicating block label",
                    original_label=label,
                    new_label=new_label,
                    occurrence=seen_labels[label],
                )
                blk["label"] = new_label
                deduplicated_count += 1
            else:
                # First time seeing this label
                seen_labels[label] = 1

        if deduplicated_count > 0:
            LOG.info(
                "Deduplicated block labels",
                total_deduplicated=deduplicated_count,
                duplicate_labels=sorted([label for label, count in seen_labels.items() if count > 1]),
            )

        # Second pass: normalize Jinja refs and backfill required per-block fields.
        for blk in blocks:
            if not isinstance(blk, dict):
                continue
            referenced: set[str] = set()
            # Fields that commonly contain Jinja
            for field in [
                "url",
                "navigation_goal",
                "data_extraction_goal",
                "complete_criterion",
                "terminate_criterion",
                "title",
            ]:
                val = blk.get(field)
                if isinstance(val, str):
                    cleaned, used = strip_prefixes(val)
                    blk[field] = cleaned
                    referenced.update(used)

            # Ensure required fields for text_prompt blocks
            if blk.get("block_type") == "text_prompt":
                if not blk.get("prompt"):
                    # Prefer an instruction-bearing field if present
                    blk["prompt"] = (
                        blk.get("navigation_goal")
                        or blk.get("title")
                        or blk.get("label")
                        or "Provide the requested text response."
                    )
                # Track jinja usage within the prompt
                prompt_val = blk.get("prompt")
                if isinstance(prompt_val, str):
                    cleaned, used = strip_prefixes(prompt_val)
                    blk["prompt"] = cleaned
                    referenced.update(used)

            # parameter_keys should include only known parameter keys
            if param_keys:
                keys_to_include = sorted(k for k in referenced if k in param_keys)
                if keys_to_include:
                    blk["parameter_keys"] = keys_to_include

            # Ensure engine where needed
            if blk.get("block_type") in {"navigation", "action", "extraction", "login", "file_download"}:
                blk.setdefault("engine", "skyvern-1.0")

            # Ensure url exists (can be empty string)
            if blk.get("block_type") in {"navigation", "action", "extraction", "file_download"}:
                if blk.get("url") is None:
                    blk["url"] = ""

        return raw

    async def import_workflow_from_pdf(self, file: UploadFile, organization: Organization) -> dict[str, Any]:
        """Create a workflow from an uploaded PDF SOP.

        Raises HTTPException 400 for non-PDF uploads or PDFs with no readable
        text, and 422 when the LLM output cannot be validated or the workflow
        cannot be created.
        """
        LOG.info("Starting PDF import", filename=file.filename, organization_id=organization.organization_id)

        # file.filename is Optional in FastAPI — guard against None before .lower().
        if not file.filename or not file.filename.lower().endswith(".pdf"):
            raise HTTPException(status_code=400, detail="Only PDF files are supported.")

        # Save the uploaded file to a temporary location so pypdf can read it by path
        LOG.info("Saving PDF to temporary file", filename=file.filename)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
            temp_file.write(await file.read())
            temp_file_path = temp_file.name

        try:
            # Extract text from PDF
            LOG.info("Extracting text from PDF", filename=file.filename, temp_file=temp_file_path)
            reader = PdfReader(temp_file_path)
            sop_text = ""
            for page_num, page in enumerate(reader.pages, 1):
                # extract_text may yield an empty result for image-only pages
                page_text = page.extract_text() or ""
                sop_text += page_text + "\n"
                LOG.debug("Extracted text from page", page=page_num, text_length=len(page_text))

            LOG.info(
                "PDF text extraction complete",
                total_text_length=len(sop_text),
                organization_id=organization.organization_id,
            )

            if not sop_text.strip():
                raise HTTPException(status_code=400, detail="No readable content found in the PDF.")

            # Load and render the prompt template
            prompt = prompt_engine.load_prompt(
                "build-workflow-from-pdf",
                sop_text=sop_text,
            )

            # Use the LLM to convert SOP to workflow
            llm_key = settings.LLM_KEY or "gpt-4o-mini"
            LOG.info(
                "Calling LLM to convert SOP to workflow",
                llm_key=llm_key,
                prompt_length=len(prompt),
                sop_text_length=len(sop_text),
                organization_id=organization.organization_id,
            )

            llm_api_handler = LLMAPIHandlerFactory.get_llm_api_handler(llm_key)

            response = await llm_api_handler(
                prompt=prompt,
                prompt_name="sop_to_workflow_conversion",
                organization_id=organization.organization_id,
                parameters={"max_completion_tokens": 32768},  # Override the default 4096 limit for PDF conversion
            )

            LOG.info(
                "LLM response received",
                response_type=type(response).__name__,
                response_keys=list(response.keys()) if isinstance(response, dict) else None,
                organization_id=organization.organization_id,
            )

            # The LLM API handler automatically parses JSON responses
            # The response should be a dict with the workflow structure
            if not isinstance(response, dict):
                LOG.error(
                    "LLM returned non-dict response",
                    response_type=type(response).__name__,
                    response=str(response)[:500],
                    organization_id=organization.organization_id,
                )
                raise HTTPException(
                    status_code=422, detail="LLM returned invalid response format - expected JSON object"
                )

            # Validate that it has the required structure
            if "workflow_definition" not in response:
                LOG.error(
                    "LLM response missing workflow_definition",
                    response_keys=list(response.keys()),
                    organization_id=organization.organization_id,
                )
                raise HTTPException(status_code=422, detail="LLM response missing 'workflow_definition' field")

            if "blocks" not in response.get("workflow_definition", {}):
                LOG.error(
                    "LLM workflow_definition missing blocks",
                    workflow_def_keys=list(response.get("workflow_definition", {}).keys()),
                    organization_id=organization.organization_id,
                )
                raise HTTPException(status_code=422, detail="LLM workflow definition missing 'blocks' field")

            LOG.info(
                "Workflow JSON validated",
                title=response.get("title"),
                block_count=len(response.get("workflow_definition", {}).get("blocks", [])),
                organization_id=organization.organization_id,
            )

            LOG.info(
                "Creating workflow from JSON",
                response_keys=list(response.keys()),
                organization_id=organization.organization_id,
            )

            try:
                # Sanitize LLM output for Jinja and required fields before validation
                response = self._sanitize_workflow_json(response)
                workflow_create_request = WorkflowCreateYAMLRequest.model_validate(response)
            except Exception as e:
                LOG.error(
                    "Failed to validate workflow request",
                    error=str(e),
                    error_type=type(e).__name__,
                    response_sample=str(response)[:1000],
                    organization_id=organization.organization_id,
                    exc_info=True,
                )
                # Chain the original exception so the validation failure is traceable.
                raise HTTPException(status_code=422, detail=f"Failed to validate workflow structure: {str(e)}") from e

            try:
                workflow = await app.WORKFLOW_SERVICE.create_workflow_from_request(
                    organization=organization,
                    request=workflow_create_request,
                )
            except HTTPException:
                # Preserve deliberate HTTP errors from the workflow service as-is
                # instead of re-wrapping them as a generic 422.
                raise
            except Exception as e:
                LOG.error(
                    "Failed to create workflow",
                    error=str(e),
                    error_type=type(e).__name__,
                    organization_id=organization.organization_id,
                    exc_info=True,
                )
                raise HTTPException(status_code=422, detail=f"Failed to create workflow: {str(e)}") from e

            workflow_dict = workflow.model_dump(by_alias=True)
            LOG.info(
                "PDF import completed successfully",
                workflow_id=workflow.workflow_permanent_id,
                workflow_permanent_id_in_dict=workflow_dict.get("workflow_permanent_id"),
                dict_keys=list(workflow_dict.keys()),
                organization_id=organization.organization_id,
            )
            return workflow_dict

        finally:
            # Clean up the temporary file
            os.unlink(temp_file_path)
+
+
# Module-level singleton shared by the API route handlers.
pdf_import_service = PDFImportService()