Folders, Persistent Import Progress Tracking & UX Enhancements (#3841)

Co-authored-by: Jonathan Dobson <jon.m.dobson@gmail.com>
2025-11-05 18:37:18 +03:00
parent fcc3f30ba4
commit 75948053b9
32 changed files with 2886 additions and 538 deletions
--- a/skyvern/services/pdf_import_service.py
+++ b/skyvern/services/pdf_import_service.py
@@ -4,11 +4,10 @@ import tempfile
 from typing import Any

 import structlog
-from fastapi import HTTPException, UploadFile
+from fastapi import HTTPException
 from pypdf import PdfReader

 from skyvern.config import settings
-from skyvern.forge import app
 from skyvern.forge.prompts import prompt_engine
 from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
 from skyvern.forge.sdk.schemas.organizations import Organization
@@ -133,156 +132,129 @@ class PDFImportService:

        return raw

-    async def import_workflow_from_pdf(self, file: UploadFile, organization: Organization) -> dict[str, Any]:
-        LOG.info("Starting PDF import", filename=file.filename, organization_id=organization.organization_id)
-
-        if not file.filename.lower().endswith(".pdf"):
-            raise HTTPException(status_code=400, detail="Only PDF files are supported.")
+    def extract_text_from_pdf(self, file_contents: bytes, file_name: str) -> str:
+        """Extract text from PDF bytes. Raises HTTPException (400) if the PDF is unreadable or has no text."""
+        LOG.info("Extracting text from PDF", filename=file_name)

         # Save the uploaded file to a temporary location
-        LOG.info("Saving PDF to temporary file", filename=file.filename)
         with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
-            temp_file.write(await file.read())
+            temp_file.write(file_contents)
             temp_file_path = temp_file.name

         try:
-            # Extract text from PDF
-            LOG.info("Extracting text from PDF", filename=file.filename, temp_file=temp_file_path)
             reader = PdfReader(temp_file_path)
             sop_text = ""
             for page_num, page in enumerate(reader.pages, 1):
-                page_text = page.extract_text()
+                page_text = page.extract_text() or ""
                 sop_text += page_text + "\n"
                 LOG.debug("Extracted text from page", page=page_num, text_length=len(page_text))

-            LOG.info(
-                "PDF text extraction complete",
-                total_text_length=len(sop_text),
-                organization_id=organization.organization_id,
-            )
+            LOG.info("PDF text extraction complete", total_text_length=len(sop_text))

             if not sop_text.strip():
                 raise HTTPException(status_code=400, detail="No readable content found in the PDF.")

-            # Load and render the prompt template
-            prompt = prompt_engine.load_prompt(
-                "build-workflow-from-pdf",
-                sop_text=sop_text,
+            return sop_text
+        except HTTPException:
+            raise  # keep the specific "no readable content" 400 instead of re-wrapping it below
+        except Exception as e:
+            LOG.warning(
+                "Failed to read/extract text from PDF", filename=file_name, error=str(e)
             )
-
-            # Use the LLM to convert SOP to workflow
-            llm_key = settings.LLM_KEY or "gpt-4o-mini"
-            LOG.info(
-                "Calling LLM to convert SOP to workflow",
-                llm_key=llm_key,
-                prompt_length=len(prompt),
-                sop_text_length=len(sop_text),
-                sop_chars_sent=len(sop_text),
-                organization_id=organization.organization_id,
-            )
-
-            llm_api_handler = LLMAPIHandlerFactory.get_llm_api_handler(llm_key)
-
-            response = await llm_api_handler(
-                prompt=prompt,
-                prompt_name="sop_to_workflow_conversion",
-                organization_id=organization.organization_id,
-                parameters={"max_completion_tokens": 32768},  # Override the default 4096 limit for PDF conversion
-            )
-
-            LOG.info(
-                "LLM response received",
-                response_type=type(response),
-                response_keys=list(response.keys()) if isinstance(response, dict) else None,
-                organization_id=organization.organization_id,
-            )
-
-            # The LLM API handler automatically parses JSON responses
-            # The response should be a dict with the workflow structure
-            if not isinstance(response, dict):
-                LOG.error(
-                    "LLM returned non-dict response",
-                    response_type=type(response),
-                    response=str(response)[:500],
-                    organization_id=organization.organization_id,
-                )
-                raise HTTPException(
-                    status_code=422, detail="LLM returned invalid response format - expected JSON object"
-                )
-
-            # Validate that it has the required structure
-            if "workflow_definition" not in response:
-                LOG.error(
-                    "LLM response missing workflow_definition",
-                    response_keys=list(response.keys()),
-                    organization_id=organization.organization_id,
-                )
-                raise HTTPException(status_code=422, detail="LLM response missing 'workflow_definition' field")
-
-            if "blocks" not in response.get("workflow_definition", {}):
-                LOG.error(
-                    "LLM workflow_definition missing blocks",
-                    workflow_def_keys=list(response.get("workflow_definition", {}).keys()),
-                    organization_id=organization.organization_id,
-                )
-                raise HTTPException(status_code=422, detail="LLM workflow definition missing 'blocks' field")
-
-            LOG.info(
-                "Workflow JSON validated",
-                title=response.get("title"),
-                block_count=len(response.get("workflow_definition", {}).get("blocks", [])),
-                organization_id=organization.organization_id,
-            )
-
-            LOG.info(
-                "Creating workflow from JSON",
-                response_keys=list(response.keys()),
-                organization_id=organization.organization_id,
-            )
-
-            try:
-                # Sanitize LLM output for Jinja and required fields before validation
-                response = self._sanitize_workflow_json(response)
-                workflow_create_request = WorkflowCreateYAMLRequest.model_validate(response)
-            except Exception as e:
-                LOG.error(
-                    "Failed to validate workflow request",
-                    error=str(e),
-                    error_type=type(e).__name__,
-                    response_sample=str(response)[:1000],
-                    organization_id=organization.organization_id,
-                    exc_info=True,
-                )
-                raise HTTPException(status_code=422, detail=f"Failed to validate workflow structure: {str(e)}")
-
-            try:
-                workflow = await app.WORKFLOW_SERVICE.create_workflow_from_request(
-                    organization=organization,
-                    request=workflow_create_request,
-                )
-            except Exception as e:
-                LOG.error(
-                    "Failed to create workflow",
-                    error=str(e),
-                    error_type=type(e).__name__,
-                    organization_id=organization.organization_id,
-                    exc_info=True,
-                )
-                raise HTTPException(status_code=422, detail=f"Failed to create workflow: {str(e)}")
-
-            workflow_dict = workflow.model_dump(by_alias=True)
-            LOG.info(
-                "PDF import completed successfully",
-                workflow_id=workflow.workflow_permanent_id,
-                workflow_permanent_id_in_dict=workflow_dict.get("workflow_permanent_id"),
-                dict_keys=list(workflow_dict.keys()),
-                organization_id=organization.organization_id,
-            )
-            return workflow_dict
-
+            raise HTTPException(status_code=400, detail="Invalid or unreadable PDF file.") from e
         finally:
             # Clean up the temporary file
             os.unlink(temp_file_path)

+    async def create_workflow_from_sop_text(self, sop_text: str, organization: Organization) -> dict[str, Any]:
+        """Convert SOP text to a validated workflow request dict via the LLM (does not create the workflow)."""
+        # Load and render the prompt template
+        prompt = prompt_engine.load_prompt(
+            "build-workflow-from-pdf",
+            sop_text=sop_text,
+        )
+
+        # Use the LLM to convert SOP to workflow
+        llm_key = settings.LLM_KEY or "gpt-4o-mini"
+        LOG.info(
+            "Calling LLM to convert SOP to workflow",
+            llm_key=llm_key,
+            prompt_length=len(prompt),
+            sop_text_length=len(sop_text),
+            sop_chars_sent=len(sop_text),
+            organization_id=organization.organization_id,
+        )
+
+        llm_api_handler = LLMAPIHandlerFactory.get_llm_api_handler(llm_key)
+
+        response = await llm_api_handler(
+            prompt=prompt,
+            prompt_name="sop_to_workflow_conversion",
+            organization_id=organization.organization_id,
+            parameters={"max_completion_tokens": 32768},  # Override the default 4096 limit for PDF conversion
+        )
+
+        LOG.info(
+            "LLM response received",
+            response_type=type(response),
+            response_keys=list(response.keys()) if isinstance(response, dict) else None,
+            organization_id=organization.organization_id,
+        )
+
+        # The LLM API handler parses JSON responses itself; a usable response is a dict with the workflow structure
+        if not isinstance(response, dict):
+            LOG.error(
+                "LLM returned non-dict response",
+                response_type=type(response),
+                response=str(response)[:500],
+                organization_id=organization.organization_id,
+            )
+            raise HTTPException(status_code=422, detail="LLM returned invalid response format - expected JSON object")
+
+        # Validate that it has the required structure
+        if "workflow_definition" not in response:
+            LOG.error(
+                "LLM response missing workflow_definition",
+                response_keys=list(response.keys()),
+                organization_id=organization.organization_id,
+            )
+            raise HTTPException(status_code=422, detail="LLM response missing 'workflow_definition' field")
+
+        workflow_definition = response["workflow_definition"]  # untrusted LLM output: may be null or a non-dict
+        if not isinstance(workflow_definition, dict) or "blocks" not in workflow_definition:
+            LOG.error(
+                "LLM workflow_definition missing blocks",
+                workflow_def_keys=list(workflow_definition) if isinstance(workflow_definition, dict) else None,
+                organization_id=organization.organization_id,
+            )
+            raise HTTPException(status_code=422, detail="LLM workflow definition missing 'blocks' field")
+
+        try:
+            # Sanitize LLM output for Jinja and required fields before validation
+            response = self._sanitize_workflow_json(response)
+            workflow_create_request = WorkflowCreateYAMLRequest.model_validate(response)
+
+            LOG.info(
+                "Workflow JSON validated successfully",
+                title=response.get("title"),
+                block_count=len(response.get("workflow_definition", {}).get("blocks", [])),
+                organization_id=organization.organization_id,
+            )
+        except Exception as e:
+            LOG.error(
+                "Failed to validate workflow request",
+                error=str(e),
+                error_type=type(e).__name__,
+                organization_id=organization.organization_id,
+                exc_info=True,
+            )
+            raise HTTPException(
+                status_code=422,
+                detail=f"Failed to validate workflow structure: {e!s}",
+            ) from e
+
+        # Return the validated request as a dict (caller will create the workflow)
+        return workflow_create_request.model_dump(by_alias=True)
+

 pdf_import_service = PDFImportService()