Folders, Persistent Import Progress Tracking & UX Enhancements (#3841)

Co-authored-by: Jonathan Dobson <jon.m.dobson@gmail.com>
This commit is contained in:
Celal Zamanoglu
2025-11-05 18:37:18 +03:00
committed by GitHub
parent fcc3f30ba4
commit 75948053b9
32 changed files with 2886 additions and 538 deletions

View File

@@ -4,11 +4,10 @@ import tempfile
from typing import Any
import structlog
from fastapi import HTTPException, UploadFile
from fastapi import HTTPException
from pypdf import PdfReader
from skyvern.config import settings
from skyvern.forge import app
from skyvern.forge.prompts import prompt_engine
from skyvern.forge.sdk.api.llm.api_handler_factory import LLMAPIHandlerFactory
from skyvern.forge.sdk.schemas.organizations import Organization
@@ -133,156 +132,129 @@ class PDFImportService:
return raw
async def import_workflow_from_pdf(self, file: UploadFile, organization: Organization) -> dict[str, Any]:
LOG.info("Starting PDF import", filename=file.filename, organization_id=organization.organization_id)
if not file.filename.lower().endswith(".pdf"):
raise HTTPException(status_code=400, detail="Only PDF files are supported.")
def extract_text_from_pdf(self, file_contents: bytes, file_name: str) -> str:
"""Extract text from PDF file contents. Raises HTTPException if invalid."""
LOG.info("Extracting text from PDF", filename=file_name)
# Save the uploaded file to a temporary location
LOG.info("Saving PDF to temporary file", filename=file.filename)
with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file:
temp_file.write(await file.read())
temp_file.write(file_contents)
temp_file_path = temp_file.name
try:
# Extract text from PDF
LOG.info("Extracting text from PDF", filename=file.filename, temp_file=temp_file_path)
reader = PdfReader(temp_file_path)
sop_text = ""
for page_num, page in enumerate(reader.pages, 1):
page_text = page.extract_text()
page_text = page.extract_text() or ""
sop_text += page_text + "\n"
LOG.debug("Extracted text from page", page=page_num, text_length=len(page_text))
LOG.info(
"PDF text extraction complete",
total_text_length=len(sop_text),
organization_id=organization.organization_id,
)
LOG.info("PDF text extraction complete", total_text_length=len(sop_text))
if not sop_text.strip():
raise HTTPException(status_code=400, detail="No readable content found in the PDF.")
# Load and render the prompt template
prompt = prompt_engine.load_prompt(
"build-workflow-from-pdf",
sop_text=sop_text,
return sop_text
except Exception as e:
LOG.warning(
"Failed to read/extract text from PDF",
filename=file_name,
error=str(e),
)
# Use the LLM to convert SOP to workflow
llm_key = settings.LLM_KEY or "gpt-4o-mini"
LOG.info(
"Calling LLM to convert SOP to workflow",
llm_key=llm_key,
prompt_length=len(prompt),
sop_text_length=len(sop_text),
sop_chars_sent=len(sop_text),
organization_id=organization.organization_id,
)
llm_api_handler = LLMAPIHandlerFactory.get_llm_api_handler(llm_key)
response = await llm_api_handler(
prompt=prompt,
prompt_name="sop_to_workflow_conversion",
organization_id=organization.organization_id,
parameters={"max_completion_tokens": 32768}, # Override the default 4096 limit for PDF conversion
)
LOG.info(
"LLM response received",
response_type=type(response),
response_keys=list(response.keys()) if isinstance(response, dict) else None,
organization_id=organization.organization_id,
)
# The LLM API handler automatically parses JSON responses
# The response should be a dict with the workflow structure
if not isinstance(response, dict):
LOG.error(
"LLM returned non-dict response",
response_type=type(response),
response=str(response)[:500],
organization_id=organization.organization_id,
)
raise HTTPException(
status_code=422, detail="LLM returned invalid response format - expected JSON object"
)
# Validate that it has the required structure
if "workflow_definition" not in response:
LOG.error(
"LLM response missing workflow_definition",
response_keys=list(response.keys()),
organization_id=organization.organization_id,
)
raise HTTPException(status_code=422, detail="LLM response missing 'workflow_definition' field")
if "blocks" not in response.get("workflow_definition", {}):
LOG.error(
"LLM workflow_definition missing blocks",
workflow_def_keys=list(response.get("workflow_definition", {}).keys()),
organization_id=organization.organization_id,
)
raise HTTPException(status_code=422, detail="LLM workflow definition missing 'blocks' field")
LOG.info(
"Workflow JSON validated",
title=response.get("title"),
block_count=len(response.get("workflow_definition", {}).get("blocks", [])),
organization_id=organization.organization_id,
)
LOG.info(
"Creating workflow from JSON",
response_keys=list(response.keys()),
organization_id=organization.organization_id,
)
try:
# Sanitize LLM output for Jinja and required fields before validation
response = self._sanitize_workflow_json(response)
workflow_create_request = WorkflowCreateYAMLRequest.model_validate(response)
except Exception as e:
LOG.error(
"Failed to validate workflow request",
error=str(e),
error_type=type(e).__name__,
response_sample=str(response)[:1000],
organization_id=organization.organization_id,
exc_info=True,
)
raise HTTPException(status_code=422, detail=f"Failed to validate workflow structure: {str(e)}")
try:
workflow = await app.WORKFLOW_SERVICE.create_workflow_from_request(
organization=organization,
request=workflow_create_request,
)
except Exception as e:
LOG.error(
"Failed to create workflow",
error=str(e),
error_type=type(e).__name__,
organization_id=organization.organization_id,
exc_info=True,
)
raise HTTPException(status_code=422, detail=f"Failed to create workflow: {str(e)}")
workflow_dict = workflow.model_dump(by_alias=True)
LOG.info(
"PDF import completed successfully",
workflow_id=workflow.workflow_permanent_id,
workflow_permanent_id_in_dict=workflow_dict.get("workflow_permanent_id"),
dict_keys=list(workflow_dict.keys()),
organization_id=organization.organization_id,
)
return workflow_dict
raise HTTPException(status_code=400, detail="Invalid or unreadable PDF file.") from e
finally:
# Clean up the temporary file
os.unlink(temp_file_path)
async def create_workflow_from_sop_text(self, sop_text: str, organization: Organization) -> dict[str, Any]:
"""Convert SOP text to workflow definition using LLM (does not create the workflow)."""
# Load and render the prompt template
prompt = prompt_engine.load_prompt(
"build-workflow-from-pdf",
sop_text=sop_text,
)
# Use the LLM to convert SOP to workflow
llm_key = settings.LLM_KEY or "gpt-4o-mini"
LOG.info(
"Calling LLM to convert SOP to workflow",
llm_key=llm_key,
prompt_length=len(prompt),
sop_text_length=len(sop_text),
sop_chars_sent=len(sop_text),
organization_id=organization.organization_id,
)
llm_api_handler = LLMAPIHandlerFactory.get_llm_api_handler(llm_key)
response = await llm_api_handler(
prompt=prompt,
prompt_name="sop_to_workflow_conversion",
organization_id=organization.organization_id,
parameters={"max_completion_tokens": 32768}, # Override the default 4096 limit for PDF conversion
)
LOG.info(
"LLM response received",
response_type=type(response),
response_keys=list(response.keys()) if isinstance(response, dict) else None,
organization_id=organization.organization_id,
)
# The LLM API handler automatically parses JSON responses
# The response should be a dict with the workflow structure
if not isinstance(response, dict):
LOG.error(
"LLM returned non-dict response",
response_type=type(response),
response=str(response)[:500],
organization_id=organization.organization_id,
)
raise HTTPException(status_code=422, detail="LLM returned invalid response format - expected JSON object")
# Validate that it has the required structure
if "workflow_definition" not in response:
LOG.error(
"LLM response missing workflow_definition",
response_keys=list(response.keys()),
organization_id=organization.organization_id,
)
raise HTTPException(status_code=422, detail="LLM response missing 'workflow_definition' field")
if "blocks" not in response.get("workflow_definition", {}):
LOG.error(
"LLM workflow_definition missing blocks",
workflow_def_keys=list(response.get("workflow_definition", {}).keys()),
organization_id=organization.organization_id,
)
raise HTTPException(status_code=422, detail="LLM workflow definition missing 'blocks' field")
try:
# Sanitize LLM output for Jinja and required fields before validation
response = self._sanitize_workflow_json(response)
workflow_create_request = WorkflowCreateYAMLRequest.model_validate(response)
LOG.info(
"Workflow JSON validated successfully",
title=response.get("title"),
block_count=len(response.get("workflow_definition", {}).get("blocks", [])),
organization_id=organization.organization_id,
)
except Exception as e:
LOG.error(
"Failed to validate workflow request",
error=str(e),
error_type=type(e).__name__,
organization_id=organization.organization_id,
exc_info=True,
)
raise HTTPException(
status_code=422,
detail=f"Failed to validate workflow structure: {e!s}",
) from e
# Return the validated request as a dict (caller will create the workflow)
return workflow_create_request.model_dump(by_alias=True)
pdf_import_service = PDFImportService()