diff --git a/skyvern-frontend/src/routes/workflows/RunWorkflowForm.tsx b/skyvern-frontend/src/routes/workflows/RunWorkflowForm.tsx
index a9c8e118..4bfebb27 100644
--- a/skyvern-frontend/src/routes/workflows/RunWorkflowForm.tsx
+++ b/skyvern-frontend/src/routes/workflows/RunWorkflowForm.tsx
@@ -504,60 +504,6 @@ function RunWorkflowForm({
onSubmit={form.handleSubmit(onSubmit, handleInvalid)}
className="space-y-8"
>
-
-
{hasLoginBlockValidationError && (
@@ -1105,6 +1051,49 @@ function RunWorkflowForm({
+
+
+
{
+ const values = form.getValues();
+ const body = getRunWorkflowRequestBody(
+ values,
+ workflowParameters,
+ );
+ const transformedBody = transformToWorkflowRunRequest(
+ body,
+ workflowPermanentId,
+ );
+
+ // Build headers - x-max-steps-override is optional and can be added manually if needed
+ const headers: Record = {
+ "Content-Type": "application/json",
+ "x-api-key": apiCredential ?? "",
+ };
+
+ return {
+ method: "POST",
+ url: `${runsApiBaseUrl}/run/workflows`,
+ body: transformedBody,
+ headers,
+ } satisfies ApiCommandOptions;
+ }}
+ />
+
+
);
diff --git a/skyvern-frontend/src/routes/workflows/WorkflowRunParameters.tsx b/skyvern-frontend/src/routes/workflows/WorkflowRunParameters.tsx
index 72660a33..61dd25cf 100644
--- a/skyvern-frontend/src/routes/workflows/WorkflowRunParameters.tsx
+++ b/skyvern-frontend/src/routes/workflows/WorkflowRunParameters.tsx
@@ -44,16 +44,22 @@ function WorkflowRunParameters() {
const initialValues = getInitialValues(location, workflowParameters ?? []);
+ const header = (
+
+
+ Parameters{workflow?.title ? ` - ${workflow.title}` : ""}
+
+
+ Fill the placeholder values that you have linked throughout your
+ workflow.
+
+
+ );
+
if (isFetching) {
return (
-
- Parameters
-
- Fill the placeholder values that you have linked throughout your
- workflow.
-
-
+ {header}
);
@@ -64,21 +70,26 @@ function WorkflowRunParameters() {
}
return (
-
+
+ {header}
+
+
);
}
diff --git a/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx b/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx
index 99f6cc72..992f87e7 100644
--- a/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx
+++ b/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx
@@ -188,7 +188,7 @@ const nodeLibraryItems: Array<{
/>
),
title: "File Parser Block",
- description: "Parse PDFs, CSVs, and Excel files",
+ description: "Parse PDFs, CSVs, Excel files, and Images",
},
// {
// nodeType: "pdfParser",
diff --git a/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts b/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts
index 5b615a94..0607c235 100644
--- a/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts
+++ b/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts
@@ -415,7 +415,7 @@ export type SendEmailBlock = WorkflowBlockBase & {
export type FileURLParserBlock = WorkflowBlockBase & {
block_type: "file_url_parser";
file_url: string;
- file_type: "csv" | "excel" | "pdf";
+ file_type: "csv" | "excel" | "pdf" | "image";
json_schema: Record | null;
};
diff --git a/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts b/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts
index 81ced574..fa2ca173 100644
--- a/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts
+++ b/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts
@@ -350,7 +350,7 @@ export type SendEmailBlockYAML = BlockYAMLBase & {
export type FileUrlParserBlockYAML = BlockYAMLBase & {
block_type: "file_url_parser";
file_url: string;
- file_type: "csv" | "excel" | "pdf";
+ file_type: "csv" | "excel" | "pdf" | "image";
json_schema?: Record | null;
};
diff --git a/skyvern/client/types/file_type.py b/skyvern/client/types/file_type.py
index fade3f1d..02decc32 100644
--- a/skyvern/client/types/file_type.py
+++ b/skyvern/client/types/file_type.py
@@ -2,4 +2,4 @@
import typing
-FileType = typing.Union[typing.Literal["csv", "excel", "pdf"], typing.Any]
+FileType = typing.Union[typing.Literal["csv", "excel", "pdf", "image"], typing.Any]
diff --git a/skyvern/forge/prompts/skyvern/extract-text-from-image.j2 b/skyvern/forge/prompts/skyvern/extract-text-from-image.j2
new file mode 100644
index 00000000..a577d1e9
--- /dev/null
+++ b/skyvern/forge/prompts/skyvern/extract-text-from-image.j2
@@ -0,0 +1,19 @@
+Extract all visible text from this image.
+
+MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments, no unnecessary quotes.
+
+Reply in JSON format with the following keys:
+{
+ "extracted_text": str // All text extracted from the image
+}
+
+TEXT EXTRACTION GUIDELINES:
+- Preserve reading order (top to bottom, left to right)
+- For tables: format as rows separated by newlines, columns separated by " | "
+- For multi-column layouts: extract each column separately, separated by blank lines
+- For forms: format as "Label: Value" on each line
+- Preserve line breaks where they appear meaningful (paragraphs, list items)
+- Include all visible text: headers, body text, labels, captions, watermarks
+- For handwritten text: do your best to transcribe, use [illegible] for unclear parts
+
+If no text is visible in the image, return an empty string for extracted_text.
diff --git a/skyvern/forge/sdk/workflow/models/block.py b/skyvern/forge/sdk/workflow/models/block.py
index 3c1df7d0..e128558e 100644
--- a/skyvern/forge/sdk/workflow/models/block.py
+++ b/skyvern/forge/sdk/workflow/models/block.py
@@ -3063,6 +3063,8 @@ class FileParserBlock(Block):
return FileType.PDF
elif suffix == ".tsv":
return FileType.CSV # TSV files are handled by the CSV parser
+ elif suffix in (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".tiff", ".tif"):
+ return FileType.IMAGE
else:
return FileType.CSV # Default to CSV for .csv and any other extensions
@@ -3112,6 +3114,12 @@ class FileParserBlock(Block):
validate_pdf_file(file_path, file_identifier=file_url_used)
except PDFParsingError as e:
raise InvalidFileType(file_url=file_url_used, file_type=self.file_type, error=str(e))
+ elif self.file_type == FileType.IMAGE:
+ kind = filetype.guess(file_path)
+ if kind is None or not kind.mime.startswith("image/"):
+ raise InvalidFileType(
+ file_url=file_url_used, file_type=self.file_type, error="File is not a valid image"
+ )
async def _parse_csv_file(self, file_path: str) -> list[dict[str, Any]]:
"""Parse CSV/TSV file and return list of dictionaries."""
@@ -3184,6 +3192,27 @@ class FileParserBlock(Block):
except PDFParsingError as e:
raise InvalidFileType(file_url=self.file_url, file_type=self.file_type, error=str(e))
+    async def _parse_image_file(self, file_path: str) -> str:
+        """OCR the image at file_path with the vision LLM; always returns a str ("" when no text is found)."""
+        try:
+            with open(file_path, "rb") as f:
+                image_bytes = f.read()
+            llm_prompt = prompt_engine.load_prompt("extract-text-from-image")
+            llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
+                self.override_llm_key, default=app.LLM_API_HANDLER
+            )
+            llm_response = await llm_api_handler(
+                prompt=llm_prompt,
+                prompt_name="extract-text-from-image",
+                screenshots=[image_bytes],
+                force_dict=True,
+            )
+            # A JSON null for the key bypasses .get()'s default and would leak None to callers; normalize to str.
+            return str(llm_response.get("extracted_text") or "")
+        except Exception:
+            LOG.exception("Failed to extract text from image via OCR", file_url=self.file_url)
+            raise
+
async def _extract_with_ai(
self, content: str | list[dict[str, Any]], workflow_run_context: WorkflowRunContext
) -> dict[str, Any]:
@@ -3210,9 +3239,8 @@ class FileParserBlock(Block):
"extract-information-from-file-text", extracted_text_content=content_str, json_schema=schema_to_use
)
- llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
- self.override_llm_key, default=app.LLM_API_HANDLER
- )
+ llm_key = self.override_llm_key
+ llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(llm_key, default=app.LLM_API_HANDLER)
llm_response = await llm_api_handler(
prompt=llm_prompt, prompt_name="extract-information-from-file-text", force_dict=False
@@ -3261,9 +3289,9 @@ class FileParserBlock(Block):
else:
file_path = await download_file(self.file_url)
- # Auto-detect file type based on file extension
- detected_file_type = self._detect_file_type_from_url(self.file_url)
- self.file_type = detected_file_type
+        # Auto-detect from the URL only when file_type is CSV; IMAGE/EXCEL/PDF are explicit choices, kept as-is
+ if self.file_type not in (FileType.IMAGE, FileType.EXCEL, FileType.PDF):
+ self.file_type = self._detect_file_type_from_url(self.file_url)
# Validate the file type
self.validate_file_type(self.file_url, file_path)
@@ -3283,6 +3311,8 @@ class FileParserBlock(Block):
parsed_data = await self._parse_excel_file(file_path)
elif self.file_type == FileType.PDF:
parsed_data = await self._parse_pdf_file(file_path)
+ elif self.file_type == FileType.IMAGE:
+ parsed_data = await self._parse_image_file(file_path)
else:
return await self.build_block_result(
success=False,
diff --git a/skyvern/schemas/workflows.py b/skyvern/schemas/workflows.py
index 8f8faf2e..4b568a17 100644
--- a/skyvern/schemas/workflows.py
+++ b/skyvern/schemas/workflows.py
@@ -67,6 +67,7 @@ class FileType(StrEnum):
CSV = "csv"
EXCEL = "excel"
PDF = "pdf"
+ IMAGE = "image"
class PDFFormat(StrEnum):