Move Run Workflow button to top of page (#4611)

Co-authored-by: Suchintan Singh <suchintan@skyvern.com>
Co-authored-by: claude[bot] <41898282+claude[bot]@users.noreply.github.com>
2026-02-02 23:38:34 -08:00
parent fcbb3daddd
commit ebe43e12b1
9 changed files with 86 additions and 136 deletions
--- a/skyvern/forge/prompts/skyvern/extract-text-from-image.j2
+++ b/skyvern/forge/prompts/skyvern/extract-text-from-image.j2
@@ -1,19 +0,0 @@
-Extract all visible text from this image.
-
-MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments, no unnecessary quotes.
-
-Reply in JSON format with the following keys:
-{
-    "extracted_text": str // All text extracted from the image
-}
-
-TEXT EXTRACTION GUIDELINES:
- Preserve reading order (top to bottom, left to right)
- For tables: format as rows separated by newlines, columns separated by " | "
- For multi-column layouts: extract each column separately, separated by blank lines
- For forms: format as "Label: Value" on each line
- Preserve line breaks where they appear meaningful (paragraphs, list items)
- Include all visible text: headers, body text, labels, captions, watermarks
- For handwritten text: do your best to transcribe, use [illegible] for unclear parts
-
-If no text is visible in the image, return an empty string for extracted_text.
--- a/skyvern/forge/sdk/workflow/models/block.py
+++ b/skyvern/forge/sdk/workflow/models/block.py
@@ -3063,8 +3063,6 @@ class FileParserBlock(Block):
            return FileType.PDF
        elif suffix == ".tsv":
            return FileType.CSV  # TSV files are handled by the CSV parser
-        elif suffix in (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".tiff", ".tif"):
-            return FileType.IMAGE
        else:
            return FileType.CSV  # Default to CSV for .csv and any other extensions

@@ -3114,12 +3112,6 @@ class FileParserBlock(Block):
                validate_pdf_file(file_path, file_identifier=file_url_used)
            except PDFParsingError as e:
                raise InvalidFileType(file_url=file_url_used, file_type=self.file_type, error=str(e))
-        elif self.file_type == FileType.IMAGE:
-            kind = filetype.guess(file_path)
-            if kind is None or not kind.mime.startswith("image/"):
-                raise InvalidFileType(
-                    file_url=file_url_used, file_type=self.file_type, error="File is not a valid image"
-                )

    async def _parse_csv_file(self, file_path: str) -> list[dict[str, Any]]:
        """Parse CSV/TSV file and return list of dictionaries."""
@@ -3192,27 +3184,6 @@ class FileParserBlock(Block):
        except PDFParsingError as e:
            raise InvalidFileType(file_url=self.file_url, file_type=self.file_type, error=str(e))

-    async def _parse_image_file(self, file_path: str) -> str:
-        """Parse image file using vision LLM for OCR."""
-        try:
-            with open(file_path, "rb") as f:
-                image_bytes = f.read()
-
-            llm_prompt = prompt_engine.load_prompt("extract-text-from-image")
-            llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
-                self.override_llm_key, default=app.LLM_API_HANDLER
-            )
-            llm_response = await llm_api_handler(
-                prompt=llm_prompt,
-                prompt_name="extract-text-from-image",
-                screenshots=[image_bytes],
-                force_dict=True,
-            )
-            return llm_response.get("extracted_text", "")
-        except Exception:
-            LOG.exception("Failed to extract text from image via OCR", file_url=self.file_url)
-            raise
-
    async def _extract_with_ai(
        self, content: str | list[dict[str, Any]], workflow_run_context: WorkflowRunContext
    ) -> dict[str, Any]:
@@ -3239,8 +3210,9 @@ class FileParserBlock(Block):
            "extract-information-from-file-text", extracted_text_content=content_str, json_schema=schema_to_use
        )

-        llm_key = self.override_llm_key
-        llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(llm_key, default=app.LLM_API_HANDLER)
+        llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
+            self.override_llm_key, default=app.LLM_API_HANDLER
+        )

        llm_response = await llm_api_handler(
            prompt=llm_prompt, prompt_name="extract-information-from-file-text", force_dict=False
@@ -3289,9 +3261,9 @@ class FileParserBlock(Block):
        else:
            file_path = await download_file(self.file_url)

-        # Auto-detect file type if not explicitly set (IMAGE/EXCEL/PDF are explicit choices)
-        if self.file_type not in (FileType.IMAGE, FileType.EXCEL, FileType.PDF):
-            self.file_type = self._detect_file_type_from_url(self.file_url)
+        # Auto-detect file type based on file extension
+        detected_file_type = self._detect_file_type_from_url(self.file_url)
+        self.file_type = detected_file_type

        # Validate the file type
        self.validate_file_type(self.file_url, file_path)
@@ -3311,8 +3283,6 @@ class FileParserBlock(Block):
            parsed_data = await self._parse_excel_file(file_path)
        elif self.file_type == FileType.PDF:
            parsed_data = await self._parse_pdf_file(file_path)
-        elif self.file_type == FileType.IMAGE:
-            parsed_data = await self._parse_image_file(file_path)
        else:
            return await self.build_block_result(
                success=False,