Revert "Move Run Workflow button to top of page" (#4614)

2026-02-02 23:44:04 -08:00
parent ebe43e12b1
commit 1fcd659c38
9 changed files with 136 additions and 86 deletions
--- a/skyvern-frontend/src/routes/workflows/RunWorkflowForm.tsx
+++ b/skyvern-frontend/src/routes/workflows/RunWorkflowForm.tsx
@@ -504,60 +504,6 @@ function RunWorkflowForm({
        onSubmit={form.handleSubmit(onSubmit, handleInvalid)}
        className="space-y-8"
      >
-        <header className="flex items-end justify-between gap-4">
-          <div className="space-y-5">
-            <h1 className="text-3xl">
-              Parameters{workflow?.title ? ` - ${workflow.title}` : ""}
-            </h1>
-            <h2 className="text-lg text-slate-400">
-              Fill the placeholder values that you have linked throughout your
-              workflow.
-            </h2>
-          </div>
-          <div className="flex shrink-0 gap-2">
-            <CopyApiCommandDropdown
-              getOptions={() => {
-                const values = form.getValues();
-                const body = getRunWorkflowRequestBody(
-                  values,
-                  workflowParameters,
-                );
-                const transformedBody = transformToWorkflowRunRequest(
-                  body,
-                  workflowPermanentId,
-                );
-
-                // Build headers - x-max-steps-override is optional and can be added manually if needed
-                const headers: Record<string, string> = {
-                  "Content-Type": "application/json",
-                  "x-api-key": apiCredential ?? "<your-api-key>",
-                };
-
-                return {
-                  method: "POST",
-                  url: `${runsApiBaseUrl}/run/workflows`,
-                  body: transformedBody,
-                  headers,
-                } satisfies ApiCommandOptions;
-              }}
-            />
-            <Button
-              type="submit"
-              disabled={
-                runWorkflowMutation.isPending || hasLoginBlockValidationError
-              }
-            >
-              {runWorkflowMutation.isPending && (
-                <ReloadIcon className="mr-2 h-4 w-4 animate-spin" />
-              )}
-              {!runWorkflowMutation.isPending && (
-                <PlayIcon className="mr-2 h-4 w-4" />
-              )}
-              Run workflow
-            </Button>
-          </div>
-        </header>
-
        {hasLoginBlockValidationError && (
          <Alert variant="destructive">
            <ExclamationTriangleIcon className="h-4 w-4" />
@@ -1105,6 +1051,49 @@ function RunWorkflowForm({
            </AccordionItem>
          </Accordion>
        </div>
+
+        <div className="flex justify-end gap-2">
+          <CopyApiCommandDropdown
+            getOptions={() => {
+              const values = form.getValues();
+              const body = getRunWorkflowRequestBody(
+                values,
+                workflowParameters,
+              );
+              const transformedBody = transformToWorkflowRunRequest(
+                body,
+                workflowPermanentId,
+              );
+
+              // Build headers - x-max-steps-override is optional and can be added manually if needed
+              const headers: Record<string, string> = {
+                "Content-Type": "application/json",
+                "x-api-key": apiCredential ?? "<your-api-key>",
+              };
+
+              return {
+                method: "POST",
+                url: `${runsApiBaseUrl}/run/workflows`,
+                body: transformedBody,
+                headers,
+              } satisfies ApiCommandOptions;
+            }}
+          />
+          <Button
+            type="submit"
+            disabled={
+              runWorkflowMutation.isPending || hasLoginBlockValidationError
+            }
+          >
+            {runWorkflowMutation.isPending && (
+              <ReloadIcon className="mr-2 h-4 w-4 animate-spin" />
+            )}
+            {!runWorkflowMutation.isPending && (
+              <PlayIcon className="mr-2 h-4 w-4" />
+            )}
+            Run workflow
+          </Button>
+        </div>
      </form>
    </Form>
  );
--- a/skyvern-frontend/src/routes/workflows/WorkflowRunParameters.tsx
+++ b/skyvern-frontend/src/routes/workflows/WorkflowRunParameters.tsx
@@ -44,16 +44,22 @@ function WorkflowRunParameters() {

  const initialValues = getInitialValues(location, workflowParameters ?? []);

+  const header = (
+    <header className="space-y-5">
+      <h1 className="text-3xl">
+        Parameters{workflow?.title ? ` - ${workflow.title}` : ""}
+      </h1>
+      <h2 className="text-lg text-slate-400">
+        Fill the placeholder values that you have linked throughout your
+        workflow.
+      </h2>
+    </header>
+  );
+
  if (isFetching) {
    return (
      <div className="space-y-8">
-        <header className="space-y-5">
-          <h1 className="text-3xl">Parameters</h1>
-          <h2 className="text-lg text-slate-400">
-            Fill the placeholder values that you have linked throughout your
-            workflow.
-          </h2>
-        </header>
+        {header}
        <Skeleton className="h-96 w-full" />
      </div>
    );
@@ -64,21 +70,26 @@ function WorkflowRunParameters() {
  }

  return (
-    <RunWorkflowForm
-      initialValues={initialValues}
-      workflowParameters={workflowParameters}
-      initialSettings={{
-        proxyLocation:
-          proxyLocation ?? workflow.proxy_location ?? ProxyLocation.Residential,
-        webhookCallbackUrl:
-          webhookCallbackUrl ?? workflow.webhook_callback_url ?? "",
-        maxScreenshotScrolls:
-          maxScreenshotScrolls ?? workflow.max_screenshot_scrolls ?? null,
-        extraHttpHeaders:
-          extraHttpHeaders ?? workflow.extra_http_headers ?? null,
-        cdpAddress: null,
-      }}
-    />
+    <div className="space-y-8">
+      {header}
+      <RunWorkflowForm
+        initialValues={initialValues}
+        workflowParameters={workflowParameters}
+        initialSettings={{
+          proxyLocation:
+            proxyLocation ??
+            workflow.proxy_location ??
+            ProxyLocation.Residential,
+          webhookCallbackUrl:
+            webhookCallbackUrl ?? workflow.webhook_callback_url ?? "",
+          maxScreenshotScrolls:
+            maxScreenshotScrolls ?? workflow.max_screenshot_scrolls ?? null,
+          extraHttpHeaders:
+            extraHttpHeaders ?? workflow.extra_http_headers ?? null,
+          cdpAddress: null,
+        }}
+      />
+    </div>
  );
 }

--- a/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx
+++ b/skyvern-frontend/src/routes/workflows/editor/panels/WorkflowNodeLibraryPanel.tsx
@@ -188,7 +188,7 @@ const nodeLibraryItems: Array<{
      />
    ),
    title: "File Parser Block",
-    description: "Parse PDFs, CSVs, and Excel files",
+    description: "Parse PDFs, CSVs, Excel files, and Images",
  },
  // {
  //   nodeType: "pdfParser",
--- a/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts
+++ b/skyvern-frontend/src/routes/workflows/types/workflowTypes.ts
@@ -415,7 +415,7 @@ export type SendEmailBlock = WorkflowBlockBase & {
 export type FileURLParserBlock = WorkflowBlockBase & {
  block_type: "file_url_parser";
  file_url: string;
-  file_type: "csv" | "excel" | "pdf";
+  file_type: "csv" | "excel" | "pdf" | "image";
  json_schema: Record<string, unknown> | null;
 };

--- a/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts
+++ b/skyvern-frontend/src/routes/workflows/types/workflowYamlTypes.ts
@@ -350,7 +350,7 @@ export type SendEmailBlockYAML = BlockYAMLBase & {
 export type FileUrlParserBlockYAML = BlockYAMLBase & {
  block_type: "file_url_parser";
  file_url: string;
-  file_type: "csv" | "excel" | "pdf";
+  file_type: "csv" | "excel" | "pdf" | "image";
  json_schema?: Record<string, unknown> | null;
 };

--- a/skyvern/client/types/file_type.py
+++ b/skyvern/client/types/file_type.py
@@ -2,4 +2,4 @@

 import typing

-FileType = typing.Union[typing.Literal["csv", "excel", "pdf"], typing.Any]
+FileType = typing.Union[typing.Literal["csv", "excel", "pdf", "image"], typing.Any]
--- a/skyvern/forge/prompts/skyvern/extract-text-from-image.j2
+++ b/skyvern/forge/prompts/skyvern/extract-text-from-image.j2
@@ -0,0 +1,19 @@
+Extract all visible text from this image.
+
+MAKE SURE YOU OUTPUT VALID JSON. No text before or after JSON, no trailing commas, no comments, no unnecessary quotes.
+
+Reply in JSON format with the following keys:
+{
+    "extracted_text": str // All text extracted from the image
+}
+
+TEXT EXTRACTION GUIDELINES:
+- Preserve reading order (top to bottom, left to right)
+- For tables: format as rows separated by newlines, columns separated by " | "
+- For multi-column layouts: extract each column separately, separated by blank lines
+- For forms: format as "Label: Value" on each line
+- Preserve line breaks where they appear meaningful (paragraphs, list items)
+- Include all visible text: headers, body text, labels, captions, watermarks
+- For handwritten text: do your best to transcribe, use [illegible] for unclear parts
+
+If no text is visible in the image, return an empty string for extracted_text.
--- a/skyvern/forge/sdk/workflow/models/block.py
+++ b/skyvern/forge/sdk/workflow/models/block.py
@@ -3063,6 +3063,8 @@ class FileParserBlock(Block):
            return FileType.PDF
        elif suffix == ".tsv":
            return FileType.CSV  # TSV files are handled by the CSV parser
+        elif suffix in (".png", ".jpg", ".jpeg", ".gif", ".bmp", ".webp", ".tiff", ".tif"):
+            return FileType.IMAGE
        else:
            return FileType.CSV  # Default to CSV for .csv and any other extensions

@@ -3112,6 +3114,12 @@ class FileParserBlock(Block):
                validate_pdf_file(file_path, file_identifier=file_url_used)
            except PDFParsingError as e:
                raise InvalidFileType(file_url=file_url_used, file_type=self.file_type, error=str(e))
+        elif self.file_type == FileType.IMAGE:
+            kind = filetype.guess(file_path)
+            if kind is None or not kind.mime.startswith("image/"):
+                raise InvalidFileType(
+                    file_url=file_url_used, file_type=self.file_type, error="File is not a valid image"
+                )

    async def _parse_csv_file(self, file_path: str) -> list[dict[str, Any]]:
        """Parse CSV/TSV file and return list of dictionaries."""
@@ -3184,6 +3192,27 @@ class FileParserBlock(Block):
        except PDFParsingError as e:
            raise InvalidFileType(file_url=self.file_url, file_type=self.file_type, error=str(e))

+    async def _parse_image_file(self, file_path: str) -> str:
+        """Return the text a vision LLM extracts from the image at file_path (empty string if none)."""
+        try:
+            with open(file_path, "rb") as f:
+                image_bytes = f.read()
+
+            llm_prompt = prompt_engine.load_prompt("extract-text-from-image")
+            llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
+                self.override_llm_key, default=app.LLM_API_HANDLER
+            )
+            llm_response = await llm_api_handler(
+                prompt=llm_prompt,
+                prompt_name="extract-text-from-image",
+                screenshots=[image_bytes],
+                force_dict=True,
+            )
+            return llm_response.get("extracted_text") or ""  # null or missing key -> "" so the str contract holds
+        except Exception:
+            LOG.exception("Failed to extract text from image via OCR", file_url=self.file_url)
+            raise
+
    async def _extract_with_ai(
        self, content: str | list[dict[str, Any]], workflow_run_context: WorkflowRunContext
    ) -> dict[str, Any]:
@@ -3210,9 +3239,8 @@ class FileParserBlock(Block):
            "extract-information-from-file-text", extracted_text_content=content_str, json_schema=schema_to_use
        )

-        llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(
-            self.override_llm_key, default=app.LLM_API_HANDLER
-        )
+        llm_key = self.override_llm_key
+        llm_api_handler = LLMAPIHandlerFactory.get_override_llm_api_handler(llm_key, default=app.LLM_API_HANDLER)

        llm_response = await llm_api_handler(
            prompt=llm_prompt, prompt_name="extract-information-from-file-text", force_dict=False
@@ -3261,9 +3289,9 @@ class FileParserBlock(Block):
        else:
            file_path = await download_file(self.file_url)

-        # Auto-detect file type based on file extension
-        detected_file_type = self._detect_file_type_from_url(self.file_url)
-        self.file_type = detected_file_type
+        # Auto-detect the file type from the URL extension unless it is already IMAGE, EXCEL, or PDF (explicit choices)
+        if self.file_type not in (FileType.IMAGE, FileType.EXCEL, FileType.PDF):
+            self.file_type = self._detect_file_type_from_url(self.file_url)

        # Validate the file type
        self.validate_file_type(self.file_url, file_path)
@@ -3283,6 +3311,8 @@ class FileParserBlock(Block):
            parsed_data = await self._parse_excel_file(file_path)
        elif self.file_type == FileType.PDF:
            parsed_data = await self._parse_pdf_file(file_path)
+        elif self.file_type == FileType.IMAGE:
+            parsed_data = await self._parse_image_file(file_path)
        else:
            return await self.build_block_result(
                success=False,
--- a/skyvern/schemas/workflows.py
+++ b/skyvern/schemas/workflows.py
@@ -67,6 +67,7 @@ class FileType(StrEnum):
    CSV = "csv"
    EXCEL = "excel"
    PDF = "pdf"
+    IMAGE = "image"


 class PDFFormat(StrEnum):