Making file parser flexible to deprecate pdf parser (#3073)
Co-authored-by: Suchintan <suchintan@users.noreply.github.com>
This commit is contained in:
@@ -10,6 +10,8 @@ import { useDebugStore } from "@/store/useDebugStore";
|
||||
import { cn } from "@/util/utils";
|
||||
import { NodeHeader } from "../components/NodeHeader";
|
||||
import { useParams } from "react-router-dom";
|
||||
import { WorkflowDataSchemaInputGroup } from "@/components/DataSchemaInputGroup/WorkflowDataSchemaInputGroup";
|
||||
import { dataSchemaExampleForFileExtraction } from "../types";
|
||||
|
||||
function FileParserNode({ id, data }: NodeProps<FileParserNode>) {
|
||||
const { updateNodeData } = useReactFlow();
|
||||
@@ -21,8 +23,17 @@ function FileParserNode({ id, data }: NodeProps<FileParserNode>) {
|
||||
urlBlockLabel !== undefined && urlBlockLabel === label;
|
||||
const [inputs, setInputs] = useState({
|
||||
fileUrl: data.fileUrl,
|
||||
jsonSchema: data.jsonSchema,
|
||||
});
|
||||
|
||||
/**
 * Applies a single field edit to this node.
 *
 * Does nothing when `data.editable` is falsy. Otherwise the new value is
 * written both to the local `inputs` state and to the node's data via
 * `updateNodeData`.
 *
 * @param key - Name of the field being changed.
 * @param value - New value for that field.
 */
function handleChange(key: string, value: unknown) {
  if (!data.editable) {
    return;
  }
  // Functional update: merge into the latest state instead of the `inputs`
  // snapshot captured by this render, so back-to-back edits cannot overwrite
  // one another.
  setInputs((previous) => ({ ...previous, [key]: value }));
  updateNodeData(id, { [key]: value });
}
|
||||
|
||||
const isFirstWorkflowBlock = useIsFirstBlockInWorkflow({ id });
|
||||
|
||||
return (
|
||||
@@ -75,15 +86,19 @@ function FileParserNode({ id, data }: NodeProps<FileParserNode>) {
|
||||
nodeId={id}
|
||||
value={inputs.fileUrl}
|
||||
onChange={(value) => {
  // handleChange performs the editable check and updates both the local
  // inputs state and the node data, so it is the only call needed here.
  handleChange("fileUrl", value);
}}
|
||||
className="nopan text-xs"
|
||||
/>
|
||||
</div>
|
||||
<WorkflowDataSchemaInputGroup
|
||||
exampleValue={dataSchemaExampleForFileExtraction}
|
||||
value={inputs.jsonSchema}
|
||||
onChange={(value) => {
|
||||
handleChange("jsonSchema", value);
|
||||
}}
|
||||
suggestionContext={{}}
|
||||
/>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -1,9 +1,11 @@
|
||||
import type { Node } from "@xyflow/react";
|
||||
import { NodeBaseData } from "../types";
|
||||
import { AppNode } from "..";
|
||||
import { debuggableWorkflowBlockTypes } from "@/routes/workflows/types/workflowTypes";
|
||||
|
||||
/**
 * Data carried by a file parser node: the shared `NodeBaseData` fields plus
 * the two file-parser-specific string fields below.
 */
export type FileParserNodeData = NodeBaseData & {
  /** URL of the file to parse. */
  fileUrl: string;
  /** Data schema, held as a string. */
  jsonSchema: string;
};
|
||||
|
||||
/** React Flow node whose `type` is "fileParser" and whose data is `FileParserNodeData`. */
export type FileParserNode = Node<FileParserNodeData, "fileParser">;
|
||||
@@ -14,5 +16,10 @@ export const fileParserNodeDefaultData: FileParserNodeData = {
|
||||
label: "",
|
||||
fileUrl: "",
|
||||
continueOnFailure: false,
|
||||
jsonSchema: "null",
|
||||
model: null,
|
||||
} as const;
|
||||
|
||||
/**
 * Type guard for file parser nodes.
 *
 * @param node - Any node of the workflow editor.
 * @returns `true` when the node's `type` is "fileParser".
 */
export function isFileParserNode(node: AppNode): node is FileParserNode {
  const nodeType = node.type;
  return nodeType === "fileParser";
}
|
||||
|
||||
@@ -162,20 +162,19 @@ const nodeLibraryItems: Array<{
|
||||
/>
|
||||
),
|
||||
title: "File Parser Block",
|
||||
description: "Parse data from files",
|
||||
},
|
||||
{
|
||||
nodeType: "pdfParser",
|
||||
icon: (
|
||||
<WorkflowBlockIcon
|
||||
workflowBlockType={WorkflowBlockTypes.PDFParser}
|
||||
className="size-6"
|
||||
/>
|
||||
),
|
||||
title: "PDF Parser Block",
|
||||
description: "Extract data from PDF files",
|
||||
description: "Parse PDFs, CSVs, and Excel files",
|
||||
},
|
||||
// {
|
||||
// nodeType: "pdfParser",
|
||||
// icon: (
|
||||
// <WorkflowBlockIcon
|
||||
// workflowBlockType={WorkflowBlockTypes.PDFParser}
|
||||
// className="size-6"
|
||||
// />
|
||||
// ),
|
||||
// title: "PDF Parser Block",
|
||||
// description: "Extract data from PDF files",
|
||||
// },
|
||||
// nodeType: "upload",
|
||||
// icon: (
|
||||
// <WorkflowBlockIcon
|
||||
|
||||
@@ -56,7 +56,10 @@ import { ParametersState } from "./types";
|
||||
import { AppNode, isWorkflowBlockNode, WorkflowBlockNode } from "./nodes";
|
||||
import { codeBlockNodeDefaultData } from "./nodes/CodeBlockNode/types";
|
||||
import { downloadNodeDefaultData } from "./nodes/DownloadNode/types";
|
||||
import { fileParserNodeDefaultData } from "./nodes/FileParserNode/types";
|
||||
import {
|
||||
isFileParserNode,
|
||||
fileParserNodeDefaultData,
|
||||
} from "./nodes/FileParserNode/types";
|
||||
import {
|
||||
isLoopNode,
|
||||
LoopNode,
|
||||
@@ -468,6 +471,7 @@ function convertToNode(
|
||||
data: {
|
||||
...commonData,
|
||||
fileUrl: block.file_url,
|
||||
jsonSchema: JSON.stringify(block.json_schema, null, 2),
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -1254,7 +1258,8 @@ function getWorkflowBlock(node: WorkflowBlockNode): BlockYAML {
|
||||
...base,
|
||||
block_type: "file_url_parser",
|
||||
file_url: node.data.fileUrl,
|
||||
file_type: "csv",
|
||||
file_type: "csv", // Backend will auto-detect based on file extension
|
||||
json_schema: JSONParseSafe(node.data.jsonSchema),
|
||||
};
|
||||
}
|
||||
case "textPrompt": {
|
||||
@@ -2187,6 +2192,15 @@ function getWorkflowErrors(nodes: Array<AppNode>): Array<string> {
|
||||
}
|
||||
});
|
||||
|
||||
// Report every file parser node whose data schema string does not parse as JSON.
const fileParserNodes = nodes.filter(isFileParserNode);
for (const fileParserNode of fileParserNodes) {
  const { jsonSchema, label } = fileParserNode.data;
  try {
    JSON.parse(jsonSchema);
  } catch {
    errors.push(`${label}: Data schema is not valid JSON.`);
  }
}
|
||||
|
||||
const waitNodes = nodes.filter(isWaitNode);
|
||||
waitNodes.forEach((node) => {
|
||||
const waitTimeString = node.data.waitInSeconds.trim();
|
||||
|
||||
@@ -354,7 +354,8 @@ export type SendEmailBlock = WorkflowBlockBase & {
|
||||
/**
 * Workflow block of type "file_url_parser": the shared `WorkflowBlockBase`
 * fields plus the file URL, file type and optional JSON schema.
 */
export type FileURLParserBlock = WorkflowBlockBase & {
  block_type: "file_url_parser";
  file_url: string;
  // Declared exactly once; a second `file_type: "csv"` declaration would be a
  // duplicate-identifier error and conflict with this wider union.
  file_type: "csv" | "excel" | "pdf";
  json_schema: Record<string, unknown> | null;
};
|
||||
|
||||
export type ValidationBlock = WorkflowBlockBase & {
|
||||
|
||||
@@ -308,7 +308,8 @@ export type SendEmailBlockYAML = BlockYAMLBase & {
|
||||
/**
 * YAML form of a "file_url_parser" block: the shared `BlockYAMLBase` fields
 * plus the file URL, file type and an optional JSON schema.
 */
export type FileUrlParserBlockYAML = BlockYAMLBase & {
  block_type: "file_url_parser";
  file_url: string;
  // Declared exactly once; a second `file_type: "csv"` declaration would be a
  // duplicate-identifier error and conflict with this wider union.
  file_type: "csv" | "excel" | "pdf";
  json_schema?: Record<string, unknown> | null;
};
|
||||
|
||||
export type ForLoopBlockYAML = BlockYAMLBase & {
|
||||
|
||||
Reference in New Issue
Block a user