diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index 7d50b780..7eb0dd79 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -15,6 +15,7 @@ import { encrypt, decrypt } from '../utils/auth'; import { WorkflowFile } from 'maxun-core'; import { cancelScheduledWorkflow, scheduleWorkflow } from '../storage/schedule'; import { pgBossClient } from '../storage/pgboss'; +import { WorkflowEnricher } from '../sdk/workflowEnricher'; export const router = Router(); @@ -518,6 +519,92 @@ router.post('/recordings/scrape', requireSignIn, async (req: AuthenticatedReques } }); +/** + * POST endpoint for creating an LLM-powered extraction robot + */ +router.post('/recordings/llm', requireSignIn, async (req: AuthenticatedRequest, res) => { + try { + const { url, prompt, llmProvider, llmModel, llmApiKey, llmBaseUrl, robotName } = req.body; + + if (!url || !prompt) { + return res.status(400).json({ error: 'Both "url" and "prompt" fields are required.' }); + } + + if (!req.user) { + return res.status(401).send({ error: 'Unauthorized' }); + } + + try { + new URL(url); + } catch (err) { + return res.status(400).json({ error: 'Invalid URL format' }); + } + + logger.log('info', `Starting LLM workflow generation for URL: ${url}`); + + const workflowResult = await WorkflowEnricher.generateWorkflowFromPrompt(url, prompt, req.user.id, { + provider: llmProvider || 'ollama', + model: llmModel, + apiKey: llmApiKey, + baseUrl: llmBaseUrl + }); + + if (!workflowResult.success || !workflowResult.workflow) { + logger.log('error', `Failed to generate workflow: ${JSON.stringify(workflowResult.errors)}`); + return res.status(400).json({ + error: 'Failed to generate workflow from prompt', + details: workflowResult.errors + }); + } + + const robotId = uuid(); + const currentTimestamp = new Date().toISOString(); + const finalRobotName = robotName || `LLM Extract: ${prompt.substring(0, 50)}`; + + const newRobot = await Robot.create({ + id: uuid(), + userId: req.user.id, + recording_meta: { + name: finalRobotName, + id: robotId, + createdAt: currentTimestamp, + updatedAt: currentTimestamp, + pairs: workflowResult.workflow.length, + params: [], + type: 'extract', + url: workflowResult.url || url, + }, + recording: { workflow: workflowResult.workflow }, + google_sheet_email: null, + google_sheet_name: null, + google_sheet_id: null, + google_access_token: null, + google_refresh_token: null, + schedule: null, + }); + + logger.log('info', `LLM robot created with id: ${newRobot.id}`); + capture('maxun-oss-robot-created', { + robot_meta: newRobot.recording_meta, + recording: newRobot.recording, + llm_provider: llmProvider || 'ollama', + }); + + return res.status(201).json({ + message: 'LLM robot created successfully.', + robot: newRobot, + }); + } catch (error) { + if (error instanceof Error) { + logger.log('error', `Error creating LLM robot: ${error.message}`); + return res.status(500).json({ error: error.message }); + } else { + logger.log('error', 'Unknown error creating LLM robot'); + return res.status(500).json({ error: 'An unknown error occurred.' }); + } + } +}); + /** * DELETE endpoint for deleting a recording from the storage. */ @@ -1234,4 +1321,4 @@ export async function recoverOrphanedRuns() { } } -export { processQueuedRuns }; +export { processQueuedRuns }; \ No newline at end of file diff --git a/src/api/storage.ts b/src/api/storage.ts index d2b28d5e..4ac2f01b 100644 --- a/src/api/storage.ts +++ b/src/api/storage.ts @@ -58,12 +58,50 @@ export const createScrapeRobot = async ( } }; +export const createLLMRobot = async ( + url: string, + prompt: string, + llmProvider?: 'anthropic' | 'openai' | 'ollama', + llmModel?: string, + llmApiKey?: string, + llmBaseUrl?: string, + robotName?: string +): Promise => { + try { + const response = await axios.post( + `${apiUrl}/storage/recordings/llm`, + { + url, + prompt, + llmProvider, + llmModel, + llmApiKey, + llmBaseUrl, + robotName, + }, + { + headers: { 'Content-Type': 'application/json' }, + withCredentials: true, + timeout: 300000, + } + ); + + if (response.status === 201) { + return response.data; + } else { + throw new Error('Failed to create LLM robot'); + } + } catch (error: any) { + console.error('Error creating LLM robot:', error); + return null; + } +}; + export const updateRecording = async (id: string, data: { name?: string; limits?: Array<{pairIndex: number, actionIndex: number, argIndex: number, limit: number}>; credentials?: Credentials; targetUrl?: string; - // optional full workflow replacement (useful for action renames) workflow?: any[]; }): Promise => { try { diff --git a/src/components/robot/RecordingsTable.tsx b/src/components/robot/RecordingsTable.tsx index aed9ea74..704a7f97 100644 --- a/src/components/robot/RecordingsTable.tsx +++ b/src/components/robot/RecordingsTable.tsx @@ -81,8 +81,46 @@ interface RecordingsTableProps { handleDuplicateRobot: (id: string, name: string, params: string[]) => void; } +const LoadingRobotRow = memo(({ row, columns }: any) => { + return ( + + {columns.map((column: Column) => { + if (column.id === 'name') { + return ( + + + + + {row.name} (Creating...) + + + + ); + } else if (column.id === 'interpret') { + return ( + + + + ); + } else { + return ( + + - + + ); + } + })} + + ); +}); + // Virtualized row component for efficient rendering const TableRowMemoized = memo(({ row, columns, handlers }: any) => { + // If robot is loading, show loading row + if (row.isLoading) { + return ; + } + return ( {columns.map((column: Column) => { @@ -261,7 +299,9 @@ export const RecordingsTable = ({ id: index, ...recording.recording_meta, content: recording.recording, - parsedDate + parsedDate, + isLoading: recording.isLoading || false, + isOptimistic: recording.isOptimistic || false }; } return null; @@ -552,7 +592,7 @@ export const RecordingsTable = ({ <> - {/* */} + {columns.map((column) => ( ))} - {/* */} + {visibleRows.map((row) => ( { const [tabValue, setTabValue] = useState(0); const [url, setUrl] = useState(''); const [scrapeRobotName, setScrapeRobotName] = useState(''); + const [extractRobotName, setExtractRobotName] = useState(''); const [needsLogin, setNeedsLogin] = useState(false); const [isLoading, setIsLoading] = useState(false); const [isWarningModalOpen, setWarningModalOpen] = useState(false); const [activeBrowserId, setActiveBrowserId] = useState(''); const [outputFormats, setOutputFormats] = useState([]); + const [generationMode, setGenerationMode] = useState<'agent' | 'recorder' | null>(null); + + const [aiPrompt, setAiPrompt] = useState(''); + const [llmProvider, setLlmProvider] = useState<'anthropic' | 'openai' | 'ollama'>('ollama'); + const [llmModel, setLlmModel] = useState('default'); + const [llmApiKey, setLlmApiKey] = useState(''); + const [llmBaseUrl, setLlmBaseUrl] = useState(''); + const [aiRobotName, setAiRobotName] = useState(''); const { state } = React.useContext(AuthContext); const { user } = state; + const { addOptimisticRobot, removeOptimisticRobot, invalidateRecordings, invalidateRuns, addOptimisticRun } = useCacheInvalidation(); const handleTabChange = (event: React.SyntheticEvent, newValue: number) => { setTabValue(newValue); @@ -147,11 +155,6 @@ const RobotCreate: React.FC = () => { navigate('/robots'); }; - - - - - return ( @@ -210,11 +213,9 @@ const RobotCreate: React.FC = () => { - - + - {/* Logo (kept as original) */} { /> - Extract structured data from websites in a few clicks. + Extract structured data from websites using AI or record your own extraction workflow. - - {/* Origin URL Input */} - + setUrl(e.target.value)} + label="Website URL" /> - {/* Checkbox */} - - setNeedsLogin(e.target.checked)} - color="primary" - /> - } - label="This website needs logging in." - /> - + + + Choose How to Build + - {/* Button */} - - + + setGenerationMode('recorder')} + sx={{ + flex: 1, + cursor: 'pointer', + border: '2px solid', + borderColor: generationMode === 'recorder' ? '#ff00c3' : 'divider', + transition: 'all 0.2s', + '&:hover': { + borderColor: '#ff00c3', + } + }} + > + + + + Recorder Mode + + + Record your actions into a workflow. + + + + + setGenerationMode('agent')} + sx={{ + flex: 1, + cursor: 'pointer', + border: '2px solid', + borderColor: generationMode === 'agent' ? '#ff00c3' : 'divider', + transition: 'all 0.2s', + '&:hover': { + borderColor: '#ff00c3', + }, + position: 'relative' + }} + > + + Beta + + + + + + AI Mode + + + Describe the task. It builds it for you. + + + + + + {generationMode === 'agent' && ( + + + setExtractRobotName(e.target.value)} + label="Robot Name" + /> + + + + setAiPrompt(e.target.value)} + label="Extraction Prompt" + /> + + + + + LLM Provider + + + + + Model + + + + + {/* API Key for non-Ollama providers */} + {llmProvider !== 'ollama' && ( + + setLlmApiKey(e.target.value)} + label="API Key (Optional if set in .env)" + /> + + )} + + {llmProvider === 'ollama' && ( + + setLlmBaseUrl(e.target.value)} + label="Ollama Base URL (Optional)" + /> + + )} + + + + )} + + {generationMode === 'recorder' && ( + + + + )} + - - - - First time creating a robot? - - - Get help and learn how to use Maxun effectively. - - - - - {/* YouTube Tutorials */} - - window.open("https://www.youtube.com/@MaxunOSS/videos", "_blank")} - > - - theme.palette.mode === 'light' ? 'rgba(0, 0, 0, 0.54)' : '', - }} - > - - - - - Video Tutorials - - - Watch step-by-step guides - - - - - - - {/* Documentation */} - - window.open("https://docs.maxun.dev", "_blank")} - > - - theme.palette.mode === 'light' ? 'rgba(0, 0, 0, 0.54)' : '', - }} - > -
- - - - Documentation - - - Explore detailed guides - - - - - - - diff --git a/src/components/run/RunsTable.tsx b/src/components/run/RunsTable.tsx index 3729b1ca..65e57049 100644 --- a/src/components/run/RunsTable.tsx +++ b/src/components/run/RunsTable.tsx @@ -1,5 +1,5 @@ import * as React from 'react'; -import { useCallback, useEffect, useMemo, useState } from "react"; +import { useCallback, useEffect, useMemo, useState, useRef } from "react"; import { useTranslation } from 'react-i18next'; import Paper from '@mui/material/Paper'; import Table from '@mui/material/Table'; @@ -13,10 +13,12 @@ import { Accordion, AccordionSummary, AccordionDetails, Typography, Box, TextFie import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; import SearchIcon from '@mui/icons-material/Search'; import { useLocation, useNavigate } from 'react-router-dom'; -import { useGlobalInfoStore, useCachedRuns } from "../../context/globalInfo"; +import { useGlobalInfoStore, useCachedRuns, useCacheInvalidation } from "../../context/globalInfo"; import { RunSettings } from "./RunSettings"; import { CollapsibleRow } from "./ColapsibleRow"; import { ArrowDownward, ArrowUpward, UnfoldMore } from '@mui/icons-material'; +import { io, Socket } from 'socket.io-client'; +import { apiUrl } from '../../apiConfig'; export const columns: readonly Column[] = [ { id: 'runStatus', label: 'Status', minWidth: 80 }, @@ -133,6 +135,9 @@ export const RunsTable: React.FC = ({ const { notify, rerenderRuns, setRerenderRuns } = useGlobalInfoStore(); const { data: rows = [], isLoading: isFetching, error, refetch } = useCachedRuns(); + const { invalidateRuns } = useCacheInvalidation(); + + const activeSocketsRef = useRef>(new Map()); const [searchTerm, setSearchTerm] = useState(''); const [paginationStates, setPaginationStates] = useState({}); @@ -285,6 +290,98 @@ export const RunsTable: React.FC = ({ } }, [rerenderRuns, refetch, setRerenderRuns]); + useEffect(() => { + if (!rows || rows.length === 0) return; + + const activeRuns = rows.filter((row: Data) => + row.status === 'running' && row.browserId && row.browserId.trim() !== '' + ); + + activeRuns.forEach((run: Data) => { + const { browserId, runId: currentRunId, name } = run; + + if (activeSocketsRef.current.has(browserId)) { + return; + } + + console.log(`[RunsTable] Connecting to browser socket: ${browserId} for run: ${currentRunId}`); + + try { + const socket = io(`${apiUrl}/${browserId}`, { + transports: ['websocket'], + rejectUnauthorized: false + }); + + socket.on('connect', () => { + console.log(`[RunsTable] Connected to browser ${browserId}`); + }); + + socket.on('debugMessage', (msg: string) => { + console.log(`[RunsTable] Debug message for ${browserId}:`, msg); + // Optionally update logs in real-time here + }); + + socket.on('run-completed', (data: any) => { + console.log(`[RunsTable] Run completed for ${browserId}:`, data); + + // Invalidate cache to show updated run status + invalidateRuns(); + setRerenderRuns(true); + + // Show notification + if (data.status === 'success') { + notify('success', t('main_page.notifications.interpretation_success', { name: data.robotName || name })); + } else { + notify('error', t('main_page.notifications.interpretation_failed', { name: data.robotName || name })); + } + + socket.disconnect(); + activeSocketsRef.current.delete(browserId); + }); + + socket.on('urlChanged', (url: string) => { + console.log(`[RunsTable] URL changed for ${browserId}:`, url); + }); + + socket.on('dom-snapshot-loading', () => { + console.log(`[RunsTable] DOM snapshot loading for ${browserId}`); + }); + + socket.on('connect_error', (error: Error) => { + console.error(`[RunsTable] Connection error for browser ${browserId}:`, error.message); + }); + + socket.on('disconnect', (reason: string) => { + console.log(`[RunsTable] Disconnected from browser ${browserId}:`, reason); + activeSocketsRef.current.delete(browserId); + }); + + activeSocketsRef.current.set(browserId, socket); + } catch (error) { + console.error(`[RunsTable] Error connecting to browser ${browserId}:`, error); + } + }); + + // Disconnect from sockets for runs that are no longer active + const activeBrowserIds = new Set(activeRuns.map((run: Data) => run.browserId)); + activeSocketsRef.current.forEach((socket, browserId) => { + if (!activeBrowserIds.has(browserId)) { + console.log(`[RunsTable] Disconnecting from inactive browser: ${browserId}`); + socket.disconnect(); + activeSocketsRef.current.delete(browserId); + } + }); + + // Cleanup on unmount + return () => { + console.log('[RunsTable] Cleaning up all socket connections'); + activeSocketsRef.current.forEach((socket) => { + socket.disconnect(); + }); + activeSocketsRef.current.clear(); + }; + }, [rows, notify, t, invalidateRuns, setRerenderRuns]); + const handleDelete = useCallback(() => { notify('success', t('runstable.notifications.delete_success')); refetch(); diff --git a/src/context/globalInfo.tsx b/src/context/globalInfo.tsx index 3dddab1c..c5bbc044 100644 --- a/src/context/globalInfo.tsx +++ b/src/context/globalInfo.tsx @@ -204,31 +204,47 @@ export const useCachedRuns = () => { export const useCacheInvalidation = () => { const queryClient = useQueryClient(); - + const invalidateRuns = () => { queryClient.invalidateQueries({ queryKey: dataCacheKeys.runs }); }; - + const invalidateRecordings = () => { queryClient.invalidateQueries({ queryKey: dataCacheKeys.recordings }); }; - + const addOptimisticRun = (newRun: any) => { queryClient.setQueryData(dataCacheKeys.runs, (oldData: any) => { if (!oldData) return [{ id: 0, ...newRun }]; return [{ id: oldData.length, ...newRun }, ...oldData]; }); }; - + + const addOptimisticRobot = (newRobot: any) => { + queryClient.setQueryData(dataCacheKeys.recordings, (oldData: any) => { + if (!oldData) return [newRobot]; + return [newRobot, ...oldData]; + }); + }; + + const removeOptimisticRobot = (tempId: string) => { + queryClient.setQueryData(dataCacheKeys.recordings, (oldData: any) => { + if (!oldData) return []; + return oldData.filter((robot: any) => robot.id !== tempId); + }); + }; + const invalidateAllCache = () => { invalidateRuns(); invalidateRecordings(); }; - + return { invalidateRuns, - invalidateRecordings, + invalidateRecordings, addOptimisticRun, + addOptimisticRobot, + removeOptimisticRobot, invalidateAllCache }; }; diff --git a/src/pages/MainPage.tsx b/src/pages/MainPage.tsx index 4f302135..004785f7 100644 --- a/src/pages/MainPage.tsx +++ b/src/pages/MainPage.tsx @@ -53,6 +53,10 @@ export const MainPage = ({ handleEditRecording, initialContent }: MainPageProps) const { invalidateRuns, addOptimisticRun } = useCacheInvalidation(); const navigate = useNavigate(); + React.useEffect(() => { + setContent(initialContent); + }, [initialContent]); + const { state } = useContext(AuthContext); const { user } = state;