From 1034eca20044be9a0760c4c73b7a7f8b1ed8a8d4 Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 12 Jun 2025 12:50:22 +0530 Subject: [PATCH 01/12] feat: add mcp server --- server/src/mcp/index.ts | 623 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 623 insertions(+) create mode 100644 server/src/mcp/index.ts diff --git a/server/src/mcp/index.ts b/server/src/mcp/index.ts new file mode 100644 index 00000000..7985dce7 --- /dev/null +++ b/server/src/mcp/index.ts @@ -0,0 +1,623 @@ +// mcp-server/index.ts +import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; +import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; +import { z } from "zod"; +import fetch from 'node-fetch'; +import logger from '../logger'; + +// Configuration for the MCP server +interface MaxunMCPConfig { + name: string; + version: string; + maxunApiUrl: string; + apiKey: string; + transport: 'stdio' | 'http'; + httpPort?: number; +} + +class MaxunMCPServer { + private mcpServer: McpServer; + private config: MaxunMCPConfig; + + constructor(config: MaxunMCPConfig) { + this.config = config; + this.mcpServer = new McpServer({ + name: config.name, + version: config.version + }); + + this.setupTools(); + this.setupResources(); + this.setupPrompts(); + } + + private async makeApiRequest(endpoint: string, options: any = {}) { + const url = `${this.config.maxunApiUrl}${endpoint}`; + const headers = { + 'Content-Type': 'application/json', + 'x-api-key': this.config.apiKey, + ...options.headers + }; + + const response = await fetch(url, { + ...options, + headers + }); + + if (!response.ok) { + throw new Error(`API request failed: ${response.status} ${response.statusText}`); + } + + return await response.json(); + } + + private setupTools() { + // Tool: List all robots + this.mcpServer.tool( + "list_robots", + {}, + async () => { + try { + const data = await this.makeApiRequest('/api/robots'); + + return { + content: [{ + type: "text", + text: `Found ${data.robots.totalCount} robots:\n\n${JSON.stringify(data.robots.items, null, 2)}` + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching robots: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get robot details by ID + this.mcpServer.tool( + "get_robot", + { + robot_id: z.string().describe("ID of the robot to get details for") + }, + async ({ robot_id }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}`); + + return { + content: [{ + type: "text", + text: `Robot Details:\n\n${JSON.stringify(data.robot, null, 2)}` + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching robot: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Run a robot and get results + this.mcpServer.tool( + "run_robot", + { + robot_id: z.string().describe("ID of the robot to run"), + wait_for_completion: z.boolean().default(true).describe("Whether to wait for the run to complete") + }, + async ({ robot_id, wait_for_completion }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`, { + method: 'POST' + }); + + if (wait_for_completion) { + // The API already waits for completion and returns the complete run data + const extractedData = data.run.data; + const screenshots = data.run.screenshots; + + let resultText = `Robot run completed successfully!\n\n`; + resultText += `Run ID: ${data.run.runId}\n`; + resultText += `Status: ${data.run.status}\n`; + resultText += `Started: ${data.run.startedAt}\n`; + resultText += `Finished: ${data.run.finishedAt}\n\n`; + + if (extractedData.textData && extractedData.textData.length > 0) { + resultText += `Extracted Text Data (${extractedData.textData.length} items):\n`; + resultText += JSON.stringify(extractedData.textData, null, 2) + '\n\n'; + } + + if (extractedData.listData && extractedData.listData.length > 0) { + resultText += `Extracted List Data (${extractedData.listData.length} items):\n`; + resultText += JSON.stringify(extractedData.listData, null, 2) + '\n\n'; + } + + if (screenshots && screenshots.length > 0) { + resultText += `Screenshots captured: ${screenshots.length}\n`; + resultText += `Screenshot URLs:\n`; + screenshots.forEach((screenshot: any, index: any) => { + resultText += `${index + 1}. ${screenshot}\n`; + }); + } + + return { + content: [{ + type: "text", + text: resultText + }] + }; + } else { + return { + content: [{ + type: "text", + text: `Robot run started! Run ID: ${data.run.runId}\nStatus: ${data.run.status}` + }] + }; + } + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error running robot: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get all runs for a robot + this.mcpServer.tool( + "get_robot_runs", + { + robot_id: z.string().describe("ID of the robot") + }, + async ({ robot_id }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`); + + return { + content: [{ + type: "text", + text: `Robot runs (${data.runs.totalCount} total):\n\n${JSON.stringify(data.runs.items, null, 2)}` + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching runs: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get specific run details + this.mcpServer.tool( + "get_run_details", + { + robot_id: z.string().describe("ID of the robot"), + run_id: z.string().describe("ID of the specific run") + }, + async ({ robot_id, run_id }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs/${run_id}`); + + const run = data.run; + let resultText = `Run Details:\n\n`; + resultText += `Run ID: ${run.runId}\n`; + resultText += `Status: ${run.status}\n`; + resultText += `Robot ID: ${run.robotId}\n`; + resultText += `Started: ${run.startedAt}\n`; + resultText += `Finished: ${run.finishedAt}\n\n`; + + if (run.data.textData && run.data.textData.length > 0) { + resultText += `Extracted Text Data:\n${JSON.stringify(run.data.textData, null, 2)}\n\n`; + } + + if (run.data.listData && run.data.listData.length > 0) { + resultText += `Extracted List Data:\n${JSON.stringify(run.data.listData, null, 2)}\n\n`; + } + + if (run.screenshots && run.screenshots.length > 0) { + resultText += `Screenshots:\n`; + run.screenshots.forEach((screenshot: any, index: any) => { + resultText += `${index + 1}. ${screenshot}\n`; + }); + } + + return { + content: [{ + type: "text", + text: resultText + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching run details: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Scrape any website with a one-time robot + this.mcpServer.tool( + "scrape_website", + { + url: z.string().url().describe("URL to scrape"), + description: z.string().describe("Description of what data to extract"), + robot_name: z.string().optional().describe("Optional name for the temporary robot") + }, + async ({ url, description, robot_name }) => { + try { + // Note: This would require creating a robot first, then running it + // Since your API doesn't have a direct scrape endpoint, we'll guide the user + const robotName = robot_name || `Temp_Robot_${Date.now()}`; + + return { + content: [{ + type: "text", + text: `To scrape ${url} for "${description}", you would need to: + +1. First create a robot using the Maxun web interface at your configured URL +2. Train the robot to extract the desired data: ${description} +3. Note the robot ID from the interface +4. Then use the 'run_robot' tool with that robot ID + +Alternatively, you can: +1. Use 'list_robots' to see existing robots +2. Find a robot that might work for similar data extraction +3. Use 'run_robot' with that robot's ID + +Robot name suggestion: ${robotName} +Target URL: ${url} +Extraction goal: ${description}` + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get robot performance summary + this.mcpServer.tool( + "get_robot_summary", + { + robot_id: z.string().describe("ID of the robot") + }, + async ({ robot_id }) => { + try { + const [robotData, runsData] = await Promise.all([ + this.makeApiRequest(`/api/robots/${robot_id}`), + this.makeApiRequest(`/api/robots/${robot_id}/runs`) + ]); + + const robot = robotData.robot; + const runs = runsData.runs.items; + + const successfulRuns = runs.filter((run: any) => run.status === 'success'); + const failedRuns = runs.filter((run: any) => run.status === 'failed'); + + let totalTextItems = 0; + let totalListItems = 0; + let totalScreenshots = 0; + + successfulRuns.forEach((run: any) => { + if (run.data.textData) totalTextItems += run.data.textData.length; + if (run.data.listData) totalListItems += run.data.listData.length; + if (run.screenshots) totalScreenshots += run.screenshots.length; + }); + + const summary = `Robot Performance Summary: + +Robot Name: ${robot.name} +Robot ID: ${robot.id} +Created: ${robot.createdAt ? new Date(robot.createdAt).toLocaleString() : 'N/A'} + +Performance Metrics: +- Total Runs: ${runs.length} +- Successful Runs: ${successfulRuns.length} +- Failed Runs: ${failedRuns.length} +- Success Rate: ${runs.length > 0 ? ((successfulRuns.length / runs.length) * 100).toFixed(1) : 0}% + +Data Extracted: +- Total Text Items: ${totalTextItems} +- Total List Items: ${totalListItems} +- Total Screenshots: ${totalScreenshots} +- Total Data Points: ${totalTextItems + totalListItems} + +Input Parameters: +${JSON.stringify(robot.inputParameters, null, 2)}`; + + return { + content: [{ + type: "text", + text: summary + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error generating robot summary: ${error.message}` + }], + isError: true + }; + } + } + ); + } + + private setupResources() { + // Resource: Get robot data as JSON + this.mcpServer.resource( + "robot-data", + new ResourceTemplate("robot-data://{robot_id}?run_id={run_id}", { + list: undefined + }), + async (uri, { robot_id, run_id }) => { + if (!robot_id) { + throw new Error('robot_id parameter is required'); + } + + try { + let data; + if (run_id) { + data = await this.makeApiRequest(`/api/robots/${robot_id}/runs/${run_id}`); + } else { + data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`); + } + + return { + contents: [{ + uri: uri.href, + text: JSON.stringify(data, null, 2), + mimeType: "application/json" + }] + }; + } catch (error: any) { + throw new Error(`Error fetching robot data: ${error.message}`); + } + } + ); + + // Resource: Get extracted data as CSV format + this.mcpServer.resource( + "extracted-data-csv", + new ResourceTemplate("extracted-data-csv://{robot_id}/{run_id}", { + list: undefined + }), + async (uri, { robot_id, run_id }) => { + if (!robot_id || !run_id) { + throw new Error('Both robot_id and run_id parameters are required'); + } + + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs/${run_id}`); + const run = data.run; + + // Convert extracted data to CSV format + let csvContent = ''; + + if (run.data.textData && run.data.textData.length > 0) { + csvContent += 'Type,Data\n'; + run.data.textData.forEach((item: any) => { + csvContent += `"text","${JSON.stringify(item).replace(/"/g, '""')}"\n`; + }); + } + + if (run.data.listData && run.data.listData.length > 0) { + if (csvContent) csvContent += '\n'; + run.data.listData.forEach((item: any) => { + csvContent += `"list","${JSON.stringify(item).replace(/"/g, '""')}"\n`; + }); + } + + return { + contents: [{ + uri: uri.href, + text: csvContent, + mimeType: "text/csv" + }] + }; + } catch (error: any) { + throw new Error(`Error generating CSV: ${error.message}`); + } + } + ); + } + + private setupPrompts() { + // Prompt: Analyze website and suggest scraping strategy + this.mcpServer.prompt( + "analyze-website-for-scraping", + { + url: z.string().url().describe("URL of the website to analyze"), + target_data: z.string().describe("Description of the data you want to extract") + }, + ({ url, target_data }) => ({ + messages: [{ + role: "user", + content: { + type: "text", + text: `Analyze this website for web scraping and provide a strategy: + +Website URL: ${url} +Target Data: ${target_data} + +Please help me: +1. First, use the 'list_robots' tool to see if there are existing robots that might work +2. If there's a suitable robot, use 'get_robot_summary' to check its performance +3. If there's a good existing robot, use 'run_robot' to extract the data +4. If no suitable robot exists, provide detailed instructions for creating a new robot + +Focus on: +- Identifying the best approach for extracting: ${target_data} +- Recommending specific robots from the available list if applicable +- Providing step-by-step guidance for the scraping process` + } + }] + }) + ); + + // Prompt: Monitor and analyze robot performance + this.mcpServer.prompt( + "analyze-robot-performance", + { + robot_id: z.string().describe("ID of the robot to analyze") + }, + ({ robot_id }) => ({ + messages: [{ + role: "user", + content: { + type: "text", + text: `Perform a comprehensive analysis of robot performance: + +Robot ID: ${robot_id} + +Please: +1. Use 'get_robot_summary' to get overall performance metrics +2. Use 'get_robot_runs' to analyze recent run patterns +3. Identify any performance issues or trends +4. Suggest optimizations if needed +5. Provide recommendations for improving success rates + +Focus on: +- Success rate analysis +- Data extraction efficiency +- Error patterns +- Performance trends over time` + } + }] + }) + ); + + // Prompt: Extract and format data + this.mcpServer.prompt( + "extract-and-format-data", + { + robot_id: z.string().describe("ID of the robot to use"), + output_format: z.enum(["json", "csv", "summary"]).describe("Desired output format") + }, + ({ robot_id, output_format }) => ({ + messages: [{ + role: "user", + content: { + type: "text", + text: `Extract data using the specified robot and format the output: + +Robot ID: ${robot_id} +Output Format: ${output_format} + +Please: +1. Use 'run_robot' to execute the data extraction +2. Format the extracted data according to the requested format +3. Provide a clean, organized presentation of the results +4. Include metadata about the extraction (timing, data volume, etc.) + +For ${output_format} format: +${output_format === 'json' ? '- Structure data as clean JSON objects' : + output_format === 'csv' ? '- Format as comma-separated values with headers' : + '- Provide a human-readable summary with key insights'} + +Ensure the output is ready for immediate use in downstream applications.` + } + }] + }) + ); + + // Prompt: Compare robots for task suitability + this.mcpServer.prompt( + "compare-robots-for-task", + { + task_description: z.string().describe("Description of the scraping task"), + website_type: z.string().optional().describe("Type of website (e.g., e-commerce, news, social media)") + }, + ({ task_description, website_type }) => ({ + messages: [{ + role: "user", + content: { + type: "text", + text: `Help me find the best robot for this scraping task: + +Task: ${task_description} +${website_type ? `Website Type: ${website_type}` : ''} + +Please: +1. Use 'list_robots' to get all available robots +2. Analyze each robot's capabilities based on their names and parameters +3. Use 'get_robot_summary' for the most promising candidates +4. Compare their performance metrics and success rates +5. Recommend the best robot(s) for this specific task + +Consider: +- Robot specialization and target websites +- Success rates and reliability +- Data extraction capabilities +- Recent performance trends + +Provide a ranked recommendation with reasoning for each choice.` + } + }] + }) + ); + } + + async start() { + try { + let transport; + + if (this.config.transport === 'stdio') { + transport = new StdioServerTransport(); + logger.log('info', 'Starting Maxun MCP server with stdio transport'); + } else { + // HTTP transport for web-based MCP clients + transport = new StreamableHTTPServerTransport({ + sessionIdGenerator: undefined // Stateless for simplicity + }); + logger.log('info', `Starting Maxun MCP server with HTTP transport on port ${this.config.httpPort}`); + } + + await this.mcpServer.connect(transport); + logger.log('info', 'Maxun MCP server connected and ready'); + + return transport; + } catch (error: any) { + logger.log('error', `Failed to start Maxun MCP server: ${error.message}`); + throw error; + } + } + + async stop() { + try { + await this.mcpServer.close(); + logger.log('info', 'Maxun MCP server stopped'); + } catch (error: any) { + logger.log('error', `Error stopping Maxun MCP server: ${error.message}`); + } + } +} + +export default MaxunMCPServer; \ No newline at end of file From 1ad832965a763942ec76929056b37fabcd18282c Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 12 Jun 2025 12:51:41 +0530 Subject: [PATCH 02/12] feat: add mcp tsconfig --- server/tsconfig.mcp.json | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 server/tsconfig.mcp.json diff --git a/server/tsconfig.mcp.json b/server/tsconfig.mcp.json new file mode 100644 index 00000000..be69f643 --- /dev/null +++ b/server/tsconfig.mcp.json @@ -0,0 +1,26 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "Node16", + "moduleResolution": "Node16", + "outDir": "./dist", + "rootDir": "./", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true + }, + "include": [ + "mcp/**/*", + "src/mcp-worker.ts", + "src/logger.ts", + "src/types/**/*" + ], + "exclude": [ + "node_modules", + "dist" + ] +} \ No newline at end of file From d6ef31b3144dadd7cbea8f55a048caf86283de6e Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 12 Jun 2025 12:52:31 +0530 Subject: [PATCH 03/12] feat: add stdio mcp worker --- server/src/mcp-worker.ts | 375 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 375 insertions(+) create mode 100644 server/src/mcp-worker.ts diff --git a/server/src/mcp-worker.ts b/server/src/mcp-worker.ts new file mode 100644 index 00000000..de719911 --- /dev/null +++ b/server/src/mcp-worker.ts @@ -0,0 +1,375 @@ +// mcp-worker.ts +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; +import { z } from "zod"; +import fetch from 'node-fetch'; +import dotenv from 'dotenv'; + +dotenv.config(); + +// Simple logger that doesn't interfere with stdio +const log = (message: string) => { + if (process.env.NODE_ENV !== 'production') { + console.error(`[MCP Worker] ${message}`); // Use stderr for logging + } +}; + +class MaxunMCPWorker { + private mcpServer: McpServer; + private apiKey: string; + private apiUrl: string; + + constructor() { + this.apiKey = process.env.MCP_API_KEY || ''; + this.apiUrl = process.env.BACKEND_URL || 'http://localhost:8080'; + + if (!this.apiKey) { + throw new Error('MCP_API_KEY environment variable is required'); + } + + this.mcpServer = new McpServer({ + name: 'Maxun Web Scraping Server', + version: '1.0.0' + }); + + this.setupTools(); + } + + private async makeApiRequest(endpoint: string, options: any = {}) { + const url = `${this.apiUrl}${endpoint}`; + const headers = { + 'Content-Type': 'application/json', + 'x-api-key': this.apiKey, + ...options.headers + }; + + const response = await fetch(url, { + ...options, + headers + }); + + if (!response.ok) { + throw new Error(`API request failed: ${response.status} ${response.statusText}`); + } + + return await response.json(); + } + + private setupTools() { + // Tool: List all robots + this.mcpServer.tool( + "list_robots", + {}, + async () => { + try { + const data = await this.makeApiRequest('/api/robots'); + + return { + content: [{ + type: "text", + text: `Found ${data.robots.totalCount} robots:\n\n${JSON.stringify(data.robots.items, null, 2)}` + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching robots: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get robot details by ID + this.mcpServer.tool( + "get_robot", + { + robot_id: z.string().describe("ID of the robot to get details for") + }, + async ({ robot_id }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}`); + + return { + content: [{ + type: "text", + text: `Robot Details:\n\n${JSON.stringify(data.robot, null, 2)}` + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching robot: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Run a robot and get results + this.mcpServer.tool( + "run_robot", + { + robot_id: z.string().describe("ID of the robot to run"), + wait_for_completion: z.boolean().default(true).describe("Whether to wait for the run to complete") + }, + async ({ robot_id, wait_for_completion }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`, { + method: 'POST' + }); + + if (wait_for_completion) { + const extractedData = data.run.data; + const screenshots = data.run.screenshots; + + let resultText = `Robot run completed successfully!\n\n`; + resultText += `Run ID: ${data.run.runId}\n`; + resultText += `Status: ${data.run.status}\n`; + resultText += `Started: ${data.run.startedAt}\n`; + resultText += `Finished: ${data.run.finishedAt}\n\n`; + + if (extractedData.textData && extractedData.textData.length > 0) { + resultText += `Extracted Text Data (${extractedData.textData.length} items):\n`; + resultText += JSON.stringify(extractedData.textData, null, 2) + '\n\n'; + } + + if (extractedData.listData && extractedData.listData.length > 0) { + resultText += `Extracted List Data (${extractedData.listData.length} items):\n`; + resultText += JSON.stringify(extractedData.listData, null, 2) + '\n\n'; + } + + if (screenshots && screenshots.length > 0) { + resultText += `Screenshots captured: ${screenshots.length}\n`; + resultText += `Screenshot URLs:\n`; + screenshots.forEach((screenshot: any, index: any) => { + resultText += `${index + 1}. ${screenshot}\n`; + }); + } + + return { + content: [{ + type: "text", + text: resultText + }] + }; + } else { + return { + content: [{ + type: "text", + text: `Robot run started! Run ID: ${data.run.runId}\nStatus: ${data.run.status}` + }] + }; + } + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error running robot: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get all runs for a robot + this.mcpServer.tool( + "get_robot_runs", + { + robot_id: z.string().describe("ID of the robot") + }, + async ({ robot_id }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`); + + return { + content: [{ + type: "text", + text: `Robot runs (${data.runs.totalCount} total):\n\n${JSON.stringify(data.runs.items, null, 2)}` + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching runs: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get specific run details + this.mcpServer.tool( + "get_run_details", + { + robot_id: z.string().describe("ID of the robot"), + run_id: z.string().describe("ID of the specific run") + }, + async ({ robot_id, run_id }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs/${run_id}`); + + const run = data.run; + let resultText = `Run Details:\n\n`; + resultText += `Run ID: ${run.runId}\n`; + resultText += `Status: ${run.status}\n`; + resultText += `Robot ID: ${run.robotId}\n`; + resultText += `Started: ${run.startedAt}\n`; + resultText += `Finished: ${run.finishedAt}\n\n`; + + if (run.data.textData && run.data.textData.length > 0) { + resultText += `Extracted Text Data:\n${JSON.stringify(run.data.textData, null, 2)}\n\n`; + } + + if (run.data.listData && run.data.listData.length > 0) { + resultText += `Extracted List Data:\n${JSON.stringify(run.data.listData, null, 2)}\n\n`; + } + + if (run.screenshots && run.screenshots.length > 0) { + resultText += `Screenshots:\n`; + run.screenshots.forEach((screenshot: any, index: any) => { + resultText += `${index + 1}. ${screenshot}\n`; + }); + } + + return { + content: [{ + type: "text", + text: resultText + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching run details: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get robot performance summary + this.mcpServer.tool( + "get_robot_summary", + { + robot_id: z.string().describe("ID of the robot") + }, + async ({ robot_id }) => { + try { + const [robotData, runsData] = await Promise.all([ + this.makeApiRequest(`/api/robots/${robot_id}`), + this.makeApiRequest(`/api/robots/${robot_id}/runs`) + ]); + + const robot = robotData.robot; + const runs = runsData.runs.items; + + const successfulRuns = runs.filter((run: any) => run.status === 'success'); + const failedRuns = runs.filter((run: any) => run.status === 'failed'); + + let totalTextItems = 0; + let totalListItems = 0; + let totalScreenshots = 0; + + successfulRuns.forEach((run: any) => { + if (run.data.textData) totalTextItems += run.data.textData.length; + if (run.data.listData) totalListItems += run.data.listData.length; + if (run.screenshots) totalScreenshots += run.screenshots.length; + }); + + const summary = `Robot Performance Summary: + +Robot Name: ${robot.name} +Robot ID: ${robot.id} +Created: ${robot.createdAt ? new Date(robot.createdAt).toLocaleString() : 'N/A'} + +Performance Metrics: +- Total Runs: ${runs.length} +- Successful Runs: ${successfulRuns.length} +- Failed Runs: ${failedRuns.length} +- Success Rate: ${runs.length > 0 ? ((successfulRuns.length / runs.length) * 100).toFixed(1) : 0}% + +Data Extracted: +- Total Text Items: ${totalTextItems} +- Total List Items: ${totalListItems} +- Total Screenshots: ${totalScreenshots} +- Total Data Points: ${totalTextItems + totalListItems} + +Input Parameters: +${JSON.stringify(robot.inputParameters, null, 2)}`; + + return { + content: [{ + type: "text", + text: summary + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error generating robot summary: ${error.message}` + }], + isError: true + }; + } + } + ); + } + + async start() { + try { + const transport = new StdioServerTransport(); + await this.mcpServer.connect(transport); + log('Maxun MCP Worker connected and ready'); + } catch (error: any) { + log(`Failed to start MCP Worker: ${error.message}`); + throw error; + } + } + + async stop() { + try { + await this.mcpServer.close(); + log('Maxun MCP Worker stopped'); + } catch (error: any) { + log(`Error stopping MCP Worker: ${error.message}`); + } + } +} + +async function main() { + try { + const worker = new MaxunMCPWorker(); + await worker.start(); + + // Handle graceful shutdown + process.on('SIGTERM', async () => { + await worker.stop(); + process.exit(0); + }); + + process.on('SIGINT', async () => { + await worker.stop(); + process.exit(0); + }); + + } catch (error) { + console.error('Failed to start MCP Worker:', error); + process.exit(1); + } +} + +// Only start if this is run as a worker or directly +if (process.env.MCP_WORKER === 'true' || require.main === module) { + main(); +} \ No newline at end of file From c9fede4bd520d9842d48716229867db5dd39ef76 Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 12 Jun 2025 12:53:29 +0530 Subject: [PATCH 04/12] feat: initialize stdio mcp server --- server/src/server.ts | 56 +++++++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 8 deletions(-) diff --git a/server/src/server.ts b/server/src/server.ts index bd4a1697..ddd8a50e 100644 --- a/server/src/server.ts +++ b/server/src/server.ts @@ -83,9 +83,48 @@ export const io = new Server(server); */ export const browserPool = new BrowserPool(); -// app.use(bodyParser.json({ limit: '10mb' })) -// app.use(bodyParser.urlencoded({ extended: true, limit: '10mb', parameterLimit: 9000 })); -// parse cookies - "cookie" is true in csrfProtection +const initializeMCPServer = async () => { + try { + const mcpEnabled = process.env.MCP_ENABLED === 'true'; + const mcpApiKey = process.env.MCP_API_KEY; + + if (!mcpEnabled) { + logger.log('info', 'MCP Server disabled (set MCP_ENABLED=true to enable)'); + return; + } + + if (!mcpApiKey) { + logger.log('warning', 'MCP_API_KEY not set. MCP server will not be able to authenticate with Maxun API.'); + return; + } + + const mcpWorkerPath = path.resolve(__dirname, './mcp-worker.ts'); + const mcpWorkerProcess = fork(mcpWorkerPath, [], { + execArgv: process.env.NODE_ENV === 'production' ? [] : ['--inspect=5861'], + env: { + ...process.env, + MCP_WORKER: 'true' + } + }); + + mcpWorkerProcess.on('message', (message: any) => { + logger.log('info', `MCP Worker message: ${message}`); + }); + + mcpWorkerProcess.on('error', (error: any) => { + logger.log('error', `MCP Worker error: ${error}`); + }); + + mcpWorkerProcess.on('exit', (code: any) => { + logger.log('info', `MCP Worker exited with code: ${code}`); + }); + + logger.log('info', 'MCP Server started with stdio transport in worker process'); + } catch (error: any) { + logger.log('error', `Failed to initialize MCP Server: ${error.message}`); + } +}; + app.use(cookieParser()) app.use('/webhook', webhook); @@ -99,9 +138,9 @@ app.use('/api-docs', swaggerUi.serve, swaggerUi.setup(swaggerSpec)); readdirSync(path.join(__dirname, 'api')).forEach((r) => { const route = require(path.join(__dirname, 'api', r)); - const router = route.default || route; // Use .default if available, fallback to route + const router = route.default || route; if (typeof router === 'function') { - app.use('/api', router); // Use the default export or named router + app.use('/api', router); } else { console.error(`Error: ${r} does not export a valid router`); } @@ -151,7 +190,6 @@ app.get('/', function (req, res) { return res.send('Maxun server started 🚀'); }); -// Add CORS headers app.use((req, res, next) => { res.header('Access-Control-Allow-Origin', process.env.PUBLIC_URL || 'http://localhost:5173'); res.header('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS'); @@ -168,9 +206,11 @@ server.listen(SERVER_PORT, '0.0.0.0', async () => { await connectDB(); await syncDB(); logger.log('info', `Server listening on port ${SERVER_PORT}`); + + await initializeMCPServer(); } catch (error: any) { logger.log('error', `Failed to connect to the database: ${error.message}`); - process.exit(1); // Exit the process if DB connection fails + process.exit(1); } }); @@ -204,4 +244,4 @@ process.on('SIGINT', async () => { if (recordingWorkerProcess) recordingWorkerProcess.kill(); } process.exit(); -}); +}); \ No newline at end of file From cf5aba9a3a0fd8b730bfcb83ab677b8ae737e523 Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 12 Jun 2025 12:54:34 +0530 Subject: [PATCH 05/12] chore: add mcp scripts and dependencies --- package.json | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/package.json b/package.json index 9652ed87..29988b7b 100644 --- a/package.json +++ b/package.json @@ -99,7 +99,11 @@ "migrate:undo:all": "sequelize-cli db:migrate:undo:all", "seed": "sequelize-cli db:seed:all", "seed:undo:all": "sequelize-cli db:seed:undo:all", - "migration:generate": "sequelize-cli migration:generate --name" + "migration:generate": "sequelize-cli migration:generate --name", + "mcp:build": "tsc --project server/tsconfig.mcp.json", + "mcp:start": "node dist/mcp/index.js", + "mcp:dev": "ts-node mcp/index.ts", + "mcp:inspector": "npx @modelcontextprotocol/inspector dist/mcp/index.js" }, "eslintConfig": { "extends": [ @@ -107,6 +111,7 @@ ] }, "devDependencies": { + "@modelcontextprotocol/sdk": "^1.12.1", "@types/connect-pg-simple": "^7.0.3", "@types/cookie-parser": "^1.4.7", "@types/express": "^4.17.13", @@ -115,6 +120,7 @@ "@types/loglevel": "^1.6.3", "@types/node": "22.7.9", "@types/node-cron": "^3.0.11", + "@types/node-fetch": "^2.6.12", "@types/prismjs": "^1.26.0", "@types/react-highlight": "^0.12.5", "@types/react-transition-group": "^4.4.4", @@ -129,6 +135,7 @@ "nodemon": "^2.0.15", "sequelize-cli": "^6.6.2", "ts-node": "^10.4.0", - "vite": "^5.4.10" + "vite": "^5.4.10", + "zod": "^3.25.62" } } From df0d8eabdfaf7865db77603a93a5db493db1a027 Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 12 Jun 2025 14:53:45 +0530 Subject: [PATCH 06/12] chore: rm mcp scripts --- package.json | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/package.json b/package.json index 29988b7b..c84b301b 100644 --- a/package.json +++ b/package.json @@ -100,10 +100,7 @@ "seed": "sequelize-cli db:seed:all", "seed:undo:all": "sequelize-cli db:seed:undo:all", "migration:generate": "sequelize-cli migration:generate --name", - "mcp:build": "tsc --project server/tsconfig.mcp.json", - "mcp:start": "node dist/mcp/index.js", - "mcp:dev": "ts-node mcp/index.ts", - "mcp:inspector": "npx @modelcontextprotocol/inspector dist/mcp/index.js" + "mcp:build": "tsc --project server/tsconfig.mcp.json" }, "eslintConfig": { "extends": [ From dcee249ec460a953f2586f72343d5f3a4015457f Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 12 Jun 2025 15:00:47 +0530 Subject: [PATCH 07/12] chore: rm additional paths --- server/tsconfig.mcp.json | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/server/tsconfig.mcp.json b/server/tsconfig.mcp.json index be69f643..d799017b 100644 --- a/server/tsconfig.mcp.json +++ b/server/tsconfig.mcp.json @@ -3,8 +3,8 @@ "target": "ES2022", "module": "Node16", "moduleResolution": "Node16", - "outDir": "./dist", - "rootDir": "./", + "outDir": "../dist", + "rootDir": "./src", "strict": true, "esModuleInterop": true, "skipLibCheck": true, @@ -14,10 +14,7 @@ "sourceMap": true }, "include": [ - "mcp/**/*", - "src/mcp-worker.ts", - "src/logger.ts", - "src/types/**/*" + "src/mcp-worker.ts" ], "exclude": [ "node_modules", From d9a607af14e0d740c5080832493139b1476f3b0b Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 12 Jun 2025 15:13:49 +0530 Subject: [PATCH 08/12] feat: rm mcp index file --- server/src/mcp/index.ts | 623 ---------------------------------------- 1 file changed, 623 deletions(-) delete mode 100644 server/src/mcp/index.ts diff --git a/server/src/mcp/index.ts b/server/src/mcp/index.ts deleted file mode 100644 index 7985dce7..00000000 --- a/server/src/mcp/index.ts +++ /dev/null @@ -1,623 +0,0 @@ -// mcp-server/index.ts -import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js"; -import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; -import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js"; -import { z } from "zod"; -import fetch from 'node-fetch'; -import logger from '../logger'; - -// Configuration for the MCP server -interface MaxunMCPConfig { - name: string; - version: string; - maxunApiUrl: string; - apiKey: string; - transport: 'stdio' | 'http'; - httpPort?: number; -} - -class MaxunMCPServer { - private mcpServer: McpServer; - private config: MaxunMCPConfig; - - constructor(config: MaxunMCPConfig) { - this.config = config; - this.mcpServer = new McpServer({ - name: config.name, - version: config.version - }); - - this.setupTools(); - this.setupResources(); - this.setupPrompts(); - } - - private async makeApiRequest(endpoint: string, options: any = {}) { - const url = `${this.config.maxunApiUrl}${endpoint}`; - const headers = { - 'Content-Type': 'application/json', - 'x-api-key': this.config.apiKey, - ...options.headers - }; - - const response = await fetch(url, { - ...options, - headers - }); - - if (!response.ok) { - throw new Error(`API request failed: ${response.status} ${response.statusText}`); - } - - return await response.json(); - } - - private setupTools() { - // Tool: List all robots - this.mcpServer.tool( - "list_robots", - {}, - async () => { - try { - const data = await this.makeApiRequest('/api/robots'); - - return { - content: [{ - type: "text", - text: `Found ${data.robots.totalCount} robots:\n\n${JSON.stringify(data.robots.items, null, 2)}` - }] - }; - } catch (error: any) { - return { - content: [{ - type: "text", - text: `Error fetching robots: ${error.message}` - }], - isError: true - }; - } - } - ); - - // Tool: Get robot details by ID - this.mcpServer.tool( - "get_robot", - { - robot_id: z.string().describe("ID of the robot to get details for") - }, - async ({ robot_id }) => { - try { - const data = await this.makeApiRequest(`/api/robots/${robot_id}`); - - return { - content: [{ - type: "text", - text: `Robot Details:\n\n${JSON.stringify(data.robot, null, 2)}` - }] - }; - } catch (error: any) { - return { - content: [{ - type: "text", - text: `Error fetching robot: ${error.message}` - }], - isError: true - }; - } - } - ); - - // Tool: Run a robot and get results - this.mcpServer.tool( - "run_robot", - { - robot_id: z.string().describe("ID of the robot to run"), - wait_for_completion: z.boolean().default(true).describe("Whether to wait for the run to complete") - }, - async ({ robot_id, wait_for_completion }) => { - try { - const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`, { - method: 'POST' - }); - - if (wait_for_completion) { - // The API already waits for completion and returns the complete run data - const extractedData = data.run.data; - const screenshots = data.run.screenshots; - - let resultText = `Robot run completed successfully!\n\n`; - resultText += `Run ID: ${data.run.runId}\n`; - resultText += `Status: ${data.run.status}\n`; - resultText += `Started: ${data.run.startedAt}\n`; - resultText += `Finished: ${data.run.finishedAt}\n\n`; - - if (extractedData.textData && extractedData.textData.length > 0) { - resultText += `Extracted Text Data (${extractedData.textData.length} items):\n`; - resultText += JSON.stringify(extractedData.textData, null, 2) + '\n\n'; - } - - if (extractedData.listData && extractedData.listData.length > 0) { - resultText += `Extracted List Data (${extractedData.listData.length} items):\n`; - resultText += JSON.stringify(extractedData.listData, null, 2) + '\n\n'; - } - - if (screenshots && screenshots.length > 0) { - resultText += `Screenshots captured: ${screenshots.length}\n`; - resultText += `Screenshot URLs:\n`; - screenshots.forEach((screenshot: any, index: any) => { - resultText += `${index + 1}. ${screenshot}\n`; - }); - } - - return { - content: [{ - type: "text", - text: resultText - }] - }; - } else { - return { - content: [{ - type: "text", - text: `Robot run started! Run ID: ${data.run.runId}\nStatus: ${data.run.status}` - }] - }; - } - } catch (error: any) { - return { - content: [{ - type: "text", - text: `Error running robot: ${error.message}` - }], - isError: true - }; - } - } - ); - - // Tool: Get all runs for a robot - this.mcpServer.tool( - "get_robot_runs", - { - robot_id: z.string().describe("ID of the robot") - }, - async ({ robot_id }) => { - try { - const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`); - - return { - content: [{ - type: "text", - text: `Robot runs (${data.runs.totalCount} total):\n\n${JSON.stringify(data.runs.items, null, 2)}` - }] - }; - } catch (error: any) { - return { - content: [{ - type: "text", - text: `Error fetching runs: ${error.message}` - }], - isError: true - }; - } - } - ); - - // Tool: Get specific run details - this.mcpServer.tool( - "get_run_details", - { - robot_id: z.string().describe("ID of the robot"), - run_id: z.string().describe("ID of the specific run") - }, - async ({ robot_id, run_id }) => { - try { - const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs/${run_id}`); - - const run = data.run; - let resultText = `Run Details:\n\n`; - resultText += `Run ID: ${run.runId}\n`; - resultText += `Status: ${run.status}\n`; - resultText += `Robot ID: ${run.robotId}\n`; - resultText += `Started: ${run.startedAt}\n`; - resultText += `Finished: ${run.finishedAt}\n\n`; - - if (run.data.textData && run.data.textData.length > 0) { - resultText += `Extracted Text Data:\n${JSON.stringify(run.data.textData, null, 2)}\n\n`; - } - - if (run.data.listData && run.data.listData.length > 0) { - resultText += `Extracted List Data:\n${JSON.stringify(run.data.listData, null, 2)}\n\n`; - } - - if (run.screenshots && run.screenshots.length > 0) { - resultText += `Screenshots:\n`; - run.screenshots.forEach((screenshot: any, index: any) => { - resultText += `${index + 1}. ${screenshot}\n`; - }); - } - - return { - content: [{ - type: "text", - text: resultText - }] - }; - } catch (error: any) { - return { - content: [{ - type: "text", - text: `Error fetching run details: ${error.message}` - }], - isError: true - }; - } - } - ); - - // Tool: Scrape any website with a one-time robot - this.mcpServer.tool( - "scrape_website", - { - url: z.string().url().describe("URL to scrape"), - description: z.string().describe("Description of what data to extract"), - robot_name: z.string().optional().describe("Optional name for the temporary robot") - }, - async ({ url, description, robot_name }) => { - try { - // Note: This would require creating a robot first, then running it - // Since your API doesn't have a direct scrape endpoint, we'll guide the user - const robotName = robot_name || `Temp_Robot_${Date.now()}`; - - return { - content: [{ - type: "text", - text: `To scrape ${url} for "${description}", you would need to: - -1. First create a robot using the Maxun web interface at your configured URL -2. Train the robot to extract the desired data: ${description} -3. Note the robot ID from the interface -4. Then use the 'run_robot' tool with that robot ID - -Alternatively, you can: -1. Use 'list_robots' to see existing robots -2. Find a robot that might work for similar data extraction -3. Use 'run_robot' with that robot's ID - -Robot name suggestion: ${robotName} -Target URL: ${url} -Extraction goal: ${description}` - }] - }; - } catch (error: any) { - return { - content: [{ - type: "text", - text: `Error: ${error.message}` - }], - isError: true - }; - } - } - ); - - // Tool: Get robot performance summary - this.mcpServer.tool( - "get_robot_summary", - { - robot_id: z.string().describe("ID of the robot") - }, - async ({ robot_id }) => { - try { - const [robotData, runsData] = await Promise.all([ - this.makeApiRequest(`/api/robots/${robot_id}`), - this.makeApiRequest(`/api/robots/${robot_id}/runs`) - ]); - - const robot = robotData.robot; - const runs = runsData.runs.items; - - const successfulRuns = runs.filter((run: any) => run.status === 'success'); - const failedRuns = runs.filter((run: any) => run.status === 'failed'); - - let totalTextItems = 0; - let totalListItems = 0; - let totalScreenshots = 0; - - successfulRuns.forEach((run: any) => { - if (run.data.textData) totalTextItems += run.data.textData.length; - if (run.data.listData) totalListItems += run.data.listData.length; - if (run.screenshots) totalScreenshots += run.screenshots.length; - }); - - const summary = `Robot Performance Summary: - -Robot Name: ${robot.name} -Robot ID: ${robot.id} -Created: ${robot.createdAt ? new Date(robot.createdAt).toLocaleString() : 'N/A'} - -Performance Metrics: -- Total Runs: ${runs.length} -- Successful Runs: ${successfulRuns.length} -- Failed Runs: ${failedRuns.length} -- Success Rate: ${runs.length > 0 ? ((successfulRuns.length / runs.length) * 100).toFixed(1) : 0}% - -Data Extracted: -- Total Text Items: ${totalTextItems} -- Total List Items: ${totalListItems} -- Total Screenshots: ${totalScreenshots} -- Total Data Points: ${totalTextItems + totalListItems} - -Input Parameters: -${JSON.stringify(robot.inputParameters, null, 2)}`; - - return { - content: [{ - type: "text", - text: summary - }] - }; - } catch (error: any) { - return { - content: [{ - type: "text", - text: `Error generating robot summary: ${error.message}` - }], - isError: true - }; - } - } - ); - } - - private setupResources() { - // Resource: Get robot data as JSON - this.mcpServer.resource( - "robot-data", - new ResourceTemplate("robot-data://{robot_id}?run_id={run_id}", { - list: undefined - }), - async (uri, { robot_id, run_id }) => { - if (!robot_id) { - throw new Error('robot_id parameter is required'); - } - - try { - let data; - if (run_id) { - data = await this.makeApiRequest(`/api/robots/${robot_id}/runs/${run_id}`); - } else { - data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`); - } - - return { - contents: [{ - uri: uri.href, - text: JSON.stringify(data, null, 2), - mimeType: "application/json" - }] - }; - } catch (error: any) { - throw new Error(`Error fetching robot data: ${error.message}`); - } - } - ); - - // Resource: Get extracted data as CSV format - this.mcpServer.resource( - "extracted-data-csv", - new ResourceTemplate("extracted-data-csv://{robot_id}/{run_id}", { - list: undefined - }), - async (uri, { robot_id, run_id }) => { - if (!robot_id || !run_id) { - throw new Error('Both robot_id and run_id parameters are required'); - } - - try { - const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs/${run_id}`); - const run = data.run; - - // Convert extracted data to CSV format - let csvContent = ''; - - if (run.data.textData && run.data.textData.length > 0) { - csvContent += 'Type,Data\n'; - run.data.textData.forEach((item: any) => { - csvContent += `"text","${JSON.stringify(item).replace(/"/g, '""')}"\n`; - }); - } - - if (run.data.listData && run.data.listData.length > 0) { - if (csvContent) csvContent += '\n'; - run.data.listData.forEach((item: any) => { - csvContent += `"list","${JSON.stringify(item).replace(/"/g, '""')}"\n`; - }); - } - - return { - contents: [{ - uri: uri.href, - text: csvContent, - mimeType: "text/csv" - }] - }; - } catch (error: any) { - throw new Error(`Error generating CSV: ${error.message}`); - } - } - ); - } - - private setupPrompts() { - // Prompt: Analyze website and suggest scraping strategy - this.mcpServer.prompt( - "analyze-website-for-scraping", - { - url: z.string().url().describe("URL of the website to analyze"), - target_data: z.string().describe("Description of the data you want to extract") - }, - ({ url, target_data }) => ({ - messages: [{ - role: "user", - content: { - type: "text", - text: `Analyze this website for web scraping and provide a strategy: - -Website URL: ${url} -Target Data: ${target_data} - -Please help me: -1. First, use the 'list_robots' tool to see if there are existing robots that might work -2. If there's a suitable robot, use 'get_robot_summary' to check its performance -3. If there's a good existing robot, use 'run_robot' to extract the data -4. If no suitable robot exists, provide detailed instructions for creating a new robot - -Focus on: -- Identifying the best approach for extracting: ${target_data} -- Recommending specific robots from the available list if applicable -- Providing step-by-step guidance for the scraping process` - } - }] - }) - ); - - // Prompt: Monitor and analyze robot performance - this.mcpServer.prompt( - "analyze-robot-performance", - { - robot_id: z.string().describe("ID of the robot to analyze") - }, - ({ robot_id }) => ({ - messages: [{ - role: "user", - content: { - type: "text", - text: `Perform a comprehensive analysis of robot performance: - -Robot ID: ${robot_id} - -Please: -1. Use 'get_robot_summary' to get overall performance metrics -2. Use 'get_robot_runs' to analyze recent run patterns -3. Identify any performance issues or trends -4. Suggest optimizations if needed -5. Provide recommendations for improving success rates - -Focus on: -- Success rate analysis -- Data extraction efficiency -- Error patterns -- Performance trends over time` - } - }] - }) - ); - - // Prompt: Extract and format data - this.mcpServer.prompt( - "extract-and-format-data", - { - robot_id: z.string().describe("ID of the robot to use"), - output_format: z.enum(["json", "csv", "summary"]).describe("Desired output format") - }, - ({ robot_id, output_format }) => ({ - messages: [{ - role: "user", - content: { - type: "text", - text: `Extract data using the specified robot and format the output: - -Robot ID: ${robot_id} -Output Format: ${output_format} - -Please: -1. Use 'run_robot' to execute the data extraction -2. Format the extracted data according to the requested format -3. Provide a clean, organized presentation of the results -4. Include metadata about the extraction (timing, data volume, etc.) - -For ${output_format} format: -${output_format === 'json' ? '- Structure data as clean JSON objects' : - output_format === 'csv' ? '- Format as comma-separated values with headers' : - '- Provide a human-readable summary with key insights'} - -Ensure the output is ready for immediate use in downstream applications.` - } - }] - }) - ); - - // Prompt: Compare robots for task suitability - this.mcpServer.prompt( - "compare-robots-for-task", - { - task_description: z.string().describe("Description of the scraping task"), - website_type: z.string().optional().describe("Type of website (e.g., e-commerce, news, social media)") - }, - ({ task_description, website_type }) => ({ - messages: [{ - role: "user", - content: { - type: "text", - text: `Help me find the best robot for this scraping task: - -Task: ${task_description} -${website_type ? `Website Type: ${website_type}` : ''} - -Please: -1. Use 'list_robots' to get all available robots -2. Analyze each robot's capabilities based on their names and parameters -3. Use 'get_robot_summary' for the most promising candidates -4. Compare their performance metrics and success rates -5. Recommend the best robot(s) for this specific task - -Consider: -- Robot specialization and target websites -- Success rates and reliability -- Data extraction capabilities -- Recent performance trends - -Provide a ranked recommendation with reasoning for each choice.` - } - }] - }) - ); - } - - async start() { - try { - let transport; - - if (this.config.transport === 'stdio') { - transport = new StdioServerTransport(); - logger.log('info', 'Starting Maxun MCP server with stdio transport'); - } else { - // HTTP transport for web-based MCP clients - transport = new StreamableHTTPServerTransport({ - sessionIdGenerator: undefined // Stateless for simplicity - }); - logger.log('info', `Starting Maxun MCP server with HTTP transport on port ${this.config.httpPort}`); - } - - await this.mcpServer.connect(transport); - logger.log('info', 'Maxun MCP server connected and ready'); - - return transport; - } catch (error: any) { - logger.log('error', `Failed to start Maxun MCP server: ${error.message}`); - throw error; - } - } - - async stop() { - try { - await this.mcpServer.close(); - logger.log('info', 'Maxun MCP server stopped'); - } catch (error: any) { - logger.log('error', `Error stopping Maxun MCP server: ${error.message}`); - } - } -} - -export default MaxunMCPServer; \ No newline at end of file From 35d3ac59c8c2d07725b05d5af3d6fb6ffc3f1e68 Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 12 Jun 2025 15:17:08 +0530 Subject: [PATCH 09/12] chore: rm comments --- server/src/mcp-worker.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/src/mcp-worker.ts b/server/src/mcp-worker.ts index de719911..38cabdd9 100644 --- a/server/src/mcp-worker.ts +++ b/server/src/mcp-worker.ts @@ -1,4 +1,3 @@ -// mcp-worker.ts import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { z } from "zod"; @@ -7,10 +6,9 @@ import dotenv from 'dotenv'; dotenv.config(); -// Simple logger that doesn't interfere with stdio const log = (message: string) => { if (process.env.NODE_ENV !== 'production') { - console.error(`[MCP Worker] ${message}`); // Use stderr for logging + console.error(`[MCP Worker] ${message}`); } }; From c5874439a8919e8922b518c8866875d72be8da2a Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 24 Jun 2025 14:21:33 +0530 Subject: [PATCH 10/12] chore: change import paths --- server/src/mcp-worker.ts | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/server/src/mcp-worker.ts b/server/src/mcp-worker.ts index 38cabdd9..c2384797 100644 --- a/server/src/mcp-worker.ts +++ b/server/src/mcp-worker.ts @@ -1,5 +1,5 @@ -import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; -import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio"; import { z } from "zod"; import fetch from 'node-fetch'; import dotenv from 'dotenv'; @@ -86,7 +86,7 @@ class MaxunMCPWorker { { robot_id: z.string().describe("ID of the robot to get details for") }, - async ({ robot_id }) => { + async ({ robot_id }: { robot_id: string }) => { try { const data = await this.makeApiRequest(`/api/robots/${robot_id}`); @@ -115,7 +115,7 @@ class MaxunMCPWorker { robot_id: z.string().describe("ID of the robot to run"), wait_for_completion: z.boolean().default(true).describe("Whether to wait for the run to complete") }, - async ({ robot_id, wait_for_completion }) => { + async ({ robot_id, wait_for_completion }: { robot_id: string; wait_for_completion: boolean }) => { try { const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`, { method: 'POST' @@ -181,7 +181,7 @@ class MaxunMCPWorker { { robot_id: z.string().describe("ID of the robot") }, - async ({ robot_id }) => { + async ({ robot_id }: { robot_id: string }) => { try { const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`); @@ -210,7 +210,7 @@ class MaxunMCPWorker { robot_id: z.string().describe("ID of the robot"), run_id: z.string().describe("ID of the specific run") }, - async ({ robot_id, run_id }) => { + async ({ robot_id, run_id }: { robot_id: string; run_id: string }) => { try { const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs/${run_id}`); @@ -261,7 +261,7 @@ class MaxunMCPWorker { { robot_id: z.string().describe("ID of the robot") }, - async ({ robot_id }) => { + async ({ robot_id }: { robot_id: string }) => { try { const [robotData, runsData] = await Promise.all([ this.makeApiRequest(`/api/robots/${robot_id}`), From d322a7db04631144f09ff7c4acb22f2549cfb65e Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 24 Jun 2025 14:25:17 +0530 Subject: [PATCH 11/12] chore: resolve mcp imports --- server/src/mcp-worker.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/mcp-worker.ts b/server/src/mcp-worker.ts index c2384797..259ef2fd 100644 --- a/server/src/mcp-worker.ts +++ b/server/src/mcp-worker.ts @@ -1,5 +1,5 @@ -import { McpServer } from "@modelcontextprotocol/sdk/server/mcp"; -import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio"; +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; import { z } from "zod"; import fetch from 'node-fetch'; import dotenv from 'dotenv'; From a74d4e07939a9f5c9fa363d873fe909593cfb552 Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 24 Jun 2025 15:13:05 +0530 Subject: [PATCH 12/12] feat: rm mcp server init --- server/src/server.ts | 46 +------------------------------------------- 1 file changed, 1 insertion(+), 45 deletions(-) diff --git a/server/src/server.ts b/server/src/server.ts index ddd8a50e..2594c712 100644 --- a/server/src/server.ts +++ b/server/src/server.ts @@ -83,48 +83,6 @@ export const io = new Server(server); */ export const browserPool = new BrowserPool(); -const initializeMCPServer = async () => { - try { - const mcpEnabled = process.env.MCP_ENABLED === 'true'; - const mcpApiKey = process.env.MCP_API_KEY; - - if (!mcpEnabled) { - logger.log('info', 'MCP Server disabled (set MCP_ENABLED=true to enable)'); - return; - } - - if (!mcpApiKey) { - logger.log('warning', 'MCP_API_KEY not set. MCP server will not be able to authenticate with Maxun API.'); - return; - } - - const mcpWorkerPath = path.resolve(__dirname, './mcp-worker.ts'); - const mcpWorkerProcess = fork(mcpWorkerPath, [], { - execArgv: process.env.NODE_ENV === 'production' ? [] : ['--inspect=5861'], - env: { - ...process.env, - MCP_WORKER: 'true' - } - }); - - mcpWorkerProcess.on('message', (message: any) => { - logger.log('info', `MCP Worker message: ${message}`); - }); - - mcpWorkerProcess.on('error', (error: any) => { - logger.log('error', `MCP Worker error: ${error}`); - }); - - mcpWorkerProcess.on('exit', (code: any) => { - logger.log('info', `MCP Worker exited with code: ${code}`); - }); - - logger.log('info', 'MCP Server started with stdio transport in worker process'); - } catch (error: any) { - logger.log('error', `Failed to initialize MCP Server: ${error.message}`); - } -}; - app.use(cookieParser()) app.use('/webhook', webhook); @@ -205,9 +163,7 @@ server.listen(SERVER_PORT, '0.0.0.0', async () => { try { await connectDB(); await syncDB(); - logger.log('info', `Server listening on port ${SERVER_PORT}`); - - await initializeMCPServer(); + logger.log('info', `Server listening on port ${SERVER_PORT}`); } catch (error: any) { logger.log('error', `Failed to connect to the database: ${error.message}`); process.exit(1);