feat: add mcp server

2025-06-12 12:50:22 +05:30
parent 24276c73b8
commit 1034eca200
1 changed files with 623 additions and 0 deletions
--- a/server/src/mcp/index.ts
+++ b/server/src/mcp/index.ts
@@ -0,0 +1,623 @@
+// server/src/mcp/index.ts
+import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
+import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
+import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
+import { z } from "zod";
+import fetch from 'node-fetch';
+import logger from '../logger';
+
+/**
+ * Configuration for the MCP server.
+ *
+ * An object of this shape is passed to the `MaxunMCPServer` constructor.
+ */
+interface MaxunMCPConfig {
+  /** Server name handed to the underlying `McpServer`. */
+  name: string;
+  /** Server version handed to the underlying `McpServer`. */
+  version: string;
+  /** Base URL of the Maxun API; endpoint paths such as `/api/robots` are appended to it. */
+  maxunApiUrl: string;
+  /** API key sent with every API request in the `x-api-key` header. */
+  apiKey: string;
+  /** `'stdio'` selects `StdioServerTransport`; `'http'` selects `StreamableHTTPServerTransport`. */
+  transport: 'stdio' | 'http';
+  /** Port mentioned in the HTTP startup log message; not otherwise read in this file. */
+  httpPort?: number;
+}
+
+/**
+ * MCP server that exposes the Maxun robot API as MCP tools, resources and prompts.
+ *
+ * Tools and resources call the HTTP API rooted at `config.maxunApiUrl` and
+ * authenticate with `config.apiKey` through the `x-api-key` header.
+ */
+class MaxunMCPServer {
+  private readonly mcpServer: McpServer;
+  private readonly config: MaxunMCPConfig;
+
+  /**
+   * Builds the underlying `McpServer` and registers every tool, resource and prompt.
+   * No transport is connected until `start()` is called.
+   *
+   * @param config Server identity, API location/credentials and transport choice.
+   */
+  constructor(config: MaxunMCPConfig) {
+    this.config = config;
+    this.mcpServer = new McpServer({
+      name: config.name,
+      version: config.version
+    });
+
+    this.setupTools();
+    this.setupResources();
+    this.setupPrompts();
+  }
+
+  /**
+   * Sends a JSON request to the Maxun API and returns the parsed response body.
+   *
+   * @param endpoint Path appended to `config.maxunApiUrl`, e.g. `/api/robots`.
+   * @param options Options forwarded to `fetch`; `options.headers` are merged over the defaults.
+   * @returns The parsed JSON body. Its shape is not validated here.
+   * @throws Error when `response.ok` is false; the message carries status code and text.
+   */
+  private async makeApiRequest(endpoint: string, options: any = {}): Promise<any> {
+    // Drop trailing slashes so a base URL such as "http://host/" does not yield "//api/...".
+    const baseUrl = this.config.maxunApiUrl.replace(/\/+$/, '');
+    const headers = {
+      'Content-Type': 'application/json',
+      'x-api-key': this.config.apiKey,
+      ...options.headers
+    };
+
+    const response = await fetch(`${baseUrl}${endpoint}`, {
+      ...options,
+      headers
+    });
+
+    if (!response.ok) {
+      throw new Error(`API request failed: ${response.status} ${response.statusText}`);
+    }
+
+    return await response.json();
+  }
+
+  /** Extracts a printable message from whatever value was thrown. */
+  private describeError(error: unknown): string {
+    return error instanceof Error ? error.message : String(error);
+  }
+
+  /**
+   * Turns a caller-supplied identifier into a single URL path segment.
+   *
+   * Identifiers arrive from MCP clients, so they are percent-encoded to keep
+   * characters such as `/`, `?` and `#` from changing which endpoint is called.
+   * `.` and `..` are rejected because `encodeURIComponent` leaves them intact and
+   * URL normalisation would otherwise resolve them as directory steps.
+   *
+   * @throws Error when the identifier is empty, `.` or `..`.
+   */
+  private toPathSegment(value: string | string[]): string {
+    const raw = String(value);
+    if (raw === '' || raw === '.' || raw === '..') {
+      throw new Error(`Invalid identifier: "${raw}"`);
+    }
+    return encodeURIComponent(raw);
+  }
+
+  /**
+   * Runs a tool body and wraps its outcome in an MCP tool result.
+   *
+   * @param errorPrefix Text placed before `: <message>` when `buildText` throws.
+   * @param buildText Produces the text returned to the client on success.
+   * @returns A single text content item; `isError` is set only on failure.
+   */
+  private async runTool(errorPrefix: string, buildText: () => Promise<string>) {
+    try {
+      return {
+        content: [{ type: 'text' as const, text: await buildText() }]
+      };
+    } catch (error: unknown) {
+      return {
+        content: [{ type: 'text' as const, text: `${errorPrefix}: ${this.describeError(error)}` }],
+        isError: true
+      };
+    }
+  }
+
+  /** Renders items as a 1-based numbered list, one newline-terminated line per item. */
+  private numberedLines(items: unknown[]): string {
+    return items.map((item, index) => `${index + 1}. ${item}\n`).join('');
+  }
+
+  /**
+   * Converts a run's extracted data into CSV text with the columns `Type,Data`.
+   *
+   * Each text/list item becomes one row whose `Data` cell is the item's JSON with
+   * double quotes doubled. The header is written whenever at least one row exists,
+   * and rows are contiguous (no blank separator line).
+   *
+   * @param runData The `data` object of a run; may be missing.
+   * @returns The CSV text, or an empty string when there is nothing to export.
+   */
+  private toCsv(runData: any): string {
+    const extracted = runData || {};
+    const sections: Array<[string, any]> = [
+      ['text', extracted.textData],
+      ['list', extracted.listData]
+    ];
+
+    const rows: string[] = [];
+    for (const [kind, items] of sections) {
+      if (!items || !(items.length > 0)) continue;
+      for (const item of items) {
+        rows.push(`"${kind}","${JSON.stringify(item).replace(/"/g, '""')}"`);
+      }
+    }
+
+    return rows.length > 0 ? `Type,Data\n${rows.join('\n')}\n` : '';
+  }
+
+  /** Registers the tools that list, inspect, run and summarise robots. */
+  private setupTools() {
+    // Tool: List all robots
+    this.mcpServer.tool(
+      "list_robots",
+      {},
+      async () =>
+        this.runTool('Error fetching robots', async () => {
+          const data = await this.makeApiRequest('/api/robots');
+          return `Found ${data.robots.totalCount} robots:\n\n${JSON.stringify(data.robots.items, null, 2)}`;
+        })
+    );
+
+    // Tool: Get robot details by ID
+    this.mcpServer.tool(
+      "get_robot",
+      {
+        robot_id: z.string().describe("ID of the robot to get details for")
+      },
+      async ({ robot_id }) =>
+        this.runTool('Error fetching robot', async () => {
+          const data = await this.makeApiRequest(`/api/robots/${this.toPathSegment(robot_id)}`);
+          return `Robot Details:\n\n${JSON.stringify(data.robot, null, 2)}`;
+        })
+    );
+
+    // Tool: Run a robot and get results
+    this.mcpServer.tool(
+      "run_robot",
+      {
+        robot_id: z.string().describe("ID of the robot to run"),
+        wait_for_completion: z.boolean().default(true).describe("Whether to wait for the run to complete")
+      },
+      async ({ robot_id, wait_for_completion }) =>
+        this.runTool('Error running robot', async () => {
+          // The POST is awaited in both cases; the flag only selects how much of
+          // the response is reported back.
+          const data = await this.makeApiRequest(`/api/robots/${this.toPathSegment(robot_id)}/runs`, {
+            method: 'POST'
+          });
+          const run = data.run;
+
+          if (!wait_for_completion) {
+            return `Robot run started! Run ID: ${run.runId}\nStatus: ${run.status}`;
+          }
+
+          // Only claim success when the reported status says so.
+          const headline = run.status === 'success'
+            ? 'Robot run completed successfully!'
+            : `Robot run finished with status "${run.status}".`;
+          // A run without a data object is reported as having no extracted data.
+          const extracted = run.data || {};
+          const screenshots = run.screenshots;
+
+          let resultText = `${headline}\n\n`;
+          resultText += `Run ID: ${run.runId}\n`;
+          resultText += `Status: ${run.status}\n`;
+          resultText += `Started: ${run.startedAt}\n`;
+          resultText += `Finished: ${run.finishedAt}\n\n`;
+
+          if (extracted.textData && extracted.textData.length > 0) {
+            resultText += `Extracted Text Data (${extracted.textData.length} items):\n`;
+            resultText += JSON.stringify(extracted.textData, null, 2) + '\n\n';
+          }
+
+          if (extracted.listData && extracted.listData.length > 0) {
+            resultText += `Extracted List Data (${extracted.listData.length} items):\n`;
+            resultText += JSON.stringify(extracted.listData, null, 2) + '\n\n';
+          }
+
+          if (screenshots && screenshots.length > 0) {
+            resultText += `Screenshots captured: ${screenshots.length}\n`;
+            resultText += `Screenshot URLs:\n`;
+            resultText += this.numberedLines(screenshots);
+          }
+
+          return resultText;
+        })
+    );
+
+    // Tool: Get all runs for a robot
+    this.mcpServer.tool(
+      "get_robot_runs",
+      {
+        robot_id: z.string().describe("ID of the robot")
+      },
+      async ({ robot_id }) =>
+        this.runTool('Error fetching runs', async () => {
+          const data = await this.makeApiRequest(`/api/robots/${this.toPathSegment(robot_id)}/runs`);
+          return `Robot runs (${data.runs.totalCount} total):\n\n${JSON.stringify(data.runs.items, null, 2)}`;
+        })
+    );
+
+    // Tool: Get specific run details
+    this.mcpServer.tool(
+      "get_run_details",
+      {
+        robot_id: z.string().describe("ID of the robot"),
+        run_id: z.string().describe("ID of the specific run")
+      },
+      async ({ robot_id, run_id }) =>
+        this.runTool('Error fetching run details', async () => {
+          const data = await this.makeApiRequest(
+            `/api/robots/${this.toPathSegment(robot_id)}/runs/${this.toPathSegment(run_id)}`
+          );
+          const run = data.run;
+          // A run without a data object is reported as having no extracted data.
+          const extracted = run.data || {};
+
+          let resultText = `Run Details:\n\n`;
+          resultText += `Run ID: ${run.runId}\n`;
+          resultText += `Status: ${run.status}\n`;
+          resultText += `Robot ID: ${run.robotId}\n`;
+          resultText += `Started: ${run.startedAt}\n`;
+          resultText += `Finished: ${run.finishedAt}\n\n`;
+
+          if (extracted.textData && extracted.textData.length > 0) {
+            resultText += `Extracted Text Data:\n${JSON.stringify(extracted.textData, null, 2)}\n\n`;
+          }
+
+          if (extracted.listData && extracted.listData.length > 0) {
+            resultText += `Extracted List Data:\n${JSON.stringify(extracted.listData, null, 2)}\n\n`;
+          }
+
+          if (run.screenshots && run.screenshots.length > 0) {
+            resultText += `Screenshots:\n`;
+            resultText += this.numberedLines(run.screenshots);
+          }
+
+          return resultText;
+        })
+    );
+
+    // Tool: Scrape any website with a one-time robot
+    this.mcpServer.tool(
+      "scrape_website",
+      {
+        url: z.string().url().describe("URL to scrape"),
+        description: z.string().describe("Description of what data to extract"),
+        robot_name: z.string().optional().describe("Optional name for the temporary robot")
+      },
+      async ({ url, description, robot_name }) =>
+        this.runTool('Error', async () => {
+          // No API call is made here: the tool only returns instructions, because
+          // this file has no endpoint for creating a robot.
+          const robotName = robot_name || `Temp_Robot_${Date.now()}`;
+
+          return `To scrape ${url} for "${description}", you would need to:
+
+1. First create a robot using the Maxun web interface at your configured URL
+2. Train the robot to extract the desired data: ${description}
+3. Note the robot ID from the interface
+4. Then use the 'run_robot' tool with that robot ID
+
+Alternatively, you can:
+1. Use 'list_robots' to see existing robots
+2. Find a robot that might work for similar data extraction
+3. Use 'run_robot' with that robot's ID
+
+Robot name suggestion: ${robotName}
+Target URL: ${url}
+Extraction goal: ${description}`;
+        })
+    );
+
+    // Tool: Get robot performance summary
+    this.mcpServer.tool(
+      "get_robot_summary",
+      {
+        robot_id: z.string().describe("ID of the robot")
+      },
+      async ({ robot_id }) =>
+        this.runTool('Error generating robot summary', async () => {
+          const robotPath = `/api/robots/${this.toPathSegment(robot_id)}`;
+          const [robotData, runsData] = await Promise.all([
+            this.makeApiRequest(robotPath),
+            this.makeApiRequest(`${robotPath}/runs`)
+          ]);
+
+          const robot = robotData.robot;
+          const runs = runsData.runs.items;
+
+          const successfulRuns = runs.filter((run: any) => run.status === 'success');
+          const failedRuns = runs.filter((run: any) => run.status === 'failed');
+
+          let totalTextItems = 0;
+          let totalListItems = 0;
+          let totalScreenshots = 0;
+
+          // Only successful runs contribute to the extracted-data totals.
+          for (const run of successfulRuns) {
+            const extracted = run.data || {};
+            if (extracted.textData) totalTextItems += extracted.textData.length;
+            if (extracted.listData) totalListItems += extracted.listData.length;
+            if (run.screenshots) totalScreenshots += run.screenshots.length;
+          }
+
+          return `Robot Performance Summary:
+
+Robot Name: ${robot.name}
+Robot ID: ${robot.id}
+Created: ${robot.createdAt ? new Date(robot.createdAt).toLocaleString() : 'N/A'}
+
+Performance Metrics:
+- Total Runs: ${runs.length}
+- Successful Runs: ${successfulRuns.length}
+- Failed Runs: ${failedRuns.length}
+- Success Rate: ${runs.length > 0 ? ((successfulRuns.length / runs.length) * 100).toFixed(1) : 0}%
+
+Data Extracted:
+- Total Text Items: ${totalTextItems}
+- Total List Items: ${totalListItems}
+- Total Screenshots: ${totalScreenshots}
+- Total Data Points: ${totalTextItems + totalListItems}
+
+Input Parameters:
+${JSON.stringify(robot.inputParameters, null, 2)}`;
+        })
+    );
+  }
+
+  /** Registers the resources that expose run data as JSON and as CSV. */
+  private setupResources() {
+    // Resource: Get robot data as JSON
+    // NOTE(review): confirm that this template also matches URIs without the
+    // "?run_id=" part; otherwise the "all runs" branch below is never reached.
+    this.mcpServer.resource(
+      "robot-data",
+      new ResourceTemplate("robot-data://{robot_id}?run_id={run_id}", {
+        list: undefined
+      }),
+      async (uri, { robot_id, run_id }) => {
+        if (!robot_id) {
+          throw new Error('robot_id parameter is required');
+        }
+
+        try {
+          // With a run_id a single run is fetched, otherwise every run of the robot.
+          const runsPath = `/api/robots/${this.toPathSegment(robot_id)}/runs`;
+          const data = await this.makeApiRequest(
+            run_id ? `${runsPath}/${this.toPathSegment(run_id)}` : runsPath
+          );
+
+          return {
+            contents: [{
+              uri: uri.href,
+              text: JSON.stringify(data, null, 2),
+              mimeType: "application/json"
+            }]
+          };
+        } catch (error: unknown) {
+          throw new Error(`Error fetching robot data: ${this.describeError(error)}`);
+        }
+      }
+    );
+
+    // Resource: Get extracted data as CSV format
+    this.mcpServer.resource(
+      "extracted-data-csv",
+      new ResourceTemplate("extracted-data-csv://{robot_id}/{run_id}", {
+        list: undefined
+      }),
+      async (uri, { robot_id, run_id }) => {
+        if (!robot_id || !run_id) {
+          throw new Error('Both robot_id and run_id parameters are required');
+        }
+
+        try {
+          const data = await this.makeApiRequest(
+            `/api/robots/${this.toPathSegment(robot_id)}/runs/${this.toPathSegment(run_id)}`
+          );
+
+          return {
+            contents: [{
+              uri: uri.href,
+              text: this.toCsv(data.run.data),
+              mimeType: "text/csv"
+            }]
+          };
+        } catch (error: unknown) {
+          throw new Error(`Error generating CSV: ${this.describeError(error)}`);
+        }
+      }
+    );
+  }
+
+  /** Registers prompt templates that steer a client through the tools above. */
+  private setupPrompts() {
+    // Prompt: Analyze website and suggest scraping strategy
+    this.mcpServer.prompt(
+      "analyze-website-for-scraping",
+      {
+        url: z.string().url().describe("URL of the website to analyze"),
+        target_data: z.string().describe("Description of the data you want to extract")
+      },
+      ({ url, target_data }) => ({
+        messages: [{
+          role: "user",
+          content: {
+            type: "text",
+            text: `Analyze this website for web scraping and provide a strategy:
+
+Website URL: ${url}
+Target Data: ${target_data}
+
+Please help me:
+1. First, use the 'list_robots' tool to see if there are existing robots that might work
+2. If there's a suitable robot, use 'get_robot_summary' to check its performance
+3. If there's a good existing robot, use 'run_robot' to extract the data
+4. If no suitable robot exists, provide detailed instructions for creating a new robot
+
+Focus on:
+- Identifying the best approach for extracting: ${target_data}
+- Recommending specific robots from the available list if applicable
+- Providing step-by-step guidance for the scraping process`
+          }
+        }]
+      })
+    );
+
+    // Prompt: Monitor and analyze robot performance
+    this.mcpServer.prompt(
+      "analyze-robot-performance",
+      {
+        robot_id: z.string().describe("ID of the robot to analyze")
+      },
+      ({ robot_id }) => ({
+        messages: [{
+          role: "user",
+          content: {
+            type: "text",
+            text: `Perform a comprehensive analysis of robot performance:
+
+Robot ID: ${robot_id}
+
+Please:
+1. Use 'get_robot_summary' to get overall performance metrics
+2. Use 'get_robot_runs' to analyze recent run patterns
+3. Identify any performance issues or trends
+4. Suggest optimizations if needed
+5. Provide recommendations for improving success rates
+
+Focus on:
+- Success rate analysis
+- Data extraction efficiency
+- Error patterns
+- Performance trends over time`
+          }
+        }]
+      })
+    );
+
+    // Prompt: Extract and format data
+    this.mcpServer.prompt(
+      "extract-and-format-data",
+      {
+        robot_id: z.string().describe("ID of the robot to use"),
+        output_format: z.enum(["json", "csv", "summary"]).describe("Desired output format")
+      },
+      ({ robot_id, output_format }) => {
+        // One formatting hint per supported output format; "summary" is the fallback.
+        let formatHint = '- Provide a human-readable summary with key insights';
+        if (output_format === 'json') {
+          formatHint = '- Structure data as clean JSON objects';
+        } else if (output_format === 'csv') {
+          formatHint = '- Format as comma-separated values with headers';
+        }
+
+        return {
+          messages: [{
+            role: "user",
+            content: {
+              type: "text",
+              text: `Extract data using the specified robot and format the output:
+
+Robot ID: ${robot_id}
+Output Format: ${output_format}
+
+Please:
+1. Use 'run_robot' to execute the data extraction
+2. Format the extracted data according to the requested format
+3. Provide a clean, organized presentation of the results
+4. Include metadata about the extraction (timing, data volume, etc.)
+
+For ${output_format} format:
+${formatHint}
+
+Ensure the output is ready for immediate use in downstream applications.`
+            }
+          }]
+        };
+      }
+    );
+
+    // Prompt: Compare robots for task suitability
+    this.mcpServer.prompt(
+      "compare-robots-for-task",
+      {
+        task_description: z.string().describe("Description of the scraping task"),
+        website_type: z.string().optional().describe("Type of website (e.g., e-commerce, news, social media)")
+      },
+      ({ task_description, website_type }) => ({
+        messages: [{
+          role: "user",
+          content: {
+            type: "text",
+            text: `Help me find the best robot for this scraping task:
+
+Task: ${task_description}
+${website_type ? `Website Type: ${website_type}` : ''}
+
+Please:
+1. Use 'list_robots' to get all available robots
+2. Analyze each robot's capabilities based on their names and parameters
+3. Use 'get_robot_summary' for the most promising candidates
+4. Compare their performance metrics and success rates
+5. Recommend the best robot(s) for this specific task
+
+Consider:
+- Robot specialization and target websites
+- Success rates and reliability
+- Data extraction capabilities
+- Recent performance trends
+
+Provide a ranked recommendation with reasoning for each choice.`
+          }
+        }]
+      })
+    );
+  }
+
+  /**
+   * Creates the configured transport and connects the MCP server to it.
+   *
+   * @returns The connected transport instance.
+   * @throws Re-throws any error raised while creating or connecting the transport,
+   *   after logging it.
+   */
+  async start(): Promise<StdioServerTransport | StreamableHTTPServerTransport> {
+    try {
+      let transport: StdioServerTransport | StreamableHTTPServerTransport;
+
+      if (this.config.transport === 'stdio') {
+        transport = new StdioServerTransport();
+        logger.log('info', 'Starting Maxun MCP server with stdio transport');
+      } else {
+        // HTTP transport for web-based MCP clients.
+        // NOTE(review): nothing in this class listens on config.httpPort; the
+        // caller presumably has to route HTTP requests into the returned
+        // transport - confirm against the SDK documentation.
+        transport = new StreamableHTTPServerTransport({
+          sessionIdGenerator: undefined // Stateless for simplicity
+        });
+        logger.log('info', `Starting Maxun MCP server with HTTP transport on port ${this.config.httpPort}`);
+      }
+
+      await this.mcpServer.connect(transport);
+      logger.log('info', 'Maxun MCP server connected and ready');
+
+      return transport;
+    } catch (error: unknown) {
+      logger.log('error', `Failed to start Maxun MCP server: ${this.describeError(error)}`);
+      throw error;
+    }
+  }
+
+  /**
+   * Closes the MCP server. Failures are logged and not re-thrown, so shutdown
+   * is best-effort.
+   */
+  async stop(): Promise<void> {
+    try {
+      await this.mcpServer.close();
+      logger.log('info', 'Maxun MCP server stopped');
+    } catch (error: unknown) {
+      logger.log('error', `Error stopping Maxun MCP server: ${this.describeError(error)}`);
+    }
+  }
+}
+
+// The server class is this module's default export.
+export default MaxunMCPServer;