feat: add mcp server

This commit is contained in:
Rohit
2025-06-12 12:50:22 +05:30
parent 24276c73b8
commit 1034eca200

623
server/src/mcp/index.ts Normal file
View File

@@ -0,0 +1,623 @@
// server/src/mcp/index.ts
import { McpServer, ResourceTemplate } from "@modelcontextprotocol/sdk/server/mcp.js";
import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js";
import { StreamableHTTPServerTransport } from "@modelcontextprotocol/sdk/server/streamableHttp.js";
import { z } from "zod";
import fetch from 'node-fetch';
import logger from '../logger';
/**
 * Configuration for the MCP server.
 *
 * An object of this shape is passed to the `MaxunMCPServer` constructor and
 * kept for the lifetime of the instance.
 */
interface MaxunMCPConfig {
/** Server name forwarded to the underlying `McpServer` constructor. */
name: string;
/** Server version forwarded to the underlying `McpServer` constructor. */
version: string;
/** Base URL that is prepended to every API endpoint path (e.g. `/api/robots`). */
maxunApiUrl: string;
/** API key sent as the `x-api-key` header on every API request. */
apiKey: string;
/** `'stdio'` selects `StdioServerTransport`; `'http'` selects `StreamableHTTPServerTransport`. */
transport: 'stdio' | 'http';
/** Only interpolated into the start-up log message for the HTTP transport in this file. */
httpPort?: number;
}
/**
 * MCP server that exposes a Maxun instance (robots and their runs) to MCP clients.
 *
 * On construction it registers:
 * - tools: `list_robots`, `get_robot`, `run_robot`, `get_robot_runs`,
 *   `get_run_details`, `scrape_website`, `get_robot_summary`
 * - resources: `robot-data`, `extracted-data-csv`
 * - prompts: `analyze-website-for-scraping`, `analyze-robot-performance`,
 *   `extract-and-format-data`, `compare-robots-for-task`
 *
 * All data comes from the HTTP API at `config.maxunApiUrl`, authenticated with
 * `config.apiKey`. API payloads are NOT schema-validated here, so they are
 * handled as `any` and accessed defensively.
 */
class MaxunMCPServer {
  private readonly mcpServer: McpServer;
  private readonly config: MaxunMCPConfig;

  /**
   * Creates the underlying `McpServer` and registers all tools, resources and prompts.
   *
   * @param config Server identity, API location/credentials and transport choice.
   */
  constructor(config: MaxunMCPConfig) {
    this.config = config;
    this.mcpServer = new McpServer({
      name: config.name,
      version: config.version
    });
    this.setupTools();
    this.setupResources();
    this.setupPrompts();
  }

  /** Joins the base URL and an endpoint path without producing a double slash. */
  private static joinUrl(baseUrl: string, endpoint: string): string {
    return `${baseUrl.replace(/\/+$/, '')}${endpoint}`;
  }

  /**
   * Percent-encodes a caller-supplied identifier so it stays a single path
   * segment (an id such as `a/b?c` must not alter the endpoint being called).
   * Arrays are stringified the same way a template literal would (comma-joined).
   */
  private static encodeSegment(value: string | string[]): string {
    return encodeURIComponent(String(value));
  }

  /** Extracts a printable message from anything that may have been thrown. */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /** Builds a successful tool result containing a single text item. */
  private static textResult(text: string) {
    return { content: [{ type: 'text' as const, text }] };
  }

  /** Builds a failed tool result of the form `<prefix>: <error message>`. */
  private static errorResult(prefix: string, error: unknown) {
    return {
      content: [{ type: 'text' as const, text: `${prefix}: ${MaxunMCPServer.describeError(error)}` }],
      isError: true
    };
  }

  /** Builds a prompt result containing a single user text message. */
  private static userPrompt(text: string) {
    return {
      messages: [{ role: 'user' as const, content: { type: 'text' as const, text } }]
    };
  }

  /** Renders items as `1. item\n2. item\n...` (every line newline-terminated). */
  private static numberedList(items: unknown[]): string {
    return items.map((item, index) => `${index + 1}. ${String(item)}\n`).join('');
  }

  /**
   * Converts a run's extracted data to CSV with columns `Type,Data`.
   *
   * Each text/list item becomes one row whose `Data` cell is the item's JSON,
   * with embedded double quotes doubled. The header is emitted whenever at
   * least one row exists; a run with no extracted items yields an empty string.
   */
  private static runToCsv(run: any): string {
    const rows: string[] = [];
    const appendRows = (kind: 'text' | 'list', items: any): void => {
      if (!(items?.length > 0)) return;
      items.forEach((item: unknown) => {
        rows.push(`"${kind}","${JSON.stringify(item).replace(/"/g, '""')}"`);
      });
    };
    appendRows('text', run?.data?.textData);
    appendRows('list', run?.data?.listData);
    return rows.length > 0 ? `Type,Data\n${rows.join('\n')}\n` : '';
  }

  /**
   * Performs an authenticated JSON request against the Maxun API.
   *
   * @param endpoint Path starting with `/`, appended to `config.maxunApiUrl`.
   * @param options  Extra fetch options; `headers` are merged over the defaults.
   * @returns The parsed JSON body (unvalidated, hence `any`).
   * @throws Error when the response status is not OK, or when fetching/parsing fails.
   */
  private async makeApiRequest(
    endpoint: string,
    options: { method?: string; headers?: Record<string, string>; body?: string } = {}
  ): Promise<any> {
    const url = MaxunMCPServer.joinUrl(this.config.maxunApiUrl, endpoint);
    const headers = {
      'Content-Type': 'application/json',
      'x-api-key': this.config.apiKey,
      ...options.headers
    };
    const response = await fetch(url, {
      ...options,
      headers
    });
    if (!response.ok) {
      throw new Error(`API request failed: ${response.status} ${response.statusText}`);
    }
    return await response.json();
  }

  /**
   * Registers every tool. Each handler converts failures into an
   * `isError` tool result instead of throwing.
   */
  private setupTools(): void {
    const { textResult, errorResult, encodeSegment, numberedList } = MaxunMCPServer;

    // Tool: List all robots
    this.mcpServer.tool(
      "list_robots",
      {},
      async () => {
        try {
          const data = await this.makeApiRequest('/api/robots');
          return textResult(
            `Found ${data.robots.totalCount} robots:\n\n${JSON.stringify(data.robots.items, null, 2)}`
          );
        } catch (error: unknown) {
          return errorResult('Error fetching robots', error);
        }
      }
    );

    // Tool: Get robot details by ID
    this.mcpServer.tool(
      "get_robot",
      {
        robot_id: z.string().describe("ID of the robot to get details for")
      },
      async ({ robot_id }) => {
        try {
          const data = await this.makeApiRequest(`/api/robots/${encodeSegment(robot_id)}`);
          return textResult(`Robot Details:\n\n${JSON.stringify(data.robot, null, 2)}`);
        } catch (error: unknown) {
          return errorResult('Error fetching robot', error);
        }
      }
    );

    // Tool: Run a robot and get results
    this.mcpServer.tool(
      "run_robot",
      {
        robot_id: z.string().describe("ID of the robot to run"),
        wait_for_completion: z.boolean().default(true).describe("Whether to wait for the run to complete")
      },
      async ({ robot_id, wait_for_completion }) => {
        try {
          // The same POST is issued either way; the API already waits for
          // completion, so the flag only selects how much of the result is reported.
          const data = await this.makeApiRequest(`/api/robots/${encodeSegment(robot_id)}/runs`, {
            method: 'POST'
          });
          const run = data.run;
          if (!wait_for_completion) {
            return textResult(`Robot run started! Run ID: ${run.runId}\nStatus: ${run.status}`);
          }

          // Do not claim success when the API reports another status.
          let resultText = run.status === 'success'
            ? `Robot run completed successfully!\n\n`
            : `Robot run finished with status "${run.status}".\n\n`;
          resultText += `Run ID: ${run.runId}\n`;
          resultText += `Status: ${run.status}\n`;
          resultText += `Started: ${run.startedAt}\n`;
          resultText += `Finished: ${run.finishedAt}\n\n`;

          // `data` may be absent (e.g. on a failed run), so access it defensively.
          const textData = run.data?.textData;
          const listData = run.data?.listData;
          const screenshots = run.screenshots;
          if (textData?.length > 0) {
            resultText += `Extracted Text Data (${textData.length} items):\n`;
            resultText += JSON.stringify(textData, null, 2) + '\n\n';
          }
          if (listData?.length > 0) {
            resultText += `Extracted List Data (${listData.length} items):\n`;
            resultText += JSON.stringify(listData, null, 2) + '\n\n';
          }
          if (screenshots?.length > 0) {
            resultText += `Screenshots captured: ${screenshots.length}\n`;
            resultText += `Screenshot URLs:\n`;
            resultText += numberedList(screenshots);
          }
          return textResult(resultText);
        } catch (error: unknown) {
          return errorResult('Error running robot', error);
        }
      }
    );

    // Tool: Get all runs for a robot
    this.mcpServer.tool(
      "get_robot_runs",
      {
        robot_id: z.string().describe("ID of the robot")
      },
      async ({ robot_id }) => {
        try {
          const data = await this.makeApiRequest(`/api/robots/${encodeSegment(robot_id)}/runs`);
          return textResult(
            `Robot runs (${data.runs.totalCount} total):\n\n${JSON.stringify(data.runs.items, null, 2)}`
          );
        } catch (error: unknown) {
          return errorResult('Error fetching runs', error);
        }
      }
    );

    // Tool: Get specific run details
    this.mcpServer.tool(
      "get_run_details",
      {
        robot_id: z.string().describe("ID of the robot"),
        run_id: z.string().describe("ID of the specific run")
      },
      async ({ robot_id, run_id }) => {
        try {
          const data = await this.makeApiRequest(
            `/api/robots/${encodeSegment(robot_id)}/runs/${encodeSegment(run_id)}`
          );
          const run = data.run;
          let resultText = `Run Details:\n\n`;
          resultText += `Run ID: ${run.runId}\n`;
          resultText += `Status: ${run.status}\n`;
          resultText += `Robot ID: ${run.robotId}\n`;
          resultText += `Started: ${run.startedAt}\n`;
          resultText += `Finished: ${run.finishedAt}\n\n`;
          if (run.data?.textData?.length > 0) {
            resultText += `Extracted Text Data:\n${JSON.stringify(run.data.textData, null, 2)}\n\n`;
          }
          if (run.data?.listData?.length > 0) {
            resultText += `Extracted List Data:\n${JSON.stringify(run.data.listData, null, 2)}\n\n`;
          }
          if (run.screenshots?.length > 0) {
            resultText += `Screenshots:\n`;
            resultText += numberedList(run.screenshots);
          }
          return textResult(resultText);
        } catch (error: unknown) {
          return errorResult('Error fetching run details', error);
        }
      }
    );

    // Tool: Scrape any website with a one-time robot.
    // No API call is made: there is no direct scrape endpoint, so the tool
    // only returns guidance text. Nothing here can throw, hence no try/catch.
    this.mcpServer.tool(
      "scrape_website",
      {
        url: z.string().url().describe("URL to scrape"),
        description: z.string().describe("Description of what data to extract"),
        robot_name: z.string().optional().describe("Optional name for the temporary robot")
      },
      async ({ url, description, robot_name }) => {
        // `||` (not `??`) on purpose: an empty name also falls back to the generated one.
        const robotName = robot_name || `Temp_Robot_${Date.now()}`;
        return textResult([
          `To scrape ${url} for "${description}", you would need to:`,
          '1. First create a robot using the Maxun web interface at your configured URL',
          `2. Train the robot to extract the desired data: ${description}`,
          '3. Note the robot ID from the interface',
          "4. Then use the 'run_robot' tool with that robot ID",
          'Alternatively, you can:',
          "1. Use 'list_robots' to see existing robots",
          '2. Find a robot that might work for similar data extraction',
          "3. Use 'run_robot' with that robot's ID",
          `Robot name suggestion: ${robotName}`,
          `Target URL: ${url}`,
          `Extraction goal: ${description}`
        ].join('\n'));
      }
    );

    // Tool: Get robot performance summary
    this.mcpServer.tool(
      "get_robot_summary",
      {
        robot_id: z.string().describe("ID of the robot")
      },
      async ({ robot_id }) => {
        try {
          const robotPath = `/api/robots/${encodeSegment(robot_id)}`;
          // The two requests are independent, so issue them in parallel.
          const [robotData, runsData] = await Promise.all([
            this.makeApiRequest(robotPath),
            this.makeApiRequest(`${robotPath}/runs`)
          ]);
          const robot = robotData.robot;
          const runs = runsData.runs.items;
          const successfulRuns = runs.filter((run: any) => run.status === 'success');
          const failedRuns = runs.filter((run: any) => run.status === 'failed');

          let totalTextItems = 0;
          let totalListItems = 0;
          let totalScreenshots = 0;
          // A run lacking `data` or a countable field contributes 0 instead of
          // aborting the whole summary.
          successfulRuns.forEach((run: any) => {
            totalTextItems += run.data?.textData?.length ?? 0;
            totalListItems += run.data?.listData?.length ?? 0;
            totalScreenshots += run.screenshots?.length ?? 0;
          });

          const successRate = runs.length > 0
            ? ((successfulRuns.length / runs.length) * 100).toFixed(1)
            : 0;
          const createdAt = robot.createdAt ? new Date(robot.createdAt).toLocaleString() : 'N/A';

          return textResult([
            'Robot Performance Summary:',
            `Robot Name: ${robot.name}`,
            `Robot ID: ${robot.id}`,
            `Created: ${createdAt}`,
            'Performance Metrics:',
            `- Total Runs: ${runs.length}`,
            `- Successful Runs: ${successfulRuns.length}`,
            `- Failed Runs: ${failedRuns.length}`,
            `- Success Rate: ${successRate}%`,
            'Data Extracted:',
            `- Total Text Items: ${totalTextItems}`,
            `- Total List Items: ${totalListItems}`,
            `- Total Screenshots: ${totalScreenshots}`,
            `- Total Data Points: ${totalTextItems + totalListItems}`,
            'Input Parameters:',
            `${JSON.stringify(robot.inputParameters, null, 2)}`
          ].join('\n'));
        } catch (error: unknown) {
          return errorResult('Error generating robot summary', error);
        }
      }
    );
  }

  /**
   * Registers the resource templates. Unlike tools, resource handlers signal
   * failure by throwing.
   */
  private setupResources(): void {
    const { encodeSegment, describeError, runToCsv } = MaxunMCPServer;

    // Resource: Get robot data as JSON (a single run when run_id is given, otherwise all runs)
    this.mcpServer.resource(
      "robot-data",
      new ResourceTemplate("robot-data://{robot_id}?run_id={run_id}", {
        list: undefined
      }),
      async (uri, { robot_id, run_id }) => {
        if (!robot_id) {
          throw new Error('robot_id parameter is required');
        }
        try {
          const runsPath = `/api/robots/${encodeSegment(robot_id)}/runs`;
          const data = await this.makeApiRequest(
            run_id ? `${runsPath}/${encodeSegment(run_id)}` : runsPath
          );
          return {
            contents: [{
              uri: uri.href,
              text: JSON.stringify(data, null, 2),
              mimeType: "application/json"
            }]
          };
        } catch (error: unknown) {
          throw new Error(`Error fetching robot data: ${describeError(error)}`);
        }
      }
    );

    // Resource: Get extracted data as CSV format
    this.mcpServer.resource(
      "extracted-data-csv",
      new ResourceTemplate("extracted-data-csv://{robot_id}/{run_id}", {
        list: undefined
      }),
      async (uri, { robot_id, run_id }) => {
        if (!robot_id || !run_id) {
          throw new Error('Both robot_id and run_id parameters are required');
        }
        try {
          const data = await this.makeApiRequest(
            `/api/robots/${encodeSegment(robot_id)}/runs/${encodeSegment(run_id)}`
          );
          return {
            contents: [{
              uri: uri.href,
              text: runToCsv(data.run),
              mimeType: "text/csv"
            }]
          };
        } catch (error: unknown) {
          throw new Error(`Error generating CSV: ${describeError(error)}`);
        }
      }
    );
  }

  /**
   * Registers the prompt templates. Each prompt yields a single user message
   * that tells the model which of the tools above to call and in what order.
   */
  private setupPrompts(): void {
    const { userPrompt } = MaxunMCPServer;

    // Prompt: Analyze website and suggest scraping strategy
    this.mcpServer.prompt(
      "analyze-website-for-scraping",
      {
        url: z.string().url().describe("URL of the website to analyze"),
        target_data: z.string().describe("Description of the data you want to extract")
      },
      ({ url, target_data }) => userPrompt([
        'Analyze this website for web scraping and provide a strategy:',
        `Website URL: ${url}`,
        `Target Data: ${target_data}`,
        'Please help me:',
        "1. First, use the 'list_robots' tool to see if there are existing robots that might work",
        "2. If there's a suitable robot, use 'get_robot_summary' to check its performance",
        "3. If there's a good existing robot, use 'run_robot' to extract the data",
        '4. If no suitable robot exists, provide detailed instructions for creating a new robot',
        'Focus on:',
        `- Identifying the best approach for extracting: ${target_data}`,
        '- Recommending specific robots from the available list if applicable',
        '- Providing step-by-step guidance for the scraping process'
      ].join('\n'))
    );

    // Prompt: Monitor and analyze robot performance
    this.mcpServer.prompt(
      "analyze-robot-performance",
      {
        robot_id: z.string().describe("ID of the robot to analyze")
      },
      ({ robot_id }) => userPrompt([
        'Perform a comprehensive analysis of robot performance:',
        `Robot ID: ${robot_id}`,
        'Please:',
        "1. Use 'get_robot_summary' to get overall performance metrics",
        "2. Use 'get_robot_runs' to analyze recent run patterns",
        '3. Identify any performance issues or trends',
        '4. Suggest optimizations if needed',
        '5. Provide recommendations for improving success rates',
        'Focus on:',
        '- Success rate analysis',
        '- Data extraction efficiency',
        '- Error patterns',
        '- Performance trends over time'
      ].join('\n'))
    );

    // Prompt: Extract and format data
    this.mcpServer.prompt(
      "extract-and-format-data",
      {
        robot_id: z.string().describe("ID of the robot to use"),
        output_format: z.enum(["json", "csv", "summary"]).describe("Desired output format")
      },
      ({ robot_id, output_format }) => {
        const formatHint =
          output_format === 'json' ? '- Structure data as clean JSON objects' :
          output_format === 'csv' ? '- Format as comma-separated values with headers' :
          '- Provide a human-readable summary with key insights';
        return userPrompt([
          'Extract data using the specified robot and format the output:',
          `Robot ID: ${robot_id}`,
          `Output Format: ${output_format}`,
          'Please:',
          "1. Use 'run_robot' to execute the data extraction",
          '2. Format the extracted data according to the requested format',
          '3. Provide a clean, organized presentation of the results',
          '4. Include metadata about the extraction (timing, data volume, etc.)',
          `For ${output_format} format:`,
          formatHint,
          'Ensure the output is ready for immediate use in downstream applications.'
        ].join('\n'));
      }
    );

    // Prompt: Compare robots for task suitability
    this.mcpServer.prompt(
      "compare-robots-for-task",
      {
        task_description: z.string().describe("Description of the scraping task"),
        website_type: z.string().optional().describe("Type of website (e.g., e-commerce, news, social media)")
      },
      ({ task_description, website_type }) => userPrompt([
        'Help me find the best robot for this scraping task:',
        `Task: ${task_description}`,
        // Stays an (empty) line of its own when no website type is given.
        website_type ? `Website Type: ${website_type}` : '',
        'Please:',
        "1. Use 'list_robots' to get all available robots",
        "2. Analyze each robot's capabilities based on their names and parameters",
        "3. Use 'get_robot_summary' for the most promising candidates",
        '4. Compare their performance metrics and success rates',
        '5. Recommend the best robot(s) for this specific task',
        'Consider:',
        '- Robot specialization and target websites',
        '- Success rates and reliability',
        '- Data extraction capabilities',
        '- Recent performance trends',
        'Provide a ranked recommendation with reasoning for each choice.'
      ].join('\n'))
    );
  }

  /**
   * Creates the configured transport and connects the MCP server to it.
   *
   * NOTE(review): for `'http'` this method only constructs a stateless
   * `StreamableHTTPServerTransport`; nothing in this class listens on
   * `config.httpPort`, so the caller presumably has to route incoming HTTP
   * requests to the returned transport — confirm against the call site.
   *
   * @returns The connected transport.
   * @throws Re-throws any connection failure after logging it.
   */
  async start() {
    try {
      let transport: StdioServerTransport | StreamableHTTPServerTransport;
      if (this.config.transport === 'stdio') {
        transport = new StdioServerTransport();
        logger.log('info', 'Starting Maxun MCP server with stdio transport');
      } else {
        // HTTP transport for web-based MCP clients
        transport = new StreamableHTTPServerTransport({
          sessionIdGenerator: undefined // Stateless for simplicity
        });
        // Avoid logging "port undefined" when no port was configured.
        const portNote = this.config.httpPort !== undefined ? ` on port ${this.config.httpPort}` : '';
        logger.log('info', `Starting Maxun MCP server with HTTP transport${portNote}`);
      }
      await this.mcpServer.connect(transport);
      logger.log('info', 'Maxun MCP server connected and ready');
      return transport;
    } catch (error: unknown) {
      logger.log('error', `Failed to start Maxun MCP server: ${MaxunMCPServer.describeError(error)}`);
      throw error;
    }
  }

  /**
   * Closes the MCP server. Best-effort: a failure is logged, not re-thrown,
   * so shutdown paths are never interrupted by it.
   */
  async stop(): Promise<void> {
    try {
      await this.mcpServer.close();
      logger.log('info', 'Maxun MCP server stopped');
    } catch (error: unknown) {
      logger.log('error', `Error stopping Maxun MCP server: ${MaxunMCPServer.describeError(error)}`);
    }
  }
}
// Default export: the MaxunMCPServer class (construct it with a MaxunMCPConfig, then call start()).
export default MaxunMCPServer;