Files
parcer/server/src/api/record.ts

1104 lines
36 KiB
TypeScript
Raw Normal View History

2024-09-26 22:39:39 +05:30
import { Router, Request, Response } from 'express';
2024-11-22 23:04:03 +05:30
import { chromium } from "playwright-extra";
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
2024-09-26 22:41:46 +05:30
import { requireAPIKey } from "../middlewares/api";
2024-10-10 05:47:42 +05:30
import Robot from "../models/Robot";
import Run from "../models/Run";
import { getDecryptedProxyConfig } from "../routes/proxy";
2025-05-28 14:22:43 +05:30
import { v4 as uuid } from "uuid";
import { createRemoteBrowserForRun, destroyRemoteBrowser } from "../browser-management/controller";
import logger from "../logger";
2025-10-21 00:43:08 +05:30
import { browserPool, io as serverIo } from "../server";
2024-10-12 15:55:00 +05:30
import { io, Socket } from "socket.io-client";
2024-10-15 22:17:16 +05:30
import { BinaryOutputService } from "../storage/mino";
2024-10-24 22:26:12 +05:30
import { AuthenticatedRequest } from "../routes/record"
2024-10-29 03:37:59 +05:30
import {capture} from "../utils/analytics";
2024-12-08 18:07:40 +05:30
import { Page } from "playwright";
import { WorkflowFile } from "maxun-core";
import { googleSheetUpdateTasks, processGoogleSheetUpdates } from "../workflow-management/integrations/gsheet";
import { airtableUpdateTasks, processAirtableUpdates } from "../workflow-management/integrations/airtable";
2025-05-27 23:46:19 +05:30
import { sendWebhook } from "../routes/webhook";
2025-10-21 00:43:08 +05:30
2024-11-22 23:04:03 +05:30
// Register the stealth plugin on the playwright-extra Chromium launcher.
chromium.use(stealthPlugin());
2024-09-26 22:39:39 +05:30
2025-10-21 00:43:08 +05:30
// Express router that the API routes below are registered on.
const router = Router();
2024-09-26 22:39:39 +05:30
/**
 * Builds the public summary of a stored robot for the robots list endpoint.
 *
 * The summary carries the robot's id and name, its creation time as epoch
 * milliseconds, and a single `originUrl` input parameter whose default value
 * is the URL of the first workflow step (empty string when there is none).
 *
 * @param recordingData - Robot record exposing `recording_meta` and `recording`.
 * @returns The formatted robot summary.
 */
const formatRecording = (recordingData: any) => {
    const { recording_meta: meta, recording } = recordingData;
    const steps = recording.workflow || [];
    const originUrl = steps[0]?.where?.url || '';

    return {
        id: meta.id,
        name: meta.name,
        createdAt: new Date(meta.createdAt).getTime(),
        inputParameters: [
            {
                type: "string",
                name: "originUrl",
                label: "Origin URL",
                required: true,
                defaultValue: originUrl,
            },
        ],
    };
};
2024-10-28 23:27:24 +05:30
/**
* @swagger
* /api/robots:
* get:
* summary: Get all robots
* description: Retrieve a list of all robots.
* security:
* - api_key: []
* responses:
* 200:
* description: A list of robots.
* content:
* application/json:
* schema:
* type: object
* properties:
* statusCode:
* type: integer
* example: 200
* messageCode:
* type: string
* example: success
* robots:
* type: object
* properties:
* totalCount:
* type: integer
* example: 5
* items:
* type: array
* items:
* type: object
* properties:
* id:
* type: string
* example: "12345"
* name:
* type: string
* example: "Sample Robot"
* 500:
* description: Error retrieving robots.
* content:
* application/json:
* schema:
* type: object
* properties:
* statusCode:
* type: integer
* example: 500
* messageCode:
* type: string
* example: error
* message:
* type: string
2024-10-29 00:05:45 +05:30
* example: "Failed to retrieve robots"
2024-10-28 23:27:24 +05:30
*/
// GET /robots — returns every stored robot in its formatted summary form.
router.get("/robots", requireAPIKey, async (req: Request, res: Response) => {
    try {
        const storedRobots = await Robot.findAll({ raw: true });
        const items = storedRobots.map(formatRecording);

        res.status(200).json({
            statusCode: 200,
            messageCode: "success",
            robots: {
                totalCount: items.length,
                items,
            },
        });
    } catch (error) {
        console.error("Error fetching robots:", error);
        res.status(500).json({
            statusCode: 500,
            messageCode: "error",
            message: "Failed to retrieve robots",
        });
    }
});
2024-09-26 22:56:44 +05:30
/**
 * Builds the public representation of a single robot for the by-id endpoint.
 *
 * Throws a TypeError when `recordingData` is null/undefined or lacks
 * `recording_meta` / `recording`.
 *
 * @param recordingData - Robot record exposing `recording_meta` and `recording`.
 * @returns Robot id, name, creation time (epoch ms) and its input parameters.
 */
const formatRecordingById = (recordingData: any) => {
    const meta = recordingData.recording_meta;
    const pairs = recordingData.recording.workflow || [];

    // The only input a robot exposes is the URL it starts from.
    const originUrlParameter = {
        type: "string",
        name: "originUrl",
        label: "Origin URL",
        required: true,
        defaultValue: pairs[0]?.where?.url || '',
    };

    return {
        id: meta.id,
        name: meta.name,
        createdAt: new Date(meta.createdAt).getTime(),
        inputParameters: [originUrlParameter],
    };
};
2024-09-26 22:56:44 +05:30
2024-10-28 23:27:24 +05:30
/**
* @swagger
* /api/robots/{id}:
* get:
* summary: Get robot by ID
* description: Retrieve a robot by its ID.
* security:
* - api_key: []
* parameters:
* - in: path
* name: id
* schema:
* type: string
* required: true
* description: The ID of the robot to retrieve.
* responses:
* 200:
* description: Robot details.
* content:
* application/json:
* schema:
* type: object
* properties:
* statusCode:
* type: integer
* example: 200
* messageCode:
* type: string
* example: success
* robot:
* type: object
* properties:
* id:
* type: string
* example: "12345"
* name:
* type: string
* example: "Sample Robot"
* 404:
* description: Robot not found.
* content:
* application/json:
* schema:
* type: object
* properties:
* statusCode:
* type: integer
* example: 404
* messageCode:
* type: string
* example: not_found
* message:
* type: string
* example: 'Robot with ID "12345" not found.'
*/
2024-10-10 06:08:11 +05:30
// GET /robots/:id — returns one robot looked up by its `recording_meta.id`.
// Responds 404 when no such robot exists and 500 on unexpected failures, so a
// database or formatting error is no longer misreported as "not found".
router.get("/robots/:id", requireAPIKey, async (req: Request, res: Response) => {
    try {
        const robot = await Robot.findOne({
            where: {
                'recording_meta.id': req.params.id
            },
            raw: true
        });

        // findOne yields null for a missing row; report that explicitly
        // instead of relying on the formatter throwing on null.
        if (!robot) {
            return res.status(404).json({
                statusCode: 404,
                messageCode: "not_found",
                message: `Robot with ID "${req.params.id}" not found.`,
            });
        }

        res.status(200).json({
            statusCode: 200,
            messageCode: "success",
            robot: formatRecordingById(robot),
        });
    } catch (error) {
        console.error("Error fetching robot:", error);
        res.status(500).json({
            statusCode: 500,
            messageCode: "error",
            message: "Failed to retrieve robot",
        });
    }
});
2024-10-28 23:27:24 +05:30
/**
* @swagger
* /api/robots/{id}/runs:
* get:
* summary: Get all runs for a robot
* description: Retrieve all runs associated with a specific robot.
* security:
* - api_key: []
* parameters:
* - in: path
* name: id
* schema:
* type: string
* required: true
* description: The ID of the robot.
* responses:
* 200:
* description: A list of runs for the robot.
* content:
* application/json:
* schema:
* type: object
* properties:
* statusCode:
* type: integer
* example: 200
* messageCode:
* type: string
* example: success
* runs:
* type: object
* properties:
* totalCount:
* type: integer
* example: 5
* items:
* type: array
* items:
* type: object
* properties:
* runId:
* type: string
* example: "67890"
* status:
* type: string
* example: "completed"
* 500:
* description: Error retrieving runs.
* content:
* application/json:
* schema:
* type: object
* properties:
* statusCode:
* type: integer
* example: 500
* messageCode:
* type: string
* example: error
* message:
* type: string
* example: "Failed to retrieve runs"
*/
// GET /robots/:id/runs — lists every run recorded for the given robot.
router.get("/robots/:id/runs", requireAPIKey, async (req: Request, res: Response) => {
    try {
        const robotRuns = await Run.findAll({
            where: { robotMetaId: req.params.id },
            raw: true
        });
        const items = robotRuns.map(formatRunResponse);

        res.status(200).json({
            statusCode: 200,
            messageCode: "success",
            runs: {
                totalCount: items.length,
                items,
            },
        });
    } catch (error) {
        console.error("Error fetching runs:", error);
        res.status(500).json({
            statusCode: 500,
            messageCode: "error",
            message: "Failed to retrieve runs",
        });
    }
});
2024-10-30 03:21:45 +05:30
/**
 * Converts a run record into the shape returned by the runs API.
 *
 * - `data.textData` / `data.listData` come from `serializableOutput.scrapeSchema`
 *   and `serializableOutput.scrapeList` when those are objects, otherwise `{}`.
 * - `data.markdown` is the `content` of the first `serializableOutput.markdown`
 *   entry when that field is an array, otherwise `''`.
 * - `screenshots` holds every truthy value of `binaryOutput`.
 *
 * @param run - Run record; a null/undefined value makes this function throw.
 * @returns The formatted run.
 */
function formatRunResponse(run: any) {
    const output = run.serializableOutput || {};
    const objectOrEmpty = (value: any) => (value && typeof value === 'object' ? value : {});

    const markdown = Array.isArray(output.markdown)
        ? output.markdown[0]?.content || ''
        : '';

    const screenshots: any[] = run.binaryOutput
        ? Object.values(run.binaryOutput).filter((entry) => Boolean(entry))
        : [];

    return {
        id: run.id,
        status: run.status,
        name: run.name,
        robotId: run.robotMetaId,
        startedAt: run.startedAt,
        finishedAt: run.finishedAt,
        runId: run.runId,
        runByUserId: run.runByUserId,
        runByScheduleId: run.runByScheduleId,
        runByAPI: run.runByAPI,
        data: {
            textData: objectOrEmpty(output.scrapeSchema),
            listData: objectOrEmpty(output.scrapeList),
            markdown,
        },
        screenshots,
    };
}
2024-10-28 23:27:24 +05:30
/**
* @swagger
* /api/robots/{id}/runs/{runId}:
* get:
* summary: Get a specific run by ID for a robot
* description: Retrieve details of a specific run by its ID.
* security:
* - api_key: []
* parameters:
* - in: path
* name: id
* schema:
* type: string
* required: true
* description: The ID of the robot.
* - in: path
* name: runId
* schema:
* type: string
* required: true
* description: The ID of the run.
* responses:
* 200:
* description: Run details.
* content:
* application/json:
* schema:
* type: object
* properties:
* statusCode:
* type: integer
* example: 200
* messageCode:
* type: string
* example: success
* run:
* type: object
* properties:
* runId:
* type: string
* example: "67890"
* status:
* type: string
* example: "completed"
* 404:
* description: Run not found.
* content:
* application/json:
* schema:
* type: object
* properties:
* statusCode:
* type: integer
* example: 404
* messageCode:
* type: string
* example: not_found
* message:
* type: string
* example: 'Run with id "67890" for robot with id "12345" not found.'
*/
// GET /robots/:id/runs/:runId — returns one run of a robot.
// Responds 404 when the run does not exist for that robot and 500 on
// unexpected failures, so a database error is no longer reported as
// "not found".
router.get("/robots/:id/runs/:runId", requireAPIKey, async (req: Request, res: Response) => {
    try {
        const run = await Run.findOne({
            where: {
                runId: req.params.runId,
                robotMetaId: req.params.id,
            },
            raw: true
        });

        // findOne yields null for a missing row; report that explicitly
        // instead of relying on the formatter throwing on null.
        if (!run) {
            return res.status(404).json({
                statusCode: 404,
                messageCode: "not_found",
                message: `Run with id "${req.params.runId}" for robot with id "${req.params.id}" not found.`,
            });
        }

        res.status(200).json({
            statusCode: 200,
            messageCode: "success",
            run: formatRunResponse(run),
        });
    } catch (error) {
        console.error("Error fetching run:", error);
        res.status(500).json({
            statusCode: 500,
            messageCode: "error",
            message: "Failed to retrieve run",
        });
    }
});
/**
 * Creates the remote browser and the `Run` row for an API-triggered run, and
 * notifies the user's `/queued-run` room that the run has started.
 *
 * @param id - `recording_meta.id` of the robot to run.
 * @param userId - Id of the user the run is executed for.
 * @returns `{ browserId, runId }` on success, or `{ success: false, error }`
 *          when the robot is missing or any step throws. This function does
 *          not throw.
 */
async function createWorkflowAndStoreMetadata(id: string, userId: string) {
    try {
        const recording = await Robot.findOne({
            where: {
                'recording_meta.id': id
            },
            raw: true
        });

        if (!recording || !recording.recording_meta || !recording.recording_meta.id) {
            return {
                success: false,
                error: 'Recording not found'
            };
        }

        // The decrypted proxy settings are not consumed in this function. The
        // lookup is kept so that any error it raises still aborts scheduling
        // exactly as before.
        // NOTE(review): confirm whether the proxy is meant to be forwarded to
        // the browser from here.
        await getDecryptedProxyConfig(userId);

        const browserId = createRemoteBrowserForRun(userId);
        const runId = uuid();

        try {
            const run = await Run.create({
                status: 'running',
                name: recording.recording_meta.name,
                robotId: recording.id,
                robotMetaId: recording.recording_meta.id,
                startedAt: new Date().toLocaleString(),
                finishedAt: '',
                browserId,
                interpreterSettings: { maxConcurrency: 1, maxRepeats: 1, debug: true },
                log: '',
                runId,
                runByAPI: true,
                serializableOutput: {},
                binaryOutput: {},
                retryCount: 0
            });

            const plainRun = run.toJSON();

            // Best-effort UI notification; a socket failure must not fail the run.
            try {
                const runStartedData = {
                    runId: plainRun.runId,
                    robotMetaId: plainRun.robotMetaId,
                    robotName: plainRun.name,
                    status: 'running',
                    startedAt: plainRun.startedAt,
                    runByUserId: plainRun.runByUserId,
                    runByScheduleId: plainRun.runByScheduleId,
                    runByAPI: plainRun.runByAPI || false,
                    browserId: plainRun.browserId
                };

                serverIo.of('/queued-run').to(`user-${userId}`).emit('run-started', runStartedData);
                logger.log('info', `API run started notification sent for run: ${plainRun.runId} to user-${userId}`);
            } catch (socketError: any) {
                logger.log('warn', `Failed to send run-started notification for API run ${plainRun.runId}: ${socketError.message}`);
            }

            return {
                browserId,
                runId: plainRun.runId,
            }
        } catch (creationError) {
            // The browser already exists at this point; release it so a failed
            // run creation does not leave it behind.
            try {
                await destroyRemoteBrowser(browserId, userId);
            } catch (cleanupError: any) {
                logger.log('warn', `Failed to destroy browser ${browserId} after run creation error: ${cleanupError.message}`);
            }
            throw creationError;
        }
    } catch (e) {
        const { message } = e as Error;
        logger.log('error', `Error while scheduling a run with id: ${id} - ${message}`);
        return {
            success: false,
            error: message,
        };
    }
}
2025-10-21 00:43:08 +05:30
/**
 * Registers a pending Google Sheets task and a pending Airtable task for the
 * run, then starts both processors without awaiting them.
 *
 * Processor rejections and synchronous failures are logged; nothing is thrown.
 *
 * @param runId - Id of the finished run.
 * @param robotMetaId - `recording_meta.id` of the robot that produced the run.
 */
async function triggerIntegrationUpdates(runId: string, robotMetaId: string): Promise<void> {
    // Builds the rejection logger for one integration.
    const logFailure = (integration: string) => (err: any) =>
        logger.log('error', `${integration} update error: ${err.message}`);

    try {
        googleSheetUpdateTasks[runId] = {
            robotId: robotMetaId,
            runId,
            status: 'pending',
            retries: 5,
        };

        airtableUpdateTasks[runId] = {
            robotId: robotMetaId,
            runId,
            status: 'pending',
            retries: 5,
        };

        // Fire-and-forget: both tasks are registered before either processor starts.
        processAirtableUpdates().catch(logFailure('Airtable'));
        processGoogleSheetUpdates().catch(logFailure('Google Sheets'));
    } catch (err: any) {
        logger.log('error', `Failed to update integrations for run: ${runId}: ${err.message}`);
    }
}
2025-03-05 21:27:08 +05:30
/**
 * Executes a run once its remote browser reports ready.
 *
 * When the run fails or throws, the remote browser is destroyed. A failure of
 * that cleanup is logged instead of propagated, so the returned promise never
 * rejects.
 *
 * @param browserId - Id of the remote browser assigned to the run.
 * @param id - Id of the run to execute.
 * @param userId - Id of the user the run belongs to.
 * @returns The run's interpretation info on success, otherwise `null`.
 */
async function readyForRunHandler(browserId: string, id: string, userId: string){
    // Cleanup must not throw: it is called from the catch block as well.
    const releaseBrowser = async () => {
        try {
            await destroyRemoteBrowser(browserId, userId);
        } catch (cleanupError: any) {
            logger.log('warn', `Failed to destroy browser ${browserId} for run ${id}: ${cleanupError.message}`);
        }
    };

    try {
        const result = await executeRun(id, userId);

        if (result && result.success) {
            logger.log('info', `Interpretation of ${id} succeeded`);
            resetRecordingState(browserId, id);
            return result.interpretationInfo;
        }

        logger.log('error', `Interpretation of ${id} failed`);
        await releaseBrowser();
        resetRecordingState(browserId, id);
        return null;
    } catch (error: any) {
        logger.error(`Error during readyForRunHandler: ${error.message}`);
        await releaseBrowser();
        return null;
    }
}
2024-10-12 15:52:26 +05:30
2024-10-12 15:55:43 +05:30
/**
 * Intentionally does nothing.
 *
 * The previous body assigned empty strings to its own parameters. Strings are
 * passed by value, so those assignments never reached the caller. The function
 * is kept, with its signature unchanged, so existing call sites still compile.
 *
 * @param browserId - Unused.
 * @param id - Unused.
 */
function resetRecordingState(browserId: string, id: string) {
    // No state to reset: callers hold their own copies of both values.
}
2024-10-12 15:53:13 +05:30
2024-12-08 18:07:40 +05:30
/**
 * Returns a JSON deep copy of the workflow in which every pair's `what` list
 * starts with a `flag` action carrying the argument `'generated'`.
 *
 * The input workflow is not modified.
 *
 * @param workflow - Workflow file to copy and annotate.
 * @returns The annotated copy.
 */
function AddGeneratedFlags(workflow: WorkflowFile) {
    const flagged = JSON.parse(JSON.stringify(workflow));
    const pairCount = workflow.workflow.length;

    let index = 0;
    while (index < pairCount) {
        // A fresh action object per pair, so pairs never share one instance.
        flagged.workflow[index].what.unshift({
            action: 'flag',
            args: ['generated'],
        });
        index += 1;
    }

    return flagged;
}
2025-03-05 21:27:08 +05:30
/**
 * Counts what a run extracted.
 *
 * Every array under `scrapeSchema` contributes its length and every other
 * object contributes 1; every array under `scrapeList` contributes its length;
 * each key of `binaryOutput` counts as one screenshot.
 */
function countExtractedItems(serializableOutput: any, binaryOutput: any) {
    let schemaItems = 0;
    let listItems = 0;

    if (serializableOutput) {
        if (serializableOutput.scrapeSchema) {
            Object.values(serializableOutput.scrapeSchema).forEach((schemaResult: any) => {
                if (Array.isArray(schemaResult)) {
                    schemaItems += schemaResult.length;
                } else if (schemaResult && typeof schemaResult === 'object') {
                    schemaItems += 1;
                }
            });
        }

        if (serializableOutput.scrapeList) {
            Object.values(serializableOutput.scrapeList).forEach((listResult: any) => {
                if (Array.isArray(listResult)) {
                    listItems += listResult.length;
                }
            });
        }
    }

    const screenshots = binaryOutput ? Object.keys(binaryOutput).length : 0;

    return { schemaItems, listItems, screenshots };
}

/** Parses `value` when it is a JSON string; otherwise returns it, or `{}` when it is falsy. */
function parseJsonIfString(value: any): any {
    return typeof value === 'string' ? JSON.parse(value) : value || {};
}

/**
 * Executes an API-triggered run and records its outcome.
 *
 * Runs whose status is `aborted`, `aborting` or `queued` are skipped, and a
 * run whose `retryCount` has reached 3 is marked failed. Markdown robots are
 * converted via `convertPageToMarkdown`; all other robots are interpreted in
 * the remote browser assigned to the run. On completion the run row is
 * updated, a `run-completed` event is emitted to the user's `/queued-run`
 * room and webhooks are sent; successful interpreted runs also trigger the
 * integration updates.
 *
 * @param id - `runId` of the run to execute.
 * @param userId - Id of the user the run belongs to.
 * @returns `{ success: true }` (with `interpretationInfo` when the run was
 *          executed) or `{ success: false, error }`.
 */
async function executeRun(id: string, userId: string) {
    try {
        const run = await Run.findOne({ where: { runId: id } });
        if (!run) {
            return {
                success: false,
                error: 'Run not found'
            };
        }

        const plainRun = run.toJSON();

        if (run.status === 'aborted' || run.status === 'aborting') {
            logger.log('info', `API Run ${id} has status ${run.status}, skipping execution`);
            return { success: true };
        }

        if (run.status === 'queued') {
            logger.log('info', `API Run ${id} has status 'queued', skipping stale execution - will be handled by recovery`);
            return { success: true };
        }

        const retryCount = plainRun.retryCount || 0;
        if (retryCount >= 3) {
            logger.log('warn', `API Run ${id} has exceeded max retries (${retryCount}/3), marking as failed`);
            await run.update({
                status: 'failed',
                finishedAt: new Date().toLocaleString(),
                log: `Max retries exceeded (${retryCount}/3) - Run permanently failed`
            });
            return { success: false, error: 'Max retries exceeded' };
        }

        const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
        if (!recording) {
            return {
                success: false,
                error: 'Recording not found'
            };
        }

        if (recording.recording_meta.type === 'markdown') {
            logger.log('info', `Executing markdown robot for API run ${id}`);

            await run.update({
                status: 'running',
                log: 'Converting page to markdown'
            });

            try {
                const { convertPageToMarkdown } = await import('../markdownify/scrape');
                const url = recording.recording_meta.url;

                if (!url) {
                    throw new Error('No URL specified for markdown robot');
                }

                const markdown = await convertPageToMarkdown(url);

                await run.update({
                    status: 'success',
                    finishedAt: new Date().toLocaleString(),
                    log: 'Markdown conversion completed successfully',
                    serializableOutput: {
                        markdown: [{ content: markdown }]
                    },
                    binaryOutput: {},
                });

                logger.log('info', `Markdown robot execution completed for API run ${id}`);

                // Best-effort UI notification.
                try {
                    const completionData = {
                        runId: plainRun.runId,
                        robotMetaId: plainRun.robotMetaId,
                        robotName: recording.recording_meta.name,
                        status: 'success',
                        finishedAt: new Date().toLocaleString()
                    };

                    serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', completionData);
                } catch (socketError: any) {
                    logger.log('warn', `Failed to send run-completed notification for markdown robot run ${id}: ${socketError.message}`);
                }

                const webhookPayload = {
                    robot_id: plainRun.robotMetaId,
                    run_id: plainRun.runId,
                    robot_name: recording.recording_meta.name,
                    status: 'success',
                    started_at: plainRun.startedAt,
                    finished_at: new Date().toLocaleString(),
                    markdown: markdown,
                    metadata: {
                        browser_id: plainRun.browserId,
                        user_id: userId,
                    }
                };

                try {
                    await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
                    logger.log('info', `Webhooks sent successfully for markdown robot API run ${plainRun.runId}`);
                } catch (webhookError: any) {
                    logger.log('warn', `Failed to send webhooks for markdown robot run ${plainRun.runId}: ${webhookError.message}`);
                }

                await destroyRemoteBrowser(plainRun.browserId, userId);

                return {
                    success: true,
                    interpretationInfo: run.toJSON()
                };
            } catch (error: any) {
                logger.log('error', `Markdown conversion failed for API run ${id}: ${error.message}`);

                await run.update({
                    status: 'failed',
                    finishedAt: new Date().toLocaleString(),
                    log: `Markdown conversion failed: ${error.message}`,
                });

                // The failed 'run-completed' event is emitted once, by the
                // outer catch below; emitting it here as well notified the
                // client twice for the same run.
                await destroyRemoteBrowser(plainRun.browserId, userId);

                throw error;
            }
        }

        const browser: any = browserPool.getRemoteBrowser(plainRun.browserId);
        if (!browser) {
            throw new Error('Could not access browser');
        }

        let currentPage = await browser.getCurrentPage();
        if (!currentPage) {
            throw new Error('Could not create a new page');
        }

        const workflow = AddGeneratedFlags(recording.recording);

        browser.interpreter.setRunId(plainRun.runId);

        const interpretationInfo = await browser.interpreter.InterpretRecording(
            // The callback keeps `currentPage` pointing at the page handed over by the interpreter.
            workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings
        );

        const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
        const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput);

        await destroyRemoteBrowser(plainRun.browserId, userId);

        const updatedRun = await run.update({
            status: 'success',
            finishedAt: new Date().toLocaleString(),
            log: interpretationInfo.log.join('\n'),
            binaryOutput: uploadedBinaryOutput,
        });

        const {
            schemaItems: totalSchemaItemsExtracted,
            listItems: totalListItemsExtracted,
            screenshots: extractedScreenshotsCount,
        } = countExtractedItems(
            updatedRun.dataValues.serializableOutput,
            updatedRun.dataValues.binaryOutput
        );
        const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted;

        capture('maxun-oss-run-created-api', {
            runId: id,
            created_at: new Date().toISOString(),
            status: 'success',
            totalRowsExtracted,
            schemaItemsExtracted: totalSchemaItemsExtracted,
            listItemsExtracted: totalListItemsExtracted,
            extractedScreenshotsCount,
        });

        // The stored output, and the lists/schemas inside it, may be JSON strings.
        const parsedOutput = parseJsonIfString(updatedRun.dataValues.serializableOutput);
        const parsedList = parseJsonIfString(parsedOutput.scrapeList);
        const parsedSchema = parseJsonIfString(parsedOutput.scrapeSchema);

        const webhookPayload = {
            robot_id: plainRun.robotMetaId,
            run_id: plainRun.runId,
            robot_name: recording.recording_meta.name,
            status: "success",
            started_at: plainRun.startedAt,
            finished_at: new Date().toLocaleString(),
            extracted_data: {
                captured_texts: parsedSchema || {},
                captured_lists: parsedList || {},
                captured_texts_count: totalSchemaItemsExtracted,
                captured_lists_count: totalListItemsExtracted,
                screenshots_count: extractedScreenshotsCount
            },
            metadata: {
                browser_id: plainRun.browserId,
                user_id: userId,
            },
        };

        try {
            await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
            logger.log('info', `Webhooks sent successfully for completed run ${plainRun.runId}`);
        } catch (webhookError: any) {
            logger.log('error', `Failed to send webhooks for run ${plainRun.runId}: ${webhookError.message}`);
        }

        await triggerIntegrationUpdates(plainRun.runId, plainRun.robotMetaId);

        return {
            success: true,
            interpretationInfo: updatedRun.toJSON()
        };
    } catch (error: any) {
        logger.log('error', `Error while running a robot with id: ${id} - ${error.message}`);

        const run = await Run.findOne({ where: { runId: id } });
        if (run) {
            await run.update({
                status: 'failed',
                finishedAt: new Date().toLocaleString(),
                log: (run.log ? run.log + '\n' : '') + `Error: ${error.message}\n` + (error.stack ? error.stack : ''),
            });

            // Resolve the robot name once for both the socket event and the
            // webhook. A failed lookup falls back to a placeholder so that
            // failure reporting itself cannot throw here.
            let robotName = 'Unknown Robot';
            try {
                const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true });
                if (recording) {
                    robotName = recording.recording_meta.name;
                }
            } catch (lookupError: any) {
                logger.log('warn', `Failed to look up robot for failed API run ${run.runId}: ${lookupError.message}`);
            }

            try {
                const failureData = {
                    runId: run.runId,
                    robotMetaId: run.robotMetaId,
                    robotName,
                    status: 'failed',
                    finishedAt: new Date().toLocaleString(),
                    runByUserId: run.runByUserId,
                    runByScheduleId: run.runByScheduleId,
                    runByAPI: run.runByAPI || false,
                    browserId: run.browserId
                };

                serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', failureData);
                logger.log('info', `API run permanently failed notification sent for run: ${run.runId} to user-${userId}`);
            } catch (socketError: any) {
                logger.log('warn', `Failed to send run-completed notification for permanently failed API run ${run.runId}: ${socketError.message}`);
            }

            // Trigger webhooks for run failure
            const failedWebhookPayload = {
                robot_id: run.robotMetaId,
                run_id: run.runId,
                robot_name: robotName,
                status: 'failed',
                started_at: run.startedAt,
                finished_at: new Date().toLocaleString(),
                error: {
                    message: error.message,
                    stack: error.stack,
                    type: error.name || 'ExecutionError'
                },
                metadata: {
                    browser_id: run.browserId,
                    user_id: userId,
                }
            };

            try {
                await sendWebhook(run.robotMetaId, 'run_failed', failedWebhookPayload);
                logger.log('info', `Failure webhooks sent successfully for run ${run.runId}`);
            } catch (webhookError: any) {
                logger.log('error', `Failed to send failure webhooks for run ${run.runId}: ${webhookError.message}`);
            }
        }

        capture(
            'maxun-oss-run-created-api',
            {
                runId: id,
                created_at: new Date().toISOString(),
                status: 'failed',
            }
        );

        return {
            success: false,
            error: error.message,
        };
    }
}
2024-10-12 15:55:43 +05:30
/**
 * Schedules an API-triggered run of a robot and returns its run id right away.
 *
 * A socket.io client is connected to the namespace named after the new
 * browser id; the run itself starts when that socket receives `ready-for-run`.
 *
 * @param id - `recording_meta.id` of the robot to run.
 * @param userId - Id of the user requesting the run.
 * @returns The new run id, or `undefined` when scheduling failed (the error is logged).
 */
export async function handleRunRecording(id: string, userId: string) {
    try {
        const result = await createWorkflowAndStoreMetadata(id, userId);
        const { browserId, runId: newRunId } = result;

        if (!browserId || !newRunId || !userId) {
            // Surface the underlying scheduling error instead of hiding it
            // behind the generic message.
            const reason = result.error ? `: ${result.error}` : '';
            throw new Error(`browserId or runId or userId is undefined${reason}`);
        }

        const backendUrl = process.env.BACKEND_URL || 'http://localhost:8080';
        const socket = io(`${backendUrl}/${browserId}`, {
            transports: ['websocket'],
            // NOTE(review): this disables TLS certificate verification for the
            // connection — confirm it is intended outside local development.
            rejectUnauthorized: false
        });

        socket.on('ready-for-run', () => {
            // The socket ignores the handler's return value, so a rejection
            // has to be handled here.
            readyForRunHandler(browserId, newRunId, userId).catch((handlerError: any) => {
                logger.log('error', `Unhandled error while executing run ${newRunId}: ${handlerError?.message ?? handlerError}`);
            });
        });

        logger.log('info', `Running Robot: ${id}`);

        socket.on('disconnect', () => {
            cleanupSocketListeners(socket, browserId, newRunId, userId);
        });

        // Return the runId immediately, so the client knows the run is started
        return newRunId;
    } catch (error: any) {
        logger.error('Error running robot:', error);
    }
}
2025-03-05 21:27:08 +05:30
/**
 * Removes the `ready-for-run` listeners from a run's socket.
 *
 * The previous implementation passed a newly created arrow function to
 * `socket.off`. That function was never registered, so nothing was removed.
 * Calling `off` with only the event name detaches every listener for it.
 *
 * @param socket - Client socket created for the run.
 * @param browserId - Id of the run's remote browser (used for logging).
 * @param id - Id of the run (used for logging).
 * @param userId - Unused; kept so existing callers remain valid.
 */
function cleanupSocketListeners(socket: Socket, browserId: string, id: string, userId: string) {
    socket.off('ready-for-run');
    logger.log('info', `Cleaned up listeners for browserId: ${browserId}, runId: ${id}`);
}
2024-10-12 15:54:14 +05:30
2024-10-12 22:43:10 +05:30
/**
 * Polls the run row until the run reaches a terminal status.
 *
 * @param runId - Id of the run to wait for.
 * @param interval - Delay between polls in milliseconds (default 2000).
 * @returns The run row once its status is `success`.
 * @throws Error when the run does not exist, or ends as `failed` or `aborted`.
 */
async function waitForRunCompletion(runId: string, interval: number = 2000) {
    while (true) {
        const run = await Run.findOne({ where: { runId }, raw: true });
        if (!run) throw new Error('Run not found');

        if (run.status === 'success') {
            return run;
        }
        if (run.status === 'failed') {
            throw new Error('Run failed');
        }
        // An aborted run never becomes 'success' or 'failed'; without this
        // check the loop would poll forever.
        if (run.status === 'aborted') {
            throw new Error('Run aborted');
        }

        await new Promise(resolve => setTimeout(resolve, interval));
    }
}
2024-10-28 23:27:24 +05:30
/**
* @swagger
* /api/robots/{id}/runs:
* post:
* summary: Run a robot by ID
2024-10-29 00:14:26 +05:30
* description: When you need to run a robot and get its captured data, use this endpoint to create a run for the robot. The request waits for the run to finish and responds with the completed run's details. Webhooks are also sent when the run completes or fails.
2024-10-28 23:27:24 +05:30
* security:
* - api_key: []
* parameters:
* - in: path
* name: id
* schema:
* type: string
* required: true
* description: The ID of the robot to run.
* responses:
* 200:
* description: Robot run started successfully.
* content:
* application/json:
* schema:
* type: object
* properties:
* statusCode:
* type: integer
* example: 200
* messageCode:
* type: string
* example: success
* run:
* type: object
* properties:
* runId:
* type: string
* example: "67890"
* status:
* type: string
* example: "success"
* 401:
* description: Unauthorized access.
* content:
* application/json:
* schema:
* type: object
* properties:
* ok:
* type: boolean
* example: false
* error:
* type: string
* example: "Unauthorized"
* 500:
* description: Error running robot.
* content:
* application/json:
* schema:
* type: object
* properties:
* statusCode:
* type: integer
* example: 500
* messageCode:
* type: string
* example: error
* message:
* type: string
* example: "Failed to run robot"
*/
2024-10-24 22:26:12 +05:30
// POST /robots/:id/runs — starts a run for the robot, waits until it has
// finished and responds with the formatted run.
router.post("/robots/:id/runs", requireAPIKey, async (req: AuthenticatedRequest, res: Response) => {
    try {
        const user = req.user;
        if (!user) {
            return res.status(401).json({ ok: false, error: 'Unauthorized' });
        }

        const startedRunId = await handleRunRecording(req.params.id, user.id);
        if (!startedRunId) {
            throw new Error('Run ID is undefined');
        }

        const finishedRun = await waitForRunCompletion(startedRunId);

        res.status(200).json({
            statusCode: 200,
            messageCode: "success",
            run: formatRunResponse(finishedRun),
        });
    } catch (error) {
        console.error("Error running robot:", error);
        res.status(500).json({
            statusCode: 500,
            messageCode: "error",
            message: "Failed to run robot",
        });
    }
});
2024-10-12 22:14:33 +05:30
// The router is this module's default export.
export default router;