// parcer/server/src/routes/storage.ts
// Routes for stored robots (recordings), runs and schedules.
import { Router } from 'express';
import { v4 as uuid } from "uuid";
import moment from 'moment-timezone';
import cron from 'node-cron';
import { WorkflowFile } from 'maxun-core';

import logger from "../logger";
import { createRemoteBrowserForRun, destroyRemoteBrowser, getActiveBrowserIdByState } from "../browser-management/controller";
import { browserPool } from "../server";
import { requireSignIn } from '../middlewares/auth';
import Robot from '../models/Robot';
import Run from '../models/Run';
import { AuthenticatedRequest } from './record';
import { computeNextRun } from '../utils/schedule';
import { capture } from "../utils/analytics";
import { encrypt, decrypt } from '../utils/auth';
import { cancelScheduledWorkflow, scheduleWorkflow } from '../storage/schedule';
import { pgBossClient } from '../storage/pgboss';
export const router = Router();
export const processWorkflowActions = async (workflow: any[], checkLimit: boolean = false): Promise<any[]> => {
2025-06-06 00:53:02 +05:30
const processedWorkflow = JSON.parse(JSON.stringify(workflow));
2025-02-23 13:56:19 +05:30
processedWorkflow.forEach((pair: any) => {
pair.what.forEach((action: any) => {
// Handle limit validation for scrapeList action
if (action.action === 'scrapeList' && checkLimit && Array.isArray(action.args) && action.args.length > 0) {
const scrapeConfig = action.args[0];
if (scrapeConfig && typeof scrapeConfig === 'object' && 'limit' in scrapeConfig) {
if (typeof scrapeConfig.limit === 'number' && scrapeConfig.limit > 5) {
scrapeConfig.limit = 5;
}
}
}
// Handle decryption for type and press actions
if ((action.action === 'type' || action.action === 'press') && Array.isArray(action.args) && action.args.length > 1) {
try {
const encryptedValue = action.args[1];
if (typeof encryptedValue === 'string') {
const decryptedValue = decrypt(encryptedValue);
action.args[1] = decryptedValue;
} else {
logger.log('error', 'Encrypted value is not a string');
action.args[1] = '';
}
} catch (error: unknown) {
const errorMessage = error instanceof Error ? error.message : String(error);
logger.log('error', `Failed to decrypt input value: ${errorMessage}`);
action.args[1] = '';
}
}
});
});
return processedWorkflow;
}
2024-07-31 21:10:25 +05:30
/**
* Logs information about recordings API.
*/
router.all('/', requireSignIn, (req, res, next) => {
2024-09-19 17:39:33 +05:30
logger.log('debug', `The recordings API was invoked: ${req.url}`)
2024-07-31 21:10:25 +05:30
next() // pass control to the next handler
})
/**
* GET endpoint for getting an array of all stored recordings.
*/
router.get('/recordings', requireSignIn, async (req, res) => {
2024-07-31 21:10:25 +05:30
try {
2024-10-21 21:03:42 +05:30
const data = await Robot.findAll();
2024-07-31 21:10:25 +05:30
return res.send(data);
} catch (e) {
2024-11-29 22:12:16 +05:30
logger.log('info', 'Error while reading robots');
2024-07-31 21:10:25 +05:30
return res.send(null);
}
});
2024-10-17 14:34:25 +05:30
/**
* GET endpoint for getting a recording.
*/
2024-10-17 14:33:51 +05:30
router.get('/recordings/:id', requireSignIn, async (req, res) => {
try {
const data = await Robot.findOne({
2024-10-21 21:03:42 +05:30
where: { 'recording_meta.id': req.params.id },
2024-10-17 14:33:51 +05:30
raw: true
}
2024-10-22 16:59:18 +05:30
);
if (data?.recording?.workflow) {
data.recording.workflow = await processWorkflowActions(
data.recording.workflow,
);
}
2024-10-17 14:33:51 +05:30
return res.send(data);
} catch (e) {
2024-11-29 22:12:16 +05:30
logger.log('info', 'Error while reading robots');
2024-10-17 14:33:51 +05:30
return res.send(null);
}
})
2024-11-15 22:26:36 +05:30
router.get(('/recordings/:id/runs'), requireSignIn, async (req, res) => {
try {
const runs = await Run.findAll({
2025-06-06 00:53:02 +05:30
where: {
robotMetaId: req.params.id
},
raw: true
2024-11-15 22:26:36 +05:30
});
const formattedRuns = runs.map(formatRunResponse);
const response = {
2025-06-06 00:53:02 +05:30
statusCode: 200,
messageCode: "success",
runs: {
2024-11-15 22:26:36 +05:30
totalCount: formattedRuns.length,
items: formattedRuns,
2025-06-06 00:53:02 +05:30
},
2024-11-15 22:26:36 +05:30
};
res.status(200).json(response);
2025-06-06 00:53:02 +05:30
} catch (error) {
2024-11-15 22:26:36 +05:30
console.error("Error fetching runs:", error);
res.status(500).json({
2025-06-06 00:53:02 +05:30
statusCode: 500,
messageCode: "error",
message: "Failed to retrieve runs",
2024-11-15 22:26:36 +05:30
});
2025-06-06 00:53:02 +05:30
}
2024-11-15 22:26:36 +05:30
})
function formatRunResponse(run: any) {
const formattedRun = {
2025-06-06 00:53:02 +05:30
id: run.id,
status: run.status,
name: run.name,
robotId: run.robotMetaId, // Renaming robotMetaId to robotId
startedAt: run.startedAt,
finishedAt: run.finishedAt,
runId: run.runId,
runByUserId: run.runByUserId,
runByScheduleId: run.runByScheduleId,
runByAPI: run.runByAPI,
data: {},
screenshot: null,
2024-11-15 22:26:36 +05:30
};
if (run.serializableOutput && run.serializableOutput['item-0']) {
2025-06-06 00:53:02 +05:30
formattedRun.data = run.serializableOutput['item-0'];
2024-11-15 22:26:36 +05:30
} else if (run.binaryOutput && run.binaryOutput['item-0']) {
2025-06-06 00:53:02 +05:30
formattedRun.screenshot = run.binaryOutput['item-0'];
2024-11-15 22:26:36 +05:30
}
return formattedRun;
}
// One captured credential destined for an input field.
interface CredentialInfo {
  value: string;  // plaintext value; encrypted (via encrypt()) before it is stored in the workflow
  type: string;   // passed through as the third arg of the injected `type` action
}

// Map from element selector to the credential that should be typed into it.
interface Credentials {
  [key: string]: CredentialInfo;
}
/**
 * Rewrites a workflow so credentialed fields are filled by a single
 * encrypted `type` action instead of the recorded keystroke sequence.
 *
 * Per step: when a selector appears in `credentials`, the recorded
 * `type`/`press` actions targeting it are replaced by one
 * `type(selector, encrypt(value), type)` followed by
 * `waitForLoadState('networkidle')`. A `click` on such a selector is kept,
 * with the injection placed right after it when typing follows in the
 * recording. Each selector is injected at most once per step.
 *
 * Returns a new step array; unrelated actions pass through unchanged.
 */
function handleWorkflowActions(workflow: any[], credentials: Credentials) {
  return workflow.map(step => {
    if (!step.what) return step;

    const newWhat: any[] = [];
    const processedSelectors = new Set<string>();

    for (let i = 0; i < step.what.length; i++) {
      const action = step.what[i];

      // Pass through malformed actions (missing name or target selector).
      if (!action?.action || !action?.args?.[0]) {
        newWhat.push(action);
        continue;
      }

      const selector = action.args[0];
      const credential = credentials[selector];

      // No credential registered for this selector — keep the action as-is.
      if (!credential) {
        newWhat.push(action);
        continue;
      }

      if (action.action === 'click') {
        // Keep the click; if the recording continues with typing into this
        // not-yet-processed field, inject the encrypted type right after it.
        newWhat.push(action);
        if (!processedSelectors.has(selector) &&
          i + 1 < step.what.length &&
          (step.what[i + 1].action === 'type' || step.what[i + 1].action === 'press')) {
          newWhat.push({
            action: 'type',
            args: [selector, encrypt(credential.value), credential.type]
          });
          newWhat.push({
            action: 'waitForLoadState',
            args: ['networkidle']
          });
          processedSelectors.add(selector);
          // Skip the recorded keystroke run that was just replaced.
          // NOTE(review): the skip does not check the selector of the
          // skipped actions — it assumes the following type/press run all
          // targets the same field; confirm against recorded workflows.
          while (i + 1 < step.what.length &&
            (step.what[i + 1].action === 'type' ||
              step.what[i + 1].action === 'press' ||
              step.what[i + 1].action === 'waitForLoadState')) {
            i++;
          }
        }
      } else if ((action.action === 'type' || action.action === 'press') &&
        !processedSelectors.has(selector)) {
        // Typing with no preceding click: replace the keystroke run with one
        // encrypted type action plus a network-idle wait.
        newWhat.push({
          action: 'type',
          args: [selector, encrypt(credential.value), credential.type]
        });
        newWhat.push({
          action: 'waitForLoadState',
          args: ['networkidle']
        });
        processedSelectors.add(selector);
        // Skip subsequent type/press/waitForLoadState actions for this selector
        while (i + 1 < step.what.length &&
          (step.what[i + 1].action === 'type' ||
            step.what[i + 1].action === 'press' ||
            step.what[i + 1].action === 'waitForLoadState')) {
          i++;
        }
      }
      // NOTE: a type/press on an already-processed selector reaches no
      // branch and is dropped — it was subsumed by the injected action.
    }

    return {
      ...step,
      what: newWhat
    };
  });
}
2024-11-19 02:40:00 +05:30
/**
* PUT endpoint to update the name and limit of a robot.
*/
router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
try {
const { id } = req.params;
const { name, limits, credentials, targetUrl, workflow: incomingWorkflow } = req.body;
2024-11-19 02:40:00 +05:30
// Validate input
if (!name && !limits && !credentials && !targetUrl) {
return res.status(400).json({ error: 'Either "name", "limits", "credentials" or "target_url" must be provided.' });
2024-11-19 02:40:00 +05:30
}
// Fetch the robot by ID
const robot = await Robot.findOne({ where: { 'recording_meta.id': id } });
if (!robot) {
return res.status(404).json({ error: 'Robot not found.' });
}
// Update fields if provided
if (name) {
robot.set('recording_meta', { ...robot.recording_meta, name });
}
2025-03-18 23:44:02 +05:30
if (targetUrl) {
2025-11-20 13:22:54 +05:30
robot.set('recording_meta', { ...robot.recording_meta, url: targetUrl });
const updatedWorkflow = [...robot.recording.workflow];
2025-11-20 13:22:54 +05:30
let foundGoto = false;
2025-06-06 00:53:02 +05:30
for (let i = updatedWorkflow.length - 1; i >= 0; i--) {
const step = updatedWorkflow[i];
for (let j = 0; j < step.what.length; j++) {
const action = step.what[j];
2025-03-18 23:44:02 +05:30
if (action.action === "goto" && action.args?.length) {
action.args[0] = targetUrl;
if (step.where?.url && step.where.url !== "about:blank") {
step.where.url = targetUrl;
}
2025-06-06 00:53:02 +05:30
robot.set('recording', { ...robot.recording, workflow: updatedWorkflow });
robot.changed('recording', true);
2025-11-20 13:22:54 +05:30
foundGoto = true;
i = -1;
break;
}
}
}
2025-03-18 23:44:02 +05:30
}
await robot.save();
// Start with existing workflow or allow client to supply a full workflow replacement
let workflow = incomingWorkflow && Array.isArray(incomingWorkflow)
? JSON.parse(JSON.stringify(incomingWorkflow))
: [...robot.recording.workflow]; // Create a copy of the workflow
if (credentials) {
2025-02-13 16:13:42 +05:30
workflow = handleWorkflowActions(workflow, credentials);
}
if (limits && Array.isArray(limits) && limits.length > 0) {
for (const limitInfo of limits) {
const { pairIndex, actionIndex, argIndex, limit } = limitInfo;
2025-06-06 00:53:02 +05:30
const pair = workflow[pairIndex];
if (!pair || !pair.what) continue;
2025-06-06 00:53:02 +05:30
const action = pair.what[actionIndex];
if (!action || !action.args) continue;
2025-06-06 00:53:02 +05:30
const arg = action.args[argIndex];
if (!arg || typeof arg !== 'object') continue;
2025-06-06 00:53:02 +05:30
(arg as { limit: number }).limit = limit;
2024-11-19 02:40:00 +05:30
}
}
const updates: any = {
recording: {
...robot.recording,
workflow
}
};
2025-11-20 13:22:54 +05:30
if (name || targetUrl) {
updates.recording_meta = {
...robot.recording_meta,
2025-11-20 13:22:54 +05:30
...(name && { name }),
...(targetUrl && { url: targetUrl })
};
}
await Robot.update(updates, {
where: { 'recording_meta.id': id }
});
2024-11-19 02:40:00 +05:30
const updatedRobot = await Robot.findOne({ where: { 'recording_meta.id': id } });
logger.log('info', `Robot with ID ${id} was updated successfully.`);
return res.status(200).json({ message: 'Robot updated successfully', robot });
} catch (error) {
// Safely handle the error type
if (error instanceof Error) {
logger.log('error', `Error updating robot with ID ${req.params.id}: ${error.message}`);
return res.status(500).json({ error: error.message });
} else {
logger.log('error', `Unknown error updating robot with ID ${req.params.id}`);
return res.status(500).json({ error: 'An unknown error occurred.' });
}
}
});
/**
* POST endpoint to duplicate a robot and update its target URL.
*/
router.post('/recordings/:id/duplicate', requireSignIn, async (req: AuthenticatedRequest, res) => {
try {
const { id } = req.params;
const { targetUrl } = req.body;
if (!targetUrl) {
return res.status(400).json({ error: 'The "targetUrl" field is required.' });
}
const originalRobot = await Robot.findOne({ where: { 'recording_meta.id': id } });
if (!originalRobot) {
return res.status(404).json({ error: 'Original robot not found.' });
}
const lastWord = targetUrl.split('/').filter(Boolean).pop() || 'Unnamed';
const workflow = originalRobot.recording.workflow.map((step) => {
if (step.where?.url && step.where.url !== "about:blank") {
step.where.url = targetUrl;
}
step.what.forEach((action) => {
if (action.action === "goto" && action.args?.length) {
2025-06-06 00:53:02 +05:30
action.args[0] = targetUrl;
2024-11-19 02:40:00 +05:30
}
});
return step;
});
2025-05-01 00:08:53 +05:30
const currentTimestamp = new Date().toLocaleString();
2024-11-19 02:40:00 +05:30
const newRobot = await Robot.create({
2025-06-06 00:53:02 +05:30
id: uuid(),
userId: originalRobot.userId,
2024-11-19 02:40:00 +05:30
recording_meta: {
...originalRobot.recording_meta,
id: uuid(),
name: `${originalRobot.recording_meta.name} (${lastWord})`,
2025-06-06 00:53:02 +05:30
createdAt: currentTimestamp,
updatedAt: currentTimestamp,
},
recording: { ...originalRobot.recording, workflow },
google_sheet_email: null,
2024-11-19 02:40:00 +05:30
google_sheet_name: null,
google_sheet_id: null,
google_access_token: null,
google_refresh_token: null,
2025-06-06 00:53:02 +05:30
schedule: null,
2024-11-19 02:40:00 +05:30
});
logger.log('info', `Robot with ID ${id} duplicated successfully as ${newRobot.id}.`);
return res.status(201).json({
message: 'Robot duplicated and target URL updated successfully.',
robot: newRobot,
});
} catch (error) {
if (error instanceof Error) {
logger.log('error', `Error duplicating robot with ID ${req.params.id}: ${error.message}`);
return res.status(500).json({ error: error.message });
} else {
logger.log('error', `Unknown error duplicating robot with ID ${req.params.id}`);
return res.status(500).json({ error: 'An unknown error occurred.' });
}
}
});
2025-11-20 13:22:54 +05:30
/**
* POST endpoint for creating a markdown robot
*/
2025-11-20 18:49:39 +05:30
router.post('/recordings/scrape', requireSignIn, async (req: AuthenticatedRequest, res) => {
2025-11-20 13:22:54 +05:30
try {
2025-11-20 18:49:39 +05:30
const { url, name, formats } = req.body;
2025-11-20 13:22:54 +05:30
if (!url) {
return res.status(400).json({ error: 'The "url" field is required.' });
}
if (!req.user) {
return res.status(401).send({ error: 'Unauthorized' });
}
// Validate URL format
try {
new URL(url);
} catch (err) {
return res.status(400).json({ error: 'Invalid URL format' });
}
2025-11-20 18:49:39 +05:30
// Validate format
const validFormats = ['markdown', 'html'];
if (!Array.isArray(formats) || formats.length === 0) {
return res.status(400).json({ error: 'At least one output format must be selected.' });
}
const invalid = formats.filter(f => !validFormats.includes(f));
if (invalid.length > 0) {
return res.status(400).json({ error: `Invalid formats: ${invalid.join(', ')}` });
}
2025-11-20 13:22:54 +05:30
const robotName = name || `Markdown Robot - ${new URL(url).hostname}`;
const currentTimestamp = new Date().toLocaleString();
const robotId = uuid();
const newRobot = await Robot.create({
id: uuid(),
userId: req.user.id,
recording_meta: {
name: robotName,
id: robotId,
createdAt: currentTimestamp,
updatedAt: currentTimestamp,
pairs: 0,
params: [],
2025-11-20 18:49:39 +05:30
type: 'scrape',
2025-11-20 13:22:54 +05:30
url: url,
2025-11-20 18:49:39 +05:30
formats: formats,
2025-11-20 13:22:54 +05:30
},
recording: { workflow: [] },
google_sheet_email: null,
google_sheet_name: null,
google_sheet_id: null,
google_access_token: null,
google_refresh_token: null,
schedule: null,
});
logger.log('info', `Markdown robot created with id: ${newRobot.id}`);
capture(
'maxun-oss-robot-created',
2025-11-20 13:22:54 +05:30
{
robot_meta: newRobot.recording_meta,
recording: newRobot.recording,
2025-11-20 13:22:54 +05:30
}
)
2025-11-20 13:22:54 +05:30
return res.status(201).json({
message: 'Markdown robot created successfully.',
robot: newRobot,
});
} catch (error) {
if (error instanceof Error) {
logger.log('error', `Error creating markdown robot: ${error.message}`);
return res.status(500).json({ error: error.message });
} else {
logger.log('error', 'Unknown error creating markdown robot');
return res.status(500).json({ error: 'An unknown error occurred.' });
}
}
});
2024-07-31 21:10:25 +05:30
/**
* DELETE endpoint for deleting a recording from the storage.
*/
2024-10-28 02:47:21 +05:30
router.delete('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
if (!req.user) {
return res.status(401).send({ error: 'Unauthorized' });
}
2024-07-31 21:10:25 +05:30
try {
await Robot.destroy({
2024-10-21 21:03:42 +05:30
where: { 'recording_meta.id': req.params.id }
});
2024-10-29 03:44:20 +05:30
capture(
'maxun-oss-robot-deleted',
{
2024-10-28 02:47:21 +05:30
robotId: req.params.id,
user_id: req.user?.id,
deleted_at: new Date().toISOString(),
}
2024-10-29 03:44:20 +05:30
)
2024-07-31 21:10:25 +05:30
return res.send(true);
} catch (e) {
2024-09-19 17:39:33 +05:30
const { message } = e as Error;
2024-10-09 03:10:22 +05:30
logger.log('info', `Error while deleting a recording with name: ${req.params.fileName}.json`);
2024-07-31 21:10:25 +05:30
return res.send(false);
}
});
/**
* GET endpoint for getting an array of runs from the storage.
*/
router.get('/runs', requireSignIn, async (req, res) => {
2024-07-31 21:10:25 +05:30
try {
const data = await Run.findAll();
2024-07-31 21:10:25 +05:30
return res.send(data);
} catch (e) {
logger.log('info', 'Error while reading runs');
return res.send(null);
}
});
/**
* DELETE endpoint for deleting a run from the storage.
*/
2024-10-28 02:47:21 +05:30
router.delete('/runs/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
if (!req.user) {
return res.status(401).send({ error: 'Unauthorized' });
}
2024-07-31 21:10:25 +05:30
try {
2024-10-10 02:02:43 +05:30
await Run.destroy({ where: { runId: req.params.id } });
2024-10-29 03:44:20 +05:30
capture(
'maxun-oss-run-deleted',
{
2024-10-28 02:47:21 +05:30
runId: req.params.id,
user_id: req.user?.id,
deleted_at: new Date().toISOString(),
}
2024-10-29 03:44:20 +05:30
)
2024-07-31 21:10:25 +05:30
return res.send(true);
} catch (e) {
2024-09-19 17:39:33 +05:30
const { message } = e as Error;
2024-07-31 21:10:25 +05:30
logger.log('info', `Error while deleting a run with name: ${req.params.fileName}.json`);
return res.send(false);
}
});
/**
 * PUT endpoint for starting a remote browser instance and saving run metadata to the storage.
 * Making it ready for interpretation and returning a runId.
 *
 * If the user has reached their browser limit, the run will be queued using pgBossClient.
 */
router.put('/runs/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const recording = await Robot.findOne({
      where: {
        'recording_meta.id': req.params.id
      },
      raw: true
    });

    if (!recording || !recording.recording_meta || !recording.recording_meta.id) {
      return res.status(404).send({ error: 'Recording not found' });
    }

    if (!req.user) {
      return res.status(401).send({ error: 'Unauthorized' });
    }

    // Generate runId first
    const runId = uuid();

    // Ask the pool whether this user may open another browser right now.
    const canCreateBrowser = await browserPool.hasAvailableBrowserSlots(req.user.id, "run");

    if (canCreateBrowser) {
      // Slot available: create a real browser, persist the run as 'running',
      // then hand execution off to the per-user pg-boss queue.
      let browserId: string;
      try {
        browserId = await createRemoteBrowserForRun(req.user.id);
        if (!browserId || browserId.trim() === '') {
          throw new Error('Failed to generate valid browser ID');
        }
        logger.log('info', `Created browser ${browserId} for run ${runId}`);
      } catch (browserError: any) {
        logger.log('error', `Failed to create browser: ${browserError.message}`);
        return res.status(500).send({ error: 'Failed to create browser instance' });
      }

      try {
        await Run.create({
          status: 'running',
          name: recording.recording_meta.name,
          robotId: recording.id,
          robotMetaId: recording.recording_meta.id,
          startedAt: new Date().toLocaleString(),
          finishedAt: '',
          browserId: browserId,
          interpreterSettings: req.body,
          log: '',
          runId,
          runByUserId: req.user.id,
          serializableOutput: {},
          binaryOutput: {},
        });
        logger.log('info', `Created run ${runId} with browser ${browserId}`);
      } catch (dbError: any) {
        logger.log('error', `Database error creating run: ${dbError.message}`);
        // Roll back the browser we just created so the slot is freed.
        try {
          await destroyRemoteBrowser(browserId, req.user.id);
        } catch (cleanupError: any) {
          logger.log('warn', `Failed to cleanup browser after run creation failure: ${cleanupError.message}`);
        }
        return res.status(500).send({ error: 'Failed to create run record' });
      }

      try {
        // One queue per user keeps run execution isolated per account.
        const userQueueName = `execute-run-user-${req.user.id}`;
        await pgBossClient.createQueue(userQueueName);
        const jobId = await pgBossClient.send(userQueueName, {
          userId: req.user.id,
          runId: runId,
          browserId: browserId,
        });
        logger.log('info', `Queued run execution job with ID: ${jobId} for run: ${runId}`);
      } catch (queueError: any) {
        logger.log('error', `Failed to queue run execution: ${queueError.message}`);
        // Mark the run failed and tear down the browser before reporting 503.
        try {
          await Run.update({
            status: 'failed',
            finishedAt: new Date().toLocaleString(),
            log: 'Failed to queue execution job'
          }, { where: { runId: runId } });
          await destroyRemoteBrowser(browserId, req.user.id);
        } catch (cleanupError: any) {
          logger.log('warn', `Failed to cleanup after queue error: ${cleanupError.message}`);
        }
        return res.status(503).send({ error: 'Unable to queue run, please try again later' });
      }

      return res.send({
        browserId: browserId,
        runId: runId,
        robotMetaId: recording.recording_meta.id,
        queued: false
      });
    } else {
      // No slot free: record the run as 'queued' with a placeholder browser id.
      // NOTE(review): this browserId is a fresh uuid, not a live browser —
      // presumably a worker later attaches a real browser; confirm upstream.
      const browserId = uuid();

      await Run.create({
        status: 'queued',
        name: recording.recording_meta.name,
        robotId: recording.id,
        robotMetaId: recording.recording_meta.id,
        startedAt: new Date().toLocaleString(),
        finishedAt: '',
        browserId,
        interpreterSettings: req.body,
        log: 'Run queued - waiting for available browser slot',
        runId,
        runByUserId: req.user.id,
        serializableOutput: {},
        binaryOutput: {},
      });

      return res.send({
        browserId: browserId,
        runId: runId,
        robotMetaId: recording.recording_meta.id,
        queued: true
      });
    }
  } catch (e) {
    const { message } = e as Error;
    logger.log('error', `Error while creating a run with robot id: ${req.params.id} - ${message}`);
    return res.status(500).send({ error: 'Internal server error' });
  }
});
/**
* GET endpoint for getting a run from the storage.
*/
router.get('/runs/run/:id', requireSignIn, async (req, res) => {
2024-07-31 21:10:25 +05:30
try {
2024-10-10 01:19:03 +05:30
const run = await Run.findOne({ where: { runId: req.params.runId }, raw: true });
if (!run) {
return res.status(404).send(null);
}
return res.send(run);
2024-07-31 21:10:25 +05:30
} catch (e) {
const { message } = e as Error;
2024-10-10 01:55:23 +05:30
logger.log('error', `Error ${message} while reading a run with id: ${req.params.id}.json`);
2024-07-31 21:10:25 +05:30
return res.send(null);
}
});
2024-12-08 18:08:05 +05:30
function AddGeneratedFlags(workflow: WorkflowFile) {
const copy = JSON.parse(JSON.stringify(workflow));
for (let i = 0; i < workflow.workflow.length; i++) {
copy.workflow[i].what.unshift({
action: 'flag',
args: ['generated'],
});
}
return copy;
};
2024-07-31 21:10:25 +05:30
/**
* PUT endpoint for finishing a run and saving it to the storage.
*/
2024-10-28 02:47:21 +05:30
router.post('/runs/run/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
2024-07-31 21:10:25 +05:30
try {
2024-10-28 02:47:21 +05:30
if (!req.user) { return res.status(401).send({ error: 'Unauthorized' }); }
const run = await Run.findOne({ where: { runId: req.params.id } });
if (!run) {
return res.status(404).send(false);
}
const plainRun = run.toJSON();
2024-10-21 21:03:42 +05:30
const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
if (!recording) {
return res.status(404).send(false);
}
2024-07-31 21:10:25 +05:30
2025-03-12 19:25:18 +05:30
try {
2025-03-12 23:08:51 +05:30
const userQueueName = `execute-run-user-${req.user.id}`;
2025-03-12 19:25:18 +05:30
// Queue the execution job
2025-11-28 15:51:45 +05:30
await pgBossClient.createQueue(userQueueName);
2025-06-06 00:53:02 +05:30
2025-11-28 15:51:45 +05:30
const jobId = await pgBossClient.send(userQueueName, {
2025-03-12 19:25:18 +05:30
userId: req.user.id,
runId: req.params.id,
browserId: plainRun.browserId
});
2025-06-06 00:53:02 +05:30
2025-03-12 19:25:18 +05:30
logger.log('info', `Queued run execution job with ID: ${jobId} for run: ${req.params.id}`);
} catch (queueError: any) {
logger.log('error', `Failed to queue run execution`);
2025-06-06 00:53:02 +05:30
2024-09-19 17:39:33 +05:30
}
2024-07-31 21:10:25 +05:30
} catch (e) {
2024-09-19 17:39:33 +05:30
const { message } = e as Error;
2024-10-28 04:49:54 +05:30
// If error occurs, set run status to failed
const run = await Run.findOne({ where: { runId: req.params.id } });
if (run) {
await run.update({
status: 'failed',
finishedAt: new Date().toLocaleString(),
});
}
2024-11-29 22:13:03 +05:30
logger.log('info', `Error while running a robot with id: ${req.params.id} - ${message}`);
2024-10-29 03:44:20 +05:30
capture(
'maxun-oss-run-created-manual',
{
2024-10-28 04:17:17 +05:30
runId: req.params.id,
user_id: req.user?.id,
created_at: new Date().toISOString(),
status: 'failed',
error_message: message,
}
2024-10-29 03:44:20 +05:30
);
2024-07-31 21:10:25 +05:30
return res.send(false);
}
});
/**
 * PUT endpoint to create or replace a robot's schedule.
 *
 * Validates the requested frequency/time/timezone, builds a cron expression,
 * cancels any existing scheduled job, registers a new one via
 * scheduleWorkflow, and stores the schedule (with computed nextRunAt) on the
 * robot. Responds with the robot re-read from the database.
 */
router.put('/schedule/:id/', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const { id } = req.params;
    const { runEvery, runEveryUnit, startFrom, dayOfMonth, atTimeStart, atTimeEnd, timezone } = req.body;

    const robot = await Robot.findOne({ where: { 'recording_meta.id': id } });
    if (!robot) {
      return res.status(404).json({ error: 'Robot not found' });
    }

    // Validate required parameters
    if (!runEvery || !runEveryUnit || !startFrom || !atTimeStart || !atTimeEnd || !timezone) {
      return res.status(400).json({ error: 'Missing required parameters' });
    }

    // Validate time zone
    if (!moment.tz.zone(timezone)) {
      return res.status(400).json({ error: 'Invalid timezone' });
    }

    // Validate and parse start and end times
    // NOTE(review): endHours/endMinutes are validated but never used in the
    // cron expression below — confirm whether atTimeEnd should affect it.
    const [startHours, startMinutes] = atTimeStart.split(':').map(Number);
    const [endHours, endMinutes] = atTimeEnd.split(':').map(Number);

    if (isNaN(startHours) || isNaN(startMinutes) || isNaN(endHours) || isNaN(endMinutes) ||
      startHours < 0 || startHours > 23 || startMinutes < 0 || startMinutes > 59 ||
      endHours < 0 || endHours > 23 || endMinutes < 0 || endMinutes > 59) {
      return res.status(400).json({ error: 'Invalid time format' });
    }

    const days = ['SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY'];
    if (!days.includes(startFrom)) {
      return res.status(400).json({ error: 'Invalid start day' });
    }

    // Build cron expression based on run frequency and starting day
    let cronExpression;
    const dayIndex = days.indexOf(startFrom);

    switch (runEveryUnit) {
      case 'MINUTES':
        cronExpression = `*/${runEvery} * * * *`;
        break;
      case 'HOURS':
        cronExpression = `${startMinutes} */${runEvery} * * *`;
        break;
      case 'DAYS':
        cronExpression = `${startMinutes} ${startHours} */${runEvery} * *`;
        break;
      case 'WEEKS':
        cronExpression = `${startMinutes} ${startHours} * * ${dayIndex}`;
        break;
      case 'MONTHS':
        // todo: handle leap year
        cronExpression = `${startMinutes} ${startHours} ${dayOfMonth} */${runEvery} *`;
        // NOTE(review): appending the weekday below produces a SIX-field
        // expression; node-cron treats a 6-field expression as starting with
        // seconds, which shifts every field's meaning — verify intent.
        if (startFrom !== 'SUNDAY') {
          cronExpression += ` ${dayIndex}`;
        }
        break;
      default:
        return res.status(400).json({ error: 'Invalid runEveryUnit' });
    }

    // Validate cron expression
    if (!cronExpression || !cron.validate(cronExpression)) {
      return res.status(400).json({ error: 'Invalid cron expression generated' });
    }

    if (!req.user) {
      return res.status(401).json({ error: 'Unauthorized' });
    }

    // Replace any previously registered schedule for this robot.
    try {
      await cancelScheduledWorkflow(id);
    } catch (cancelError) {
      logger.log('warn', `Failed to cancel existing schedule for robot ${id}: ${cancelError}`);
    }

    // jobId is currently unused beyond registration — kept for parity.
    const jobId = await scheduleWorkflow(id, req.user.id, cronExpression, timezone);

    const nextRunAt = computeNextRun(cronExpression, timezone);

    await robot.update({
      schedule: {
        runEvery,
        runEveryUnit,
        startFrom,
        dayOfMonth,
        atTimeStart,
        atTimeEnd,
        timezone,
        cronExpression,
        lastRunAt: undefined,
        nextRunAt: nextRunAt || undefined,
      },
    });

    capture(
      'maxun-oss-robot-scheduled',
      {
        robotId: id,
        user_id: req.user.id,
        scheduled_at: new Date().toISOString(),
      }
    )

    // Fetch updated schedule details after setting it
    const updatedRobot = await Robot.findOne({ where: { 'recording_meta.id': id } });

    res.status(200).json({
      message: 'success',
      robot: updatedRobot,
    });
  } catch (error) {
    console.error('Error scheduling workflow:', error);
    res.status(500).json({ error: 'Failed to schedule workflow' });
  }
});
2024-10-26 00:48:07 +05:30
2024-10-22 22:01:25 +05:30
// Endpoint to get schedule details
2024-10-22 19:20:03 +05:30
router.get('/schedule/:id', requireSignIn, async (req, res) => {
try {
2024-10-23 00:10:48 +05:30
const robot = await Robot.findOne({ where: { 'recording_meta.id': req.params.id }, raw: true });
2024-10-22 19:20:03 +05:30
if (!robot) {
return res.status(404).json({ error: 'Robot not found' });
}
return res.status(200).json({
2024-10-22 23:17:39 +05:30
schedule: robot.schedule
2024-10-22 19:20:03 +05:30
});
} catch (error) {
console.error('Error getting schedule:', error);
res.status(500).json({ error: 'Failed to get schedule' });
}
});
2024-10-22 18:26:28 +05:30
2024-10-22 22:01:25 +05:30
// Endpoint to delete schedule
2024-10-28 02:47:21 +05:30
router.delete('/schedule/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
2024-10-22 22:01:25 +05:30
try {
const { id } = req.params;
2024-10-28 02:47:21 +05:30
if (!req.user) {
return res.status(401).json({ error: 'Unauthorized' });
}
2024-10-22 22:01:25 +05:30
const robot = await Robot.findOne({ where: { 'recording_meta.id': id } });
if (!robot) {
return res.status(404).json({ error: 'Robot not found' });
}
2025-11-28 15:51:45 +05:30
// Cancel the scheduled job in pgBossClient
try {
await cancelScheduledWorkflow(id);
} catch (error) {
logger.log('error', `Error cancelling scheduled job for robot ${id}: ${error}`);
// Continue with robot update even if cancellation fails
2024-10-22 22:01:25 +05:30
}
// Delete the schedule from the robot
await robot.update({
schedule: null
});
2024-10-29 03:44:20 +05:30
capture(
'maxun-oss-robot-schedule-deleted',
{
2024-10-28 02:47:21 +05:30
robotId: id,
user_id: req.user?.id,
unscheduled_at: new Date().toISOString(),
}
2024-10-29 03:44:20 +05:30
)
2024-10-28 02:47:21 +05:30
2024-10-22 22:01:25 +05:30
res.status(200).json({ message: 'Schedule deleted successfully' });
} catch (error) {
console.error('Error deleting schedule:', error);
res.status(500).json({ error: 'Failed to delete schedule' });
}
});
/**
 * POST endpoint for aborting a current interpretation of the run.
 *
 * Abort semantics:
 *  - only runs in 'running' or 'queued' status may be aborted;
 *  - a 'queued' run is finalized immediately (no browser has been attached
 *    to it yet — browsers are created when a queued run transitions to
 *    'running'; see processQueuedRuns below);
 *  - a 'running' run is flipped to 'aborting', its interpreter is stopped
 *    right away (best-effort), and the remaining cleanup is delegated to a
 *    per-user pg-boss queue so the HTTP response returns quickly.
 */
router.post('/runs/abort/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    if (!req.user) { return res.status(401).send({ error: 'Unauthorized' }); }

    const run = await Run.findOne({ where: { runId: req.params.id } });
    if (!run) {
      return res.status(404).send({ error: 'Run not found' });
    }

    // Finished/failed/aborted runs are immutable — reject anything not in-flight.
    if (!['running', 'queued'].includes(run.status)) {
      return res.status(400).send({
        error: `Cannot abort run with status: ${run.status}`
      });
    }

    // Capture the queued flag BEFORE the status is overwritten with 'aborting'.
    const isQueued = run.status === 'queued';

    await run.update({
      status: 'aborting'
    });

    // Queued runs have no interpreter or browser to tear down, so they can
    // be finalized synchronously without enqueueing a cleanup job.
    if (isQueued) {
      await run.update({
        status: 'aborted',
        finishedAt: new Date().toLocaleString(),
        log: 'Run aborted while queued'
      });
      return res.send({
        success: true,
        message: 'Queued run aborted',
        isQueued: true
      });
    }

    // Immediately stop interpreter like cloud version
    try {
      const browser = browserPool.getRemoteBrowser(run.browserId);
      if (browser && browser.interpreter) {
        logger.log('info', `Immediately stopping interpreter for run ${req.params.id}`);
        await browser.interpreter.stopInterpretation();
      }
    } catch (immediateStopError: any) {
      // Best-effort: the queued cleanup job below still runs even if the
      // immediate stop fails.
      logger.log('warn', `Failed to immediately stop interpreter: ${immediateStopError.message}`);
    }

    // Delegate the remaining teardown to a per-user pg-boss queue.
    const userQueueName = `abort-run-user-${req.user.id}`;
    await pgBossClient.createQueue(userQueueName);
    const jobId = await pgBossClient.send(userQueueName, {
      userId: req.user.id,
      runId: req.params.id
    });

    logger.log('info', `Abort signal sent for run ${req.params.id}, job ID: ${jobId}`);

    return res.send({
      success: true,
      message: 'Run stopped immediately, cleanup queued',
      jobId,
      isQueued: false
    });
  } catch (e) {
    const { message } = e as Error;
    logger.log('error', `Error aborting run ${req.params.id}: ${message}`);
    return res.status(500).send({ error: 'Failed to abort run' });
  }
});
// Circuit breaker for database connection issues.
// After MAX_CONSECUTIVE_ERRORS back-to-back failures in processQueuedRuns,
// polling is suspended until `circuitBreakerOpenUntil` so a struggling
// database is not hammered on every poll cycle.
let consecutiveDbErrors = 0;
const MAX_CONSECUTIVE_ERRORS = 3;
const CIRCUIT_BREAKER_COOLDOWN = 30000; // cooldown window in milliseconds
let circuitBreakerOpenUntil = 0; // epoch ms; 0 means the breaker is closed
/**
 * Polls for the oldest queued run and, when its user has a free browser
 * slot, creates a remote browser for it, flips the run to 'running', and
 * enqueues the actual execution on a per-user pg-boss queue.
 *
 * Guarded by the module-level circuit breaker above: after repeated
 * consecutive failures this function becomes a no-op until the cooldown
 * window expires.
 */
async function processQueuedRuns() {
  try {
    // Circuit breaker is open — skip this polling cycle entirely.
    if (Date.now() < circuitBreakerOpenUntil) {
      return;
    }

    // Oldest queued run first (FIFO by start time).
    const queuedRun = await Run.findOne({
      where: { status: 'queued' },
      order: [['startedAt', 'ASC']],
    });

    // The query succeeded, so the DB connection is healthy again.
    consecutiveDbErrors = 0;

    if (!queuedRun) return;

    const userId = queuedRun.runByUserId;
    const canCreateBrowser = await browserPool.hasAvailableBrowserSlots(userId, "run");

    if (canCreateBrowser) {
      logger.log('info', `Processing queued run ${queuedRun.runId} for user ${userId}`);

      const recording = await Robot.findOne({
        where: {
          'recording_meta.id': queuedRun.robotMetaId
        },
        raw: true
      });

      if (!recording) {
        // The robot backing this run no longer exists; fail the run terminally.
        await queuedRun.update({
          status: 'failed',
          finishedAt: new Date().toLocaleString(),
          log: 'Recording not found'
        });
        return;
      }

      try {
        const newBrowserId = await createRemoteBrowserForRun(userId);
        logger.log('info', `Created and initialized browser ${newBrowserId} for queued run ${queuedRun.runId}`);

        // Mark the run as running (with its browser attached) BEFORE sending
        // the job, so the worker always observes a consistent run record.
        await queuedRun.update({
          status: 'running',
          browserId: newBrowserId,
          log: 'Browser created and ready for execution'
        });

        const userQueueName = `execute-run-user-${userId}`;
        await pgBossClient.createQueue(userQueueName);

        const jobId = await pgBossClient.send(userQueueName, {
          userId: userId,
          runId: queuedRun.runId,
          browserId: newBrowserId,
        });

        logger.log('info', `Queued execution for run ${queuedRun.runId} with ready browser ${newBrowserId}, job ID: ${jobId}`);
      } catch (browserError: any) {
        // Browser creation failed — fail the run rather than leaving it queued forever.
        logger.log('error', `Failed to create browser for queued run: ${browserError.message}`);
        await queuedRun.update({
          status: 'failed',
          finishedAt: new Date().toLocaleString(),
          log: `Failed to create browser: ${browserError.message}`
        });
      }
    }
  } catch (error: any) {
    // Count consecutive failures and open the circuit breaker at the limit.
    consecutiveDbErrors++;
    if (consecutiveDbErrors >= MAX_CONSECUTIVE_ERRORS) {
      circuitBreakerOpenUntil = Date.now() + CIRCUIT_BREAKER_COOLDOWN;
      logger.log('error', `Circuit breaker opened after ${MAX_CONSECUTIVE_ERRORS} consecutive errors. Cooling down for ${CIRCUIT_BREAKER_COOLDOWN/1000}s`);
    }
    logger.log('error', `Error processing queued runs (${consecutiveDbErrors}/${MAX_CONSECUTIVE_ERRORS}): ${error.message}`);
  }
}
2025-09-10 00:22:09 +05:30
/**
* Recovers orphaned runs that were left in "running" status due to instance crashes
* This function runs on server startup to ensure data reliability
*/
export async function recoverOrphanedRuns() {
try {
logger.log('info', 'Starting recovery of orphaned runs...');
const orphanedRuns = await Run.findAll({
where: {
status: ['running', 'scheduled']
},
order: [['startedAt', 'ASC']]
});
if (orphanedRuns.length === 0) {
logger.log('info', 'No orphaned runs found');
return;
}
logger.log('info', `Found ${orphanedRuns.length} orphaned runs to recover (including scheduled runs)`);
for (const run of orphanedRuns) {
try {
const runData = run.toJSON();
logger.log('info', `Recovering orphaned run: ${runData.runId}`);
const browser = browserPool.getRemoteBrowser(runData.browserId);
if (!browser) {
const retryCount = runData.retryCount || 0;
if (retryCount < 3) {
await run.update({
status: 'queued',
retryCount: retryCount + 1,
serializableOutput: {},
binaryOutput: {},
browserId: undefined,
log: runData.log ? `${runData.log}\n[RETRY ${retryCount + 1}/3] Re-queuing due to server crash` : `[RETRY ${retryCount + 1}/3] Re-queuing due to server crash`
});
logger.log('info', `Re-queued crashed run ${runData.runId} (retry ${retryCount + 1}/3)`);
} else {
const crashRecoveryMessage = `Max retries exceeded (3/3) - Run failed after multiple server crashes.`;
await run.update({
status: 'failed',
finishedAt: new Date().toLocaleString(),
log: runData.log ? `${runData.log}\n${crashRecoveryMessage}` : crashRecoveryMessage
});
logger.log('warn', `Max retries reached for run ${runData.runId}, marked as permanently failed`);
}
if (runData.browserId) {
try {
browserPool.deleteRemoteBrowser(runData.browserId);
logger.log('info', `Cleaned up stale browser reference: ${runData.browserId}`);
} catch (cleanupError: any) {
logger.log('warn', `Failed to cleanup browser reference ${runData.browserId}: ${cleanupError.message}`);
}
}
} else {
logger.log('info', `Run ${runData.runId} browser still active, not orphaned`);
}
} catch (runError: any) {
logger.log('error', `Failed to recover run ${run.runId}: ${runError.message}`);
}
}
logger.log('info', `Orphaned run recovery completed. Processed ${orphanedRuns.length} runs.`);
} catch (error: any) {
logger.log('error', `Failed to recover orphaned runs: ${error.message}`);
}
}
2025-06-03 19:20:13 +05:30
export { processQueuedRuns };