Files
parcer/server/src/routes/storage.ts

550 lines
16 KiB
TypeScript
Raw Normal View History

2024-07-31 21:10:25 +05:30
import { Router } from 'express';
import logger from "../logger";
import { createRemoteBrowserForRun, destroyRemoteBrowser } from "../browser-management/controller";
import { chromium } from "playwright";
2024-09-19 17:39:33 +05:30
import { browserPool } from "../server";
2024-07-31 21:10:25 +05:30
import { uuid } from "uuidv4";
2024-09-11 23:33:04 +05:30
import moment from 'moment-timezone';
import cron from 'node-cron';
2024-09-19 19:36:47 +05:30
import { googleSheetUpdateTasks, processGoogleSheetUpdates } from '../workflow-management/integrations/gsheet';
2024-10-06 03:15:45 +05:30
import { getDecryptedProxyConfig } from './proxy';
import { requireSignIn } from '../middlewares/auth';
import Robot from '../models/Robot';
import Run from '../models/Run';
2024-10-15 22:20:29 +05:30
import { BinaryOutputService } from '../storage/mino';
2024-10-22 16:57:33 +05:30
import { workflowQueue } from '../worker';
2024-10-24 22:26:12 +05:30
import { AuthenticatedRequest } from './record';
2024-10-26 00:49:26 +05:30
import { computeNextRun } from '../utils/schedule';
2024-10-28 02:47:21 +05:30
import captureServerAnalytics from "../utils/analytics";
2024-07-31 21:10:25 +05:30
// Express Router instance on which every handler in this module is registered.
export const router = Router();
/**
 * Catch-all handler for the router root ('/').
 * Writes a debug log entry with the requested URL and then yields to the
 * next matching handler. Guarded by the requireSignIn middleware.
 */
router.all('/', requireSignIn, (request, _response, proceed) => {
  const requestedUrl = request.url;
  logger.log('debug', `The recordings API was invoked: ${requestedUrl}`);
  // hand the request over to whichever handler comes next
  proceed();
});
/**
 * GET /recordings
 * Responds with every Robot row returned by `Robot.findAll()`.
 * If anything throws, an info-level log entry is written and `null` is sent instead.
 */
router.get('/recordings', requireSignIn, async (_request, response) => {
  try {
    const allRobots = await Robot.findAll();
    return response.send(allRobots);
  } catch (failure) {
    logger.log('info', 'Error while reading recordings');
    return response.send(null);
  }
});
2024-10-17 14:34:25 +05:30
/**
 * GET /recordings/:id
 * Looks up a single Robot whose `recording_meta.id` equals the `:id` route
 * parameter and sends the raw row (or whatever `findOne` resolved to).
 * If anything throws, an info-level log entry is written and `null` is sent instead.
 */
router.get('/recordings/:id', requireSignIn, async (request, response) => {
  const robotMetaId = request.params.id;
  try {
    const lookup = {
      where: { 'recording_meta.id': robotMetaId },
      raw: true,
    };
    const robot = await Robot.findOne(lookup);
    return response.send(robot);
  } catch (failure) {
    logger.log('info', 'Error while reading recordings');
    return response.send(null);
  }
});
2024-07-31 21:10:25 +05:30
/**
 * DELETE /recordings/:id
 * Deletes the Robot whose `recording_meta.id` equals the `:id` route parameter
 * and reports the deletion to server analytics.
 *
 * Responses:
 *  - 401 `{ error: 'Unauthorized' }` when the request carries no user
 *  - `true` when the delete call completed
 *  - `false` when anything threw (the failure is logged)
 */
router.delete('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
  if (!req.user) {
    return res.status(401).send({ error: 'Unauthorized' });
  }
  try {
    await Robot.destroy({
      where: { 'recording_meta.id': req.params.id }
    });
    captureServerAnalytics.capture({
      distinctId: req.user?.id,
      event: 'maxun-oss-robot-deleted',
      properties: {
        robotId: req.params.id,
        user_id: req.user?.id,
        deleted_at: new Date().toISOString(),
      }
    });
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    // The route parameter is `:id`; log it together with the actual failure reason.
    logger.log('info', `Error while deleting a recording with id: ${req.params.id} - ${message}`);
    return res.send(false);
  }
});
/**
 * GET /runs
 * Responds with every Run row returned by `Run.findAll()`.
 * If anything throws, an info-level log entry is written and `null` is sent instead.
 */
router.get('/runs', requireSignIn, async (_request, response) => {
  try {
    const allRuns = await Run.findAll();
    return response.send(allRuns);
  } catch (failure) {
    logger.log('info', 'Error while reading runs');
    return response.send(null);
  }
});
/**
 * DELETE /runs/:id
 * Deletes the Run whose `runId` equals the `:id` route parameter and reports
 * the deletion to server analytics.
 *
 * Responses:
 *  - 401 `{ error: 'Unauthorized' }` when the request carries no user
 *  - `true` when the delete call completed
 *  - `false` when anything threw (the failure is logged)
 */
router.delete('/runs/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
  if (!req.user) {
    return res.status(401).send({ error: 'Unauthorized' });
  }
  try {
    await Run.destroy({ where: { runId: req.params.id } });
    captureServerAnalytics.capture({
      distinctId: req.user?.id,
      event: 'maxun-oss-run-deleted',
      properties: {
        runId: req.params.id,
        user_id: req.user?.id,
        deleted_at: new Date().toISOString(),
      }
    });
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    // The route parameter is `:id`; log it together with the actual failure reason.
    logger.log('info', `Error while deleting a run with id: ${req.params.id} - ${message}`);
    return res.send(false);
  }
});
/**
 * PUT /runs/:id
 * Starts a remote browser for the robot whose `recording_meta.id` equals `:id`
 * and stores a new Run row with status 'running', so the run is ready to be
 * interpreted. The request body is stored as the run's `interpreterSettings`.
 *
 * Responses:
 *  - 404 `{ error: 'Recording not found' }` when no matching robot exists
 *  - 401 `{ error: 'Unauthorized' }` when the request carries no user
 *  - `{ browserId, runId }` on success
 *  - `''` when anything threw (the failure is logged)
 */
router.put('/runs/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const recording = await Robot.findOne({
      where: {
        'recording_meta.id': req.params.id
      },
      raw: true
    });

    if (!recording || !recording.recording_meta || !recording.recording_meta.id) {
      return res.status(404).send({ error: 'Recording not found' });
    }

    if (!req.user) {
      return res.status(401).send({ error: 'Unauthorized' });
    }

    const proxyConfig = await getDecryptedProxyConfig(req.user.id);

    // Only build proxy options when a proxy URL is configured; credentials are
    // attached only when both username and password are present.
    const proxy = proxyConfig.proxy_url
      ? {
          server: proxyConfig.proxy_url,
          ...(proxyConfig.proxy_username && proxyConfig.proxy_password
            ? {
                username: proxyConfig.proxy_username,
                password: proxyConfig.proxy_password,
              }
            : {}),
        }
      : undefined;

    // Never write the decrypted proxy settings to the logs: they contain credentials.
    logger.log('debug', `Proxy ${proxy ? 'enabled' : 'not configured'} for run with recording id: ${req.params.id}`);

    const id = createRemoteBrowserForRun({
      browser: chromium,
      launchOptions: {
        headless: true,
        proxy,
      }
    }, req.user.id);

    const runId = uuid();

    const run = await Run.create({
      status: 'running',
      name: recording.recording_meta.name,
      robotId: recording.id,
      robotMetaId: recording.recording_meta.id,
      startedAt: new Date().toLocaleString(),
      finishedAt: '',
      browserId: id,
      interpreterSettings: req.body,
      log: '',
      runId,
      runByUserId: req.user.id,
      serializableOutput: {},
      binaryOutput: {},
    });

    const plainRun = run.toJSON();

    return res.send({
      browserId: id,
      runId: plainRun.runId,
    });
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    logger.log('info', `Error while creating a run with recording id: ${req.params.id} - ${message}`);
    return res.send('');
  }
});
/**
 * GET /runs/run/:id
 * Responds with the raw Run row whose `runId` equals the `:id` route parameter.
 *
 * Responses:
 *  - the run row when found
 *  - 404 `null` when no run matches
 *  - `null` when anything threw (the failure is logged)
 */
router.get('/runs/run/:id', requireSignIn, async (req, res) => {
  try {
    // The route declares `:id`, so the run id must be read from `req.params.id`.
    const run = await Run.findOne({ where: { runId: req.params.id }, raw: true });
    if (!run) {
      return res.status(404).send(null);
    }
    return res.send(run);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    logger.log('error', `Error ${message} while reading a run with id: ${req.params.id}.json`);
    return res.send(null);
  }
});
/**
 * POST /runs/run/:id
 * Interprets the run's recording in its remote browser and stores the outcome.
 *
 * Steps on the happy path:
 *  1. load the Run (by `runId`) and its Robot (by `recording_meta.id`)
 *  2. interpret the recording on the browser's current page
 *  3. upload the binary output, destroy the remote browser
 *  4. mark the run 'success' and persist log and outputs
 *  5. report analytics and enqueue a Google Sheet update task
 *
 * Responses:
 *  - 401 `{ error: 'Unauthorized' }` when the request carries no user
 *  - 404 `false` when the run or its robot cannot be found
 *  - `true` on success
 *  - `false` when anything threw; the run is then marked 'failed'
 */
router.post('/runs/run/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    if (!req.user) { return res.status(401).send({ error: 'Unauthorized' }); }

    const run = await Run.findOne({ where: { runId: req.params.id } });
    if (!run) {
      return res.status(404).send(false);
    }

    const plainRun = run.toJSON();

    const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
    if (!recording) {
      return res.status(404).send(false);
    }

    // interpret the run in active browser
    const browser = browserPool.getRemoteBrowser(plainRun.browserId);
    const currentPage = browser?.getCurrentPage();
    if (!browser || !currentPage) {
      throw new Error('Could not find an active browser page for this run');
    }

    const interpretationInfo = await browser.interpreter.InterpretRecording(
      recording.recording, currentPage, plainRun.interpreterSettings);

    const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
    const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput);

    await destroyRemoteBrowser(plainRun.browserId);

    // Pass only the columns that change rather than spreading the model instance.
    await run.update({
      status: 'success',
      finishedAt: new Date().toLocaleString(),
      browserId: plainRun.browserId,
      log: interpretationInfo.log.join('\n'),
      serializableOutput: interpretationInfo.serializableOutput,
      binaryOutput: uploadedBinaryOutput,
    });

    // A run may legitimately produce no 'item-0' entry; count 0 in that case
    // instead of throwing and flipping an already successful run to 'failed'.
    const extractedScreenshotsCount = run.dataValues.binaryOutput?.['item-0']?.length ?? 0;
    const extractedItemsCount = run.dataValues.serializableOutput?.['item-0']?.length ?? 0;

    captureServerAnalytics.capture({
      distinctId: req.user?.id,
      event: 'maxun-oss-run-created-manual',
      properties: {
        runId: req.params.id,
        user_id: req.user?.id,
        created_at: new Date().toISOString(),
        status: 'success',
        extractedItemsCount,
        extractedScreenshotsCount,
      }
    });

    try {
      googleSheetUpdateTasks[plainRun.runId] = {
        robotId: plainRun.robotMetaId,
        runId: plainRun.runId,
        status: 'pending',
        retries: 5,
      };
      processGoogleSheetUpdates();
    } catch (err: any) {
      logger.log('error', `Failed to update Google Sheet for run: ${plainRun.runId}: ${err.message}`);
    }

    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);

    // If error occurs, set run status to failed. This is best-effort: a second
    // failure here must not prevent the response from being sent.
    try {
      const run = await Run.findOne({ where: { runId: req.params.id } });
      if (run) {
        await run.update({
          status: 'failed',
          finishedAt: new Date().toLocaleString(),
        });
      }
    } catch (updateError) {
      const updateMessage = updateError instanceof Error ? updateError.message : String(updateError);
      logger.log('error', `Could not mark run ${req.params.id} as failed - ${updateMessage}`);
    }

    logger.log('info', `Error while running a recording with id: ${req.params.id} - ${message}`);
    captureServerAnalytics.capture({
      distinctId: req.user?.id,
      event: 'maxun-oss-run-created-manual',
      properties: {
        runId: req.params.id,
        user_id: req.user?.id,
        created_at: new Date().toISOString(),
        status: 'failed',
        error_message: message,
      }
    });
    return res.send(false);
  }
});
2024-10-24 22:26:12 +05:30
/**
 * PUT /schedule/:id/
 * Creates a repeating queue job for the robot whose `recording_meta.id`
 * equals `:id` and stores the schedule on the robot.
 *
 * Expected body fields: `runEvery`, `runEveryUnit` ('MINUTES' | 'HOURS' |
 * 'DAYS' | 'WEEKS' | 'MONTHS'), `startFrom` (upper-case weekday name),
 * `atTimeStart` / `atTimeEnd` ('HH:mm'), `timezone`.
 *
 * Responses:
 *  - 404 when the robot does not exist
 *  - 400 when a parameter is missing or invalid
 *  - 401 when the request carries no user
 *  - 200 `{ message: 'success', robot }` on success
 *  - 500 when anything threw
 */
router.put('/schedule/:id/', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const { id } = req.params;
    const { runEvery, runEveryUnit, startFrom, atTimeStart, atTimeEnd, timezone } = req.body;

    const robot = await Robot.findOne({ where: { 'recording_meta.id': id } });
    if (!robot) {
      return res.status(404).json({ error: 'Robot not found' });
    }

    // Validate required parameters
    if (!runEvery || !runEveryUnit || !startFrom || !atTimeStart || !atTimeEnd || !timezone) {
      return res.status(400).json({ error: 'Missing required parameters' });
    }

    // The interval is interpolated into the cron pattern, so it must be a positive integer.
    const interval = Number(runEvery);
    if (!Number.isInteger(interval) || interval < 1) {
      return res.status(400).json({ error: 'Invalid runEvery' });
    }

    // Validate time zone
    if (!moment.tz.zone(timezone)) {
      return res.status(400).json({ error: 'Invalid timezone' });
    }

    // Validate and parse start and end times
    if (typeof atTimeStart !== 'string' || typeof atTimeEnd !== 'string') {
      return res.status(400).json({ error: 'Invalid time format' });
    }
    const [startHours, startMinutes] = atTimeStart.split(':').map(Number);
    const [endHours, endMinutes] = atTimeEnd.split(':').map(Number);

    if (isNaN(startHours) || isNaN(startMinutes) || isNaN(endHours) || isNaN(endMinutes) ||
      startHours < 0 || startHours > 23 || startMinutes < 0 || startMinutes > 59 ||
      endHours < 0 || endHours > 23 || endMinutes < 0 || endMinutes > 59) {
      return res.status(400).json({ error: 'Invalid time format' });
    }

    const days = ['SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY'];
    if (!days.includes(startFrom)) {
      return res.status(400).json({ error: 'Invalid start day' });
    }

    // Build cron expression based on run frequency and starting day
    let cronExpression: string;
    const dayIndex = days.indexOf(startFrom);

    switch (runEveryUnit) {
      case 'MINUTES':
        // The interval belongs in the minute field; the hour field stays unrestricted.
        cronExpression = `*/${interval} * * * *`;
        break;
      case 'HOURS':
        cronExpression = `${startMinutes} */${interval} * * *`;
        break;
      case 'DAYS':
        cronExpression = `${startMinutes} ${startHours} */${interval} * *`;
        break;
      case 'WEEKS':
        // NOTE(review): `runEvery` is not part of this expression - confirm that
        // "every N weeks" is meant to collapse to "weekly".
        cronExpression = `${startMinutes} ${startHours} * * ${dayIndex}`;
        break;
      case 'MONTHS':
        // `startFrom` has already been validated as a weekday name, so the
        // day-of-month part is always '1-7'.
        cronExpression = `${startMinutes} ${startHours} 1-7 * *`;
        // NOTE(review): appending the weekday yields a SIX-field expression.
        // Parsers that accept six fields usually read the first one as seconds,
        // which would shift every field by one position - verify against the
        // queue's cron parser before relying on monthly schedules. `runEvery`
        // is also not part of this expression.
        if (startFrom !== 'SUNDAY') {
          cronExpression += ` ${dayIndex}`;
        }
        break;
      default:
        return res.status(400).json({ error: 'Invalid runEveryUnit' });
    }

    // Validate cron expression
    if (!cronExpression || !cron.validate(cronExpression)) {
      return res.status(400).json({ error: 'Invalid cron expression generated' });
    }

    if (!req.user) {
      return res.status(401).json({ error: 'Unauthorized' });
    }

    // Create the job in the queue with the cron expression
    await workflowQueue.add(
      'run workflow',
      { id, runId: uuid(), userId: req.user.id },
      {
        repeat: {
          pattern: cronExpression,
          tz: timezone,
        },
      }
    );

    const nextRunAt = computeNextRun(cronExpression, timezone);

    await robot.update({
      schedule: {
        runEvery,
        runEveryUnit,
        startFrom,
        atTimeStart,
        atTimeEnd,
        timezone,
        cronExpression,
        lastRunAt: undefined,
        nextRunAt: nextRunAt || undefined,
      },
    });

    captureServerAnalytics.capture({
      distinctId: req.user.id,
      event: 'maxun-oss-robot-scheduled',
      properties: {
        robotId: id,
        user_id: req.user.id,
        scheduled_at: new Date().toISOString(),
      }
    });

    // Fetch updated schedule details after setting it
    const updatedRobot = await Robot.findOne({ where: { 'recording_meta.id': id } });

    res.status(200).json({
      message: 'success',
      robot: updatedRobot,
    });
  } catch (error) {
    console.error('Error scheduling workflow:', error);
    res.status(500).json({ error: 'Failed to schedule workflow' });
  }
});
2024-10-26 00:48:07 +05:30
2024-10-22 22:01:25 +05:30
/**
 * GET /schedule/:id
 * Returns the `schedule` stored on the robot whose `recording_meta.id`
 * equals the `:id` route parameter.
 *
 * Responses: 200 `{ schedule }`, 404 when the robot is missing, 500 when anything threw.
 */
router.get('/schedule/:id', requireSignIn, async (request, response) => {
  try {
    const robotMetaId = request.params.id;
    const storedRobot = await Robot.findOne({
      where: { 'recording_meta.id': robotMetaId },
      raw: true,
    });

    if (storedRobot) {
      const { schedule } = storedRobot;
      return response.status(200).json({ schedule });
    }

    return response.status(404).json({ error: 'Robot not found' });
  } catch (failure) {
    console.error('Error getting schedule:', failure);
    response.status(500).json({ error: 'Failed to get schedule' });
  }
});
2024-10-22 18:26:28 +05:30
2024-10-22 22:01:25 +05:30
/**
 * DELETE /schedule/:id
 * Removes the queued ('delayed' or 'waiting') jobs whose payload id matches
 * the robot, clears the robot's `schedule`, and reports the change to
 * server analytics.
 *
 * Responses: 200 on success, 401 without a user, 404 when the robot is
 * missing, 500 when anything threw.
 */
router.delete('/schedule/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const robotMetaId = req.params.id;

    if (!req.user) {
      return res.status(401).json({ error: 'Unauthorized' });
    }

    const targetRobot = await Robot.findOne({ where: { 'recording_meta.id': robotMetaId } });
    if (!targetRobot) {
      return res.status(404).json({ error: 'Robot not found' });
    }

    // Drop every queued job that belongs to this robot, one after another
    const queuedJobs = await workflowQueue.getJobs(['delayed', 'waiting']);
    for (const queuedJob of queuedJobs) {
      if (queuedJob.data.id !== robotMetaId) {
        continue;
      }
      await queuedJob.remove();
    }

    // Clear the schedule stored on the robot
    await targetRobot.update({ schedule: null });

    const userId = req.user?.id;
    captureServerAnalytics.capture({
      distinctId: userId,
      event: 'maxun-oss-robot-schedule-deleted',
      properties: {
        robotId: robotMetaId,
        user_id: userId,
        unscheduled_at: new Date().toISOString(),
      },
    });

    res.status(200).json({ message: 'Schedule deleted successfully' });
  } catch (failure) {
    console.error('Error deleting schedule:', failure);
    res.status(500).json({ error: 'Failed to delete schedule' });
  }
});
2024-09-11 23:33:04 +05:30
2024-07-31 21:10:25 +05:30
/**
 * POST /runs/abort/:id
 * Marks the run whose `runId` equals `:id` as 'aborted' and stores whatever
 * log lines and outputs the run's remote browser interpreter has collected
 * so far.
 *
 * Responses:
 *  - 404 `false` when the run cannot be found
 *  - `true` when the run was updated
 *  - `false` when anything threw (the failure is logged)
 */
router.post('/runs/abort/:id', requireSignIn, async (req, res) => {
  try {
    const run = await Run.findOne({ where: { runId: req.params.id } });
    if (!run) {
      return res.status(404).send(false);
    }
    const plainRun = run.toJSON();

    const browser = browserPool.getRemoteBrowser(plainRun.browserId);
    // Join with a real newline, matching how a finished run stores its log.
    const currentLog = browser?.interpreter.debugMessages.join('\n');
    const serializableOutput = browser?.interpreter.serializableData.reduce((reducedObject, item, index) => {
      return {
        [`item-${index}`]: item,
        ...reducedObject,
      }
    }, {});
    const binaryOutput = browser?.interpreter.binaryData.reduce((reducedObject, item, index) => {
      return {
        [`item-${index}`]: item,
        ...reducedObject,
      }
    }, {});

    // Pass only the columns that change rather than spreading the model instance.
    await run.update({
      status: 'aborted',
      finishedAt: new Date().toLocaleString(),
      browserId: plainRun.browserId,
      log: currentLog,
      serializableOutput,
      binaryOutput,
    });
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    // The route parameter is `:id`; log it together with the actual failure reason.
    logger.log('info', `Error while aborting a run with id: ${req.params.id} - ${message}`);
    return res.send(false);
  }
});