Files
parcer/server/src/routes/storage.ts

490 lines
14 KiB
TypeScript
Raw Normal View History

2024-07-31 21:10:25 +05:30
import { Router } from 'express';
import logger from "../logger";
import { createRemoteBrowserForRun, destroyRemoteBrowser } from "../browser-management/controller";
import { chromium } from "playwright";
2024-09-19 17:39:33 +05:30
import { browserPool } from "../server";
2024-07-31 21:10:25 +05:30
import { uuid } from "uuidv4";
2024-09-11 23:33:04 +05:30
import moment from 'moment-timezone';
import cron from 'node-cron';
2024-09-19 19:36:47 +05:30
import { googleSheetUpdateTasks, processGoogleSheetUpdates } from '../workflow-management/integrations/gsheet';
2024-10-06 03:15:45 +05:30
import { getDecryptedProxyConfig } from './proxy';
import { requireSignIn } from '../middlewares/auth';
import Robot from '../models/Robot';
import Run from '../models/Run';
2024-10-15 22:20:29 +05:30
import { BinaryOutputService } from '../storage/mino';
2024-10-22 16:57:33 +05:30
import { workflowQueue } from '../worker';
2024-10-24 22:26:12 +05:30
import { AuthenticatedRequest } from './record';
2024-07-31 21:10:25 +05:30
export const router = Router();

/**
 * Catch-all handler for the router root: writes a debug log entry containing
 * the request URL, then hands control to the next matching handler.
 */
router.all('/', requireSignIn, (request, _response, proceed) => {
  const { url } = request;
  logger.log('debug', `The recordings API was invoked: ${url}`);
  proceed(); // continue down the handler chain
});
/**
 * GET endpoint that responds with every stored recording (robot).
 * When the lookup throws, an info-level entry is logged and `null` is sent.
 */
router.get('/recordings', requireSignIn, async (_request, response) => {
  try {
    return response.send(await Robot.findAll());
  } catch {
    logger.log('info', 'Error while reading recordings');
    return response.send(null);
  }
});
2024-10-17 14:34:25 +05:30
/**
 * GET endpoint that responds with the single recording whose
 * `recording_meta.id` equals the `:id` route parameter (raw row, not a model
 * instance). When the lookup throws, `null` is sent instead.
 */
router.get('/recordings/:id', requireSignIn, async (request, response) => {
  const query = {
    where: { 'recording_meta.id': request.params.id },
    raw: true,
  };
  try {
    const robot = await Robot.findOne(query);
    return response.send(robot);
  } catch {
    logger.log('info', 'Error while reading recordings');
    return response.send(null);
  }
});
2024-07-31 21:10:25 +05:30
/**
 * DELETE endpoint for deleting a recording from the storage.
 *
 * Destroys every robot whose `recording_meta.id` equals the `:id` route
 * parameter. Responds with `true` on success and `false` when the deletion
 * throws.
 */
router.delete('/recordings/:id', requireSignIn, async (req, res) => {
  try {
    await Robot.destroy({
      where: { 'recording_meta.id': req.params.id }
    });
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    // The route only defines `:id`, so log that (plus the actual failure reason).
    logger.log('info', `Error while deleting a recording with id: ${req.params.id} - ${message}`);
    return res.send(false);
  }
});
/**
 * GET endpoint that responds with all runs held in the storage.
 * When the lookup throws, an info-level entry is logged and `null` is sent.
 */
router.get('/runs', requireSignIn, async (_request, response) => {
  try {
    return response.send(await Run.findAll());
  } catch {
    logger.log('info', 'Error while reading runs');
    return response.send(null);
  }
});
/**
 * DELETE endpoint for deleting a run from the storage.
 *
 * Destroys every run whose `runId` equals the `:id` route parameter.
 * Responds with `true` on success and `false` when the deletion throws.
 */
router.delete('/runs/:id', requireSignIn, async (req, res) => {
  try {
    await Run.destroy({ where: { runId: req.params.id } });
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    // The route only defines `:id`, so log that (plus the actual failure reason).
    logger.log('info', `Error while deleting a run with id: ${req.params.id} - ${message}`);
    return res.send(false);
  }
});
/**
 * PUT endpoint that prepares a run for the recording identified by `:id`.
 *
 * Steps, in order: look up the recording, read the caller's proxy settings,
 * create a remote browser for the run, and persist a new run record with
 * status `RUNNING`. Responds with `{ browserId, runId }`.
 *
 * Responds 404 when the recording (or its meta id) is missing, 401 when the
 * request carries no user, and an empty string when anything throws.
 */
router.put('/runs/:id', requireSignIn, async (req: AuthenticatedRequest, res) => {
  try {
    const robot = await Robot.findOne({
      where: { 'recording_meta.id': req.params.id },
      raw: true,
    });
    const meta = robot?.recording_meta;
    if (!robot || !meta || !meta.id) {
      return res.status(404).send({ error: 'Recording not found' });
    }

    if (!req.user) {
      return res.status(401).send({ error: 'Unauthorized' });
    }

    // Build the proxy settings only when a proxy URL is configured;
    // credentials are attached only when both username and password exist.
    const proxyConfig = await getDecryptedProxyConfig(req.user.id);
    let proxy: any = undefined;
    if (proxyConfig.proxy_url) {
      proxy = { server: proxyConfig.proxy_url };
      if (proxyConfig.proxy_username && proxyConfig.proxy_password) {
        proxy.username = proxyConfig.proxy_username;
        proxy.password = proxyConfig.proxy_password;
      }
    }

    const browserId = createRemoteBrowserForRun({
      browser: chromium,
      launchOptions: { headless: true, proxy },
    });

    const runId = uuid();
    const createdRun = await Run.create({
      status: 'RUNNING',
      name: meta.name,
      robotId: robot.id,
      robotMetaId: meta.id,
      startedAt: new Date().toLocaleString(),
      finishedAt: '',
      browserId,
      interpreterSettings: req.body,
      log: '',
      runId,
      runByUserId: req.user.id,
      serializableOutput: {},
      binaryOutput: {},
    });

    const { runId: storedRunId } = createdRun.toJSON();
    return res.send({ browserId, runId: storedRunId });
  } catch (e) {
    const failure = e as Error;
    logger.log('info', `Error while creating a run with recording id: ${req.params.id} - ${failure.message}`);
    return res.send('');
  }
});
/**
 * GET endpoint for getting a run from the storage.
 *
 * Looks the run up by the `:id` route parameter (matched against `runId`).
 * Responds with the raw run row, 404 with `null` when no run matches, and
 * `null` when the lookup throws.
 */
router.get('/runs/run/:id', requireSignIn, async (req, res) => {
  try {
    // The route parameter is named `id`; `req.params.runId` does not exist.
    const run = await Run.findOne({ where: { runId: req.params.id }, raw: true });
    if (!run) {
      return res.status(404).send(null);
    }
    return res.send(run);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    logger.log('error', `Error ${message} while reading a run with id: ${req.params.id}.json`);
    return res.send(null);
  }
});
/**
 * POST endpoint for interpreting a run and saving its result to the storage.
 *
 * Finds the run by the `:id` route parameter and the recording it refers to,
 * interprets the recording on the current page of the run's remote browser,
 * uploads the binary output, destroys the remote browser, and updates the run
 * with status `success`, the log and the outputs. Afterwards a Google Sheet
 * update task is registered for the run.
 *
 * Responds with `true` on success, 404 with `false` when the run or the
 * recording is missing, and `false` when anything throws.
 */
router.post('/runs/run/:id', requireSignIn, async (req, res) => {
  try {
    logger.log('debug', `Params for POST /runs/run/:id: ${req.params.id}`);
    const run = await Run.findOne({ where: { runId: req.params.id } });
    if (!run) {
      return res.status(404).send(false);
    }
    const plainRun = run.toJSON();

    const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
    if (!recording) {
      return res.status(404).send(false);
    }

    // interpret the run in active browser
    const browser = browserPool.getRemoteBrowser(plainRun.browserId);
    const currentPage = browser?.getCurrentPage();
    if (!browser || !currentPage) {
      throw new Error('Could not find an active browser page for the run');
    }

    const interpretationInfo = await browser.interpreter.InterpretRecording(
      recording.recording, currentPage, plainRun.interpreterSettings);
    const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
    const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput);
    await destroyRemoteBrowser(plainRun.browserId);

    // Only the changed fields are passed; spreading the model instance here
    // would hand its internal bookkeeping properties to `update`.
    await run.update({
      status: 'success',
      finishedAt: new Date().toLocaleString(),
      browserId: plainRun.browserId,
      log: interpretationInfo.log.join('\n'),
      serializableOutput: interpretationInfo.serializableOutput,
      binaryOutput: uploadedBinaryOutput,
    });

    // A failure to register the sheet update must not fail the finished run.
    try {
      googleSheetUpdateTasks[plainRun.runId] = {
        robotId: plainRun.robotMetaId,
        runId: plainRun.runId,
        status: 'pending',
        retries: 5,
      };
      processGoogleSheetUpdates();
    } catch (err: any) {
      logger.log('error', `Failed to update Google Sheet for run: ${plainRun.runId}: ${err.message}`);
    }
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    logger.log('info', `Error while running a recording with id: ${req.params.id} - ${message}`);
    return res.send(false);
  }
});
2024-10-24 22:26:12 +05:30
/**
 * PUT endpoint for scheduling recurring runs of the robot identified by `:id`.
 *
 * Expects `runEvery`, `runEveryUnit` (MINUTES | HOURS | DAYS | WEEKS | MONTHS),
 * `startFrom` (weekday name), `atTimeStart` / `atTimeEnd` (`HH:mm`) and
 * `timezone` in the request body. Builds a cron expression from them, adds a
 * repeating 'run workflow' job to the workflow queue and stores the schedule
 * on the robot.
 *
 * Responds 200 with `{ message, runId, robot }`, 404 when the robot does not
 * exist, 400 on missing/invalid parameters, 401 when the request carries no
 * user, and 500 when anything throws.
 */
router.put('/schedule/:id/', requireSignIn, async (req: AuthenticatedRequest, res) => {
  console.log(req.body);
  try {
    const { id } = req.params;
    const {
      runEvery,
      runEveryUnit,
      startFrom,
      atTimeStart,
      atTimeEnd,
      timezone
    } = req.body;

    const robot = await Robot.findOne({ where: { 'recording_meta.id': id } });
    if (!robot) {
      return res.status(404).json({ error: 'Robot not found' });
    }

    // Both times are parsed below for every unit, so both are always required
    // (previously a missing time for DAYS/WEEKS/MONTHS crashed with a 500).
    if (!id || !runEvery || !runEveryUnit || !startFrom || !timezone || !atTimeStart || !atTimeEnd) {
      return res.status(400).json({ error: 'Missing required parameters' });
    }

    if (!['HOURS', 'DAYS', 'WEEKS', 'MONTHS', 'MINUTES'].includes(runEveryUnit)) {
      return res.status(400).json({ error: 'Invalid runEvery unit' });
    }

    if (!moment.tz.zone(timezone)) {
      return res.status(400).json({ error: 'Invalid timezone' });
    }

    // Non-string times cannot be split; reject them instead of throwing.
    if (typeof atTimeStart !== 'string' || typeof atTimeEnd !== 'string') {
      return res.status(400).json({ error: 'Invalid time format' });
    }
    const [startHours, startMinutes] = atTimeStart.split(':').map(Number);
    const [endHours, endMinutes] = atTimeEnd.split(':').map(Number);

    if (isNaN(startHours) || isNaN(startMinutes) || isNaN(endHours) || isNaN(endMinutes) ||
      startHours < 0 || startHours > 23 || startMinutes < 0 || startMinutes > 59 ||
      endHours < 0 || endHours > 23 || endMinutes < 0 || endMinutes > 59) {
      return res.status(400).json({ error: 'Invalid time format' });
    }

    const days = ['SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY'];
    if (!days.includes(startFrom)) {
      return res.status(400).json({ error: 'Invalid start day' });
    }
    const dayIndex = days.indexOf(startFrom);

    let cronExpression: string | undefined;
    switch (runEveryUnit) {
      // NOTE(review): MINUTES shares the HOURS expression, which steps the
      // hour field and uses the start/end minutes as a minute range - confirm
      // this is the intended meaning for both units.
      case 'MINUTES':
      case 'HOURS':
        cronExpression = `${startMinutes}-${endMinutes} */${runEvery} * * *`;
        break;
      case 'DAYS':
        cronExpression = `${startMinutes} ${startHours} */${runEvery} * *`;
        break;
      case 'WEEKS':
        // NOTE(review): a step of 7 or more in the day-of-week field may not
        // express "every N weeks" - verify against the cron parser in use.
        cronExpression = `${startMinutes} ${startHours} * * ${dayIndex}/${7 * runEvery}`;
        break;
      case 'MONTHS': {
        // Keep the expression at five fields. Appending the weekday as a
        // sixth field made parsers read the expression with a leading
        // seconds field, shifting every other field by one position.
        // SUNDAY keeps the wildcard weekday, as before.
        const dayOfWeek = startFrom === 'SUNDAY' ? '*' : String(dayIndex);
        cronExpression = `${startMinutes} ${startHours} 1-7 */${runEvery} ${dayOfWeek}`;
        break;
      }
    }

    if (!cronExpression || !cron.validate(cronExpression)) {
      return res.status(400).json({ error: 'Invalid cron expression generated' });
    }

    if (!req.user) {
      return res.status(401).send({ error: 'Unauthorized' });
    }

    const runId = uuid();
    const userId = req.user.id;

    // NOTE(review): jobs previously queued for this robot are not removed
    // before the new one is added - confirm duplicates cannot accumulate.
    const job = await workflowQueue.add(
      'run workflow',
      { id, runId, userId },
      {
        repeat: {
          pattern: cronExpression,
          tz: timezone
        }
      }
    );

    // NOTE(review): `job.timestamp` is stored as the next run time - verify
    // that it really is the next scheduled execution and not the creation time.
    const nextRun = job.timestamp;

    // Update robot with schedule details
    await robot.update({
      schedule: {
        runEvery,
        runEveryUnit,
        startFrom,
        atTimeStart,
        atTimeEnd,
        timezone,
        cronExpression,
        lastRunAt: undefined,
        nextRunAt: new Date(nextRun)
      }
    });

    // Fetch updated schedule details after setting it
    const updatedRobot = await Robot.findOne({ where: { 'recording_meta.id': id } });

    res.status(200).json({
      message: 'success',
      runId,
      robot: updatedRobot
    });
  } catch (error) {
    console.error('Error scheduling workflow:', error);
    res.status(500).json({ error: 'Failed to schedule workflow' });
  }
});
2024-10-22 22:01:25 +05:30
/**
 * GET endpoint that returns the stored schedule of the robot whose
 * `recording_meta.id` equals `:id`.
 * Responds 200 with `{ schedule }`, 404 when no robot matches, and 500 when
 * the lookup throws.
 */
router.get('/schedule/:id', requireSignIn, async (request, response) => {
  try {
    const robotMetaId = request.params.id;
    const found = await Robot.findOne({ where: { 'recording_meta.id': robotMetaId }, raw: true });

    if (found) {
      return response.status(200).json({ schedule: found.schedule });
    }
    return response.status(404).json({ error: 'Robot not found' });
  } catch (error) {
    console.error('Error getting schedule:', error);
    response.status(500).json({ error: 'Failed to get schedule' });
  }
});
2024-10-22 18:26:28 +05:30
2024-10-22 22:01:25 +05:30
/**
 * DELETE endpoint that clears the schedule of the robot whose
 * `recording_meta.id` equals `:id`.
 *
 * Removes every delayed or waiting queue job whose `data.id` matches, then
 * sets the robot's `schedule` to `null`. Responds 200 on success, 404 when no
 * robot matches, and 500 when anything throws.
 */
router.delete('/schedule/:id', requireSignIn, async (request, response) => {
  try {
    const robotMetaId = request.params.id;

    const target = await Robot.findOne({ where: { 'recording_meta.id': robotMetaId } });
    if (target === null || target === undefined) {
      return response.status(404).json({ error: 'Robot not found' });
    }

    // Drop pending queue entries that belong to this robot, one at a time.
    const pendingJobs = await workflowQueue.getJobs(['delayed', 'waiting']);
    for (const pending of pendingJobs) {
      if (pending.data.id !== robotMetaId) {
        continue;
      }
      await pending.remove();
    }

    // Clear the schedule stored on the robot itself.
    await target.update({ schedule: null });

    response.status(200).json({ message: 'Schedule deleted successfully' });
  } catch (error) {
    console.error('Error deleting schedule:', error);
    response.status(500).json({ error: 'Failed to delete schedule' });
  }
});
2024-09-11 23:33:04 +05:30
2024-07-31 21:10:25 +05:30
/**
 * POST endpoint for aborting a current interpretation of the run.
 *
 * Finds the run by the `:id` route parameter, collects the debug messages and
 * the serializable/binary data gathered so far by the interpreter of the
 * run's remote browser, and stores them on the run together with status
 * `aborted`. When no remote browser is found for the run, the log and outputs
 * are stored as `undefined`.
 *
 * Responds with `true` on success, 404 with `false` when the run is missing,
 * and `false` when anything throws.
 */
router.post('/runs/abort/:id', requireSignIn, async (req, res) => {
  try {
    const run = await Run.findOne({ where: { runId: req.params.id } });
    if (!run) {
      return res.status(404).send(false);
    }
    const plainRun = run.toJSON();

    const browser = browserPool.getRemoteBrowser(plainRun.browserId);
    // Newline-separated, matching how a finished run stores its log.
    const currentLog = browser?.interpreter.debugMessages.join('\n');
    // Key each collected item by its position: item-0, item-1, ...
    const serializableOutput = browser?.interpreter.serializableData.reduce((reducedObject, item, index) => {
      return {
        [`item-${index}`]: item,
        ...reducedObject,
      }
    }, {});
    const binaryOutput = browser?.interpreter.binaryData.reduce((reducedObject, item, index) => {
      return {
        [`item-${index}`]: item,
        ...reducedObject,
      }
    }, {});

    // Only the changed fields are passed; spreading the model instance here
    // would hand its internal bookkeeping properties to `update`.
    await run.update({
      status: 'aborted',
      finishedAt: new Date().toLocaleString(),
      browserId: plainRun.browserId,
      log: currentLog,
      serializableOutput,
      binaryOutput,
    });
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    // The route only defines `:id`, so log that (plus the actual failure reason).
    logger.log('info', `Error while aborting a run with id: ${req.params.id} - ${message}`);
    return res.send(false);
  }
});