Files
parcer/server/src/routes/storage.ts

378 lines
11 KiB
TypeScript
Raw Normal View History

2024-07-31 21:10:25 +05:30
import { Router } from 'express';
import logger from "../logger";
import { createRemoteBrowserForRun, destroyRemoteBrowser } from "../browser-management/controller";
import { chromium } from "playwright";
2024-09-19 17:39:33 +05:30
import { browserPool } from "../server";
2024-07-31 21:10:25 +05:30
import { uuid } from "uuidv4";
2024-09-11 23:33:04 +05:30
import moment from 'moment-timezone';
import cron from 'node-cron';
2024-09-19 19:36:47 +05:30
import { googleSheetUpdateTasks, processGoogleSheetUpdates } from '../workflow-management/integrations/gsheet';
2024-10-06 03:15:45 +05:30
import { getDecryptedProxyConfig } from './proxy';
import { requireSignIn } from '../middlewares/auth';
import Robot from '../models/Robot';
import Run from '../models/Run';
2024-10-15 22:20:29 +05:30
import { BinaryOutputService } from '../storage/mino';
2024-10-22 16:57:33 +05:30
import { workflowQueue } from '../worker';
2024-07-31 21:10:25 +05:30
export const router = Router();
/**
 * Root-level middleware: writes a debug log entry containing the requested
 * URL for every signed-in request to `/`, then yields to the next handler.
 */
router.all('/', requireSignIn, (request, _response, proceed) => {
  const invokedUrl = request.url;
  logger.log('debug', `The recordings API was invoked: ${invokedUrl}`);
  proceed(); // hand control to whichever handler is registered next
});
/**
 * GET /recordings — responds with the result of `Robot.findAll()`.
 * When the lookup throws, an info-level message is logged and `null` is sent.
 */
router.get('/recordings', requireSignIn, async (_req, res) => {
  try {
    const robots = await Robot.findAll();
    return res.send(robots);
  } catch {
    logger.log('info', 'Error while reading recordings');
    return res.send(null);
  }
});
2024-10-17 14:34:25 +05:30
/**
 * GET /recordings/:id — responds with the single robot whose
 * `recording_meta.id` equals the `:id` route parameter (raw row, not a model
 * instance). When the lookup throws, an info-level message is logged and
 * `null` is sent.
 */
router.get('/recordings/:id', requireSignIn, async (req, res) => {
  try {
    const robot = await Robot.findOne({
      where: { 'recording_meta.id': req.params.id },
      raw: true,
    });
    return res.send(robot);
  } catch {
    logger.log('info', 'Error while reading recordings');
    return res.send(null);
  }
});
2024-07-31 21:10:25 +05:30
/**
 * DELETE /recordings/:id — removes the robot whose `recording_meta.id` equals
 * the `:id` route parameter.
 *
 * Responds with `true` once the delete call resolves, or `false` (after
 * logging the recording id and the error message) when it throws.
 */
router.delete('/recordings/:id', requireSignIn, async (req, res) => {
  try {
    await Robot.destroy({
      where: { 'recording_meta.id': req.params.id },
    });
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    // The route only defines `:id`; the previous message interpolated a
    // non-existent `fileName` param and dropped the actual error text.
    logger.log('info', `Error while deleting a recording with id: ${req.params.id} - ${message}`);
    return res.send(false);
  }
});
/**
 * GET /runs — responds with the result of `Run.findAll()`.
 * When the lookup throws, an info-level message is logged and `null` is sent.
 */
router.get('/runs', requireSignIn, async (_req, res) => {
  try {
    const runs = await Run.findAll();
    return res.send(runs);
  } catch {
    logger.log('info', 'Error while reading runs');
    return res.send(null);
  }
});
/**
 * DELETE /runs/:id — removes the run whose `runId` equals the `:id` route
 * parameter.
 *
 * Responds with `true` once the delete call resolves, or `false` (after
 * logging the run id and the error message) when it throws.
 */
router.delete('/runs/:id', requireSignIn, async (req, res) => {
  try {
    await Run.destroy({ where: { runId: req.params.id } });
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    // The route only defines `:id`; the previous message interpolated a
    // non-existent `fileName` param and dropped the actual error text.
    logger.log('info', `Error while deleting a run with id: ${req.params.id} - ${message}`);
    return res.send(false);
  }
});
/**
 * PUT /runs/:id — prepares a run for the recording whose `recording_meta.id`
 * equals `:id`.
 *
 * Steps, in order: look the recording up (404 if it or its meta id is
 * missing), read the caller's decrypted proxy configuration, request a remote
 * headless Chromium browser (with the proxy when a proxy URL is configured),
 * and persist a new `Run` row in status `RUNNING` with the request body stored
 * as interpreter settings. Responds with `{ browserId, runId }`.
 *
 * On any thrown error an info-level message is logged and an empty string is
 * sent.
 */
router.put('/runs/:id', requireSignIn, async (req, res) => {
  try {
    const recording = await Robot.findOne({
      where: { 'recording_meta.id': req.params.id },
      raw: true,
    });

    if (!recording?.recording_meta?.id) {
      return res.status(404).send({ error: 'Recording not found' });
    }

    const proxyConfig = await getDecryptedProxyConfig(req.user.id);

    // Only build proxy settings when a proxy URL exists; credentials are
    // attached only if both username and password are present.
    const proxyOptions: any = proxyConfig.proxy_url
      ? {
          server: proxyConfig.proxy_url,
          ...(proxyConfig.proxy_username && proxyConfig.proxy_password && {
            username: proxyConfig.proxy_username,
            password: proxyConfig.proxy_password,
          }),
        }
      : {};

    const browserId = createRemoteBrowserForRun({
      browser: chromium,
      launchOptions: {
        headless: true,
        proxy: proxyOptions.server ? proxyOptions : undefined,
      },
    });

    const runId = uuid();

    const createdRun = await Run.create({
      status: 'RUNNING',
      name: recording.recording_meta.name,
      robotId: recording.id,
      robotMetaId: recording.recording_meta.id,
      startedAt: new Date().toLocaleString(),
      finishedAt: '',
      browserId: browserId,
      interpreterSettings: req.body,
      log: '',
      runId,
      runByUserId: req.user.id,
      serializableOutput: {},
      binaryOutput: {},
    });

    const persisted = createdRun.toJSON();

    return res.send({
      browserId: browserId,
      runId: persisted.runId,
    });
  } catch (e) {
    const { message } = e as Error;
    logger.log('info', `Error while creating a run with recording id: ${req.params.id} - ${message}`);
    return res.send('');
  }
});
/**
 * GET /runs/run/:id — responds with the run whose `runId` equals the `:id`
 * route parameter (raw row).
 *
 * Responds 404 with `null` when no such run exists. When the lookup throws,
 * the error is logged and `null` is sent.
 */
router.get('/runs/run/:id', requireSignIn, async (req, res) => {
  try {
    // The route declares `:id`; reading `req.params.runId` here always yielded
    // `undefined`, so the lookup could never match the requested run.
    const run = await Run.findOne({ where: { runId: req.params.id }, raw: true });
    if (!run) {
      return res.status(404).send(null);
    }
    return res.send(run);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    logger.log('error', `Error ${message} while reading a run with id: ${req.params.id}.json`);
    return res.send(null);
  }
});
/**
 * POST endpoint for interpreting a run in its remote browser and saving the
 * outcome to the storage.
 *
 * Looks up the run by `runId` (`:id`) and the recording it references
 * (404 with `false` if either is missing), interprets the recording on the
 * browser's current page, uploads the binary output, destroys the remote
 * browser, and updates the run with status `success`, the joined log and the
 * outputs. A Google Sheets update task is then queued for the run.
 *
 * Responds with `true` on success. Any thrown error (including a missing
 * browser or page) is logged and answered with `false`.
 */
router.post('/runs/run/:id', requireSignIn, async (req, res) => {
  try {
    logger.log('debug', `Params for POST /runs/run/:id ${req.params.id}`);

    const run = await Run.findOne({ where: { runId: req.params.id } });
    if (!run) {
      return res.status(404).send(false);
    }
    const plainRun = run.toJSON();

    const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
    if (!recording) {
      return res.status(404).send(false);
    }

    // interpret the run in active browser
    const browser = browserPool.getRemoteBrowser(plainRun.browserId);
    const currentPage = browser?.getCurrentPage();
    if (!browser || !currentPage) {
      // Nothing was destroyed at this point; the browser (or its page) is
      // simply not available, so say that instead.
      throw new Error('Remote browser or its current page is not available');
    }

    const interpretationInfo = await browser.interpreter.InterpretRecording(
      recording.recording, currentPage, plainRun.interpreterSettings);

    const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
    const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput);

    await destroyRemoteBrowser(plainRun.browserId);

    // Only the fields that change are passed; spreading the model instance
    // itself would feed its internal bookkeeping properties into update().
    await run.update({
      status: 'success',
      finishedAt: new Date().toLocaleString(),
      browserId: plainRun.browserId,
      log: interpretationInfo.log.join('\n'),
      serializableOutput: interpretationInfo.serializableOutput,
      binaryOutput: uploadedBinaryOutput,
    });

    // Queueing the sheet update is best-effort: a failure here is logged and
    // must not turn a finished run into an error response.
    try {
      googleSheetUpdateTasks[plainRun.runId] = {
        robotId: plainRun.robotMetaId,
        runId: plainRun.runId,
        status: 'pending',
        retries: 5,
      };
      processGoogleSheetUpdates();
    } catch (err: any) {
      logger.log('error', `Failed to update Google Sheet for run: ${plainRun.runId}: ${err.message}`);
    }

    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    logger.log('info', `Error while running a recording with id: ${req.params.id} - ${message}`);
    return res.send(false);
  }
});
2024-10-10 02:39:45 +05:30
/**
 * PUT /schedule/:id/ — registers a repeating "run workflow" job for the robot
 * identified by `:id`.
 *
 * Request body fields (all required):
 *  - `runEvery`: interval count, interpolated into the cron expression
 *  - `runEveryUnit`: one of `HOURS`, `DAYS`, `WEEKS`, `MONTHS`
 *  - `startFrom`: upper-case weekday name (`SUNDAY` … `SATURDAY`)
 *  - `atTime`: `HH:mm` string (hours 0-23, minutes 0-59)
 *  - `timezone`: a zone name known to moment-timezone
 *
 * Responds 400 with an `error` message when a field is missing or invalid or
 * the generated cron expression fails validation, 200 with
 * `{ message: 'success', runId }` once the job is queued, and 500 when
 * anything throws.
 */
router.put('/schedule/:id/', requireSignIn, async (req, res) => {
  console.log(req.body);
  try {
    const { id } = req.params;
    const {
      runEvery,
      runEveryUnit,
      startFrom,
      atTime,
      timezone
    } = req.body;

    if (!id || !runEvery || !runEveryUnit || !startFrom || !atTime || !timezone) {
      return res.status(400).json({ error: 'Missing required parameters' });
    }

    if (!['HOURS', 'DAYS', 'WEEKS', 'MONTHS'].includes(runEveryUnit)) {
      return res.status(400).json({ error: 'Invalid runEvery unit' });
    }

    if (!moment.tz.zone(timezone)) {
      return res.status(400).json({ error: 'Invalid timezone' });
    }

    // A non-string atTime (e.g. a number) has no split(); reject it as a bad
    // time instead of letting it surface as a 500.
    if (typeof atTime !== 'string') {
      return res.status(400).json({ error: 'Invalid time format' });
    }

    const [hours = NaN, minutes = NaN] = atTime.split(':').map(Number);
    if (isNaN(hours) || isNaN(minutes) || hours < 0 || hours > 23 || minutes < 0 || minutes > 59) {
      return res.status(400).json({ error: 'Invalid time format' });
    }

    const days = ['SUNDAY', 'MONDAY', 'TUESDAY', 'WEDNESDAY', 'THURSDAY', 'FRIDAY', 'SATURDAY'];
    if (!days.includes(startFrom)) {
      return res.status(400).json({ error: 'Invalid start day' });
    }
    const dayIndex = days.indexOf(startFrom);

    let cronExpression: string | undefined;
    switch (runEveryUnit) {
      case 'HOURS':
        cronExpression = `${minutes} */${runEvery} * * *`;
        break;
      case 'DAYS':
        cronExpression = `${minutes} ${hours} */${runEvery} * *`;
        break;
      case 'WEEKS':
        // NOTE(review): a step of 7 * runEvery on the day-of-week field cannot
        // express "every N weeks" in cron — confirm the intended schedule.
        cronExpression = `${minutes} ${hours} * * ${dayIndex}/${7 * runEvery}`;
        break;
      case 'MONTHS': {
        // The weekday belongs in the fifth (day-of-week) field. Appending it
        // after a literal `*` produced a six-field expression, which is read
        // as seconds-first and schedules something entirely different.
        // SUNDAY keeps the wildcard, as before.
        // NOTE(review): many cron implementations OR day-of-month with
        // day-of-week when both are restricted — confirm this matches intent.
        const dayOfWeekField = startFrom === 'SUNDAY' ? '*' : String(dayIndex);
        cronExpression = `${minutes} ${hours} 1-7 */${runEvery} ${dayOfWeekField}`;
        break;
      }
    }

    if (!cronExpression || !cron.validate(cronExpression)) {
      return res.status(400).json({ error: 'Invalid cron expression generated' });
    }

    const runId = uuid();
    const userId = req.user.id;

    await workflowQueue.add(
      'run workflow',
      { id, runId, userId },
      {
        repeat: {
          pattern: cronExpression,
          tz: timezone
        }
      }
    );

    res.status(200).json({
      message: 'success',
      runId,
      // cronExpression,
      // nextRunTime: getNextRunTime(cronExpression, timezone)
    });
  } catch (error) {
    console.error('Error scheduling workflow:', error);
    res.status(500).json({ error: 'Failed to schedule workflow' });
  }
});
2024-09-11 23:33:04 +05:30
// function getNextRunTime(cronExpression, timezone) {
// const schedule = cron.schedule(cronExpression, () => {}, { timezone });
// const nextDate = schedule.nextDate();
// schedule.stop();
// return nextDate.toDate();
// }
2024-07-31 21:10:25 +05:30
/**
 * POST endpoint for aborting a current interpretation of the run.
 *
 * Looks up the run by `runId` (`:id`; 404 with `false` if missing), collects
 * whatever the run's remote browser interpreter has produced so far (debug
 * messages, serializable data, binary data) and stores it on the run together
 * with status `aborted` and a finish timestamp.
 *
 * Responds with `true` on success, or `false` (after logging the run id and
 * the error message) when anything throws.
 */
router.post('/runs/abort/:id', requireSignIn, async (req, res) => {
  try {
    const run = await Run.findOne({ where: { runId: req.params.id } });
    if (!run) {
      return res.status(404).send(false);
    }
    const plainRun = run.toJSON();

    const browser = browserPool.getRemoteBrowser(plainRun.browserId);

    // Join with a real newline (the previous '/n' was a typo), matching how
    // the finished-run handler stores its log.
    const currentLog = browser?.interpreter.debugMessages.join('\n');

    // Flatten the collected outputs into `{ 'item-<index>': value }` maps.
    const serializableOutput = browser?.interpreter.serializableData.reduce((reducedObject, item, index) => {
      return {
        [`item-${index}`]: item,
        ...reducedObject,
      }
    }, {});
    const binaryOutput = browser?.interpreter.binaryData.reduce((reducedObject, item, index) => {
      return {
        [`item-${index}`]: item,
        ...reducedObject,
      }
    }, {});

    // Only the fields that change are passed; spreading the model instance
    // itself would feed its internal bookkeeping properties into update().
    await run.update({
      status: 'aborted',
      finishedAt: new Date().toLocaleString(),
      browserId: plainRun.browserId,
      log: currentLog,
      serializableOutput,
      binaryOutput,
    });
    return res.send(true);
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    // The route only defines `:id`; the previous message referenced the
    // non-existent `fileName`/`runId` params and described the wrong action.
    logger.log('info', `Error while aborting a run with id: ${req.params.id} - ${message}`);
    return res.send(false);
  }
});