Files
parcer/server/src/workflow-management/scheduler/index.ts

249 lines
7.1 KiB
TypeScript
Raw Normal View History

2024-09-13 14:28:52 +05:30
import { uuid } from "uuidv4";
import { chromium } from "playwright";
import { io, Socket } from "socket.io-client";
2024-09-13 14:27:39 +05:30
import { createRemoteBrowserForRun, destroyRemoteBrowser } from '../../browser-management/controller';
2024-09-11 11:53:12 +05:30
import logger from '../../logger';
2024-09-12 21:01:46 +05:30
import { browserPool } from "../../server";
2024-09-19 19:41:19 +05:30
import { googleSheetUpdateTasks, processGoogleSheetUpdates } from "../integrations/gsheet";
import Robot from "../../models/Robot";
import Run from "../../models/Run";
import { getDecryptedProxyConfig } from "../../routes/proxy";
2024-10-15 22:22:03 +05:30
import { BinaryOutputService } from "../../storage/mino";
2024-10-29 03:46:13 +05:30
import { capture } from "../../utils/analytics";
2024-09-12 21:01:46 +05:30
/**
 * Prepares a scheduled run for the robot whose `recording_meta.id` equals `id`:
 * creates a remote browser for the user and stores a Run row with status
 * `scheduled`.
 *
 * @param id - `recording_meta.id` of the robot to schedule.
 * @param userId - User the proxy lookup and the remote browser are created for.
 * @returns `{ browserId, runId }` on success, or `{ success: false, error }`
 *          when the recording is missing or any step throws.
 */
async function createWorkflowAndStoreMetadata(id: string, userId: string) {
  // Tracked outside the try block so the catch handler can release the browser
  // when a later step (for example Run.create) fails.
  let createdBrowserId: ReturnType<typeof createRemoteBrowserForRun> | undefined;

  try {
    const recording = await Robot.findOne({
      where: {
        'recording_meta.id': id
      },
      raw: true
    });

    if (!recording || !recording.recording_meta || !recording.recording_meta.id) {
      return {
        success: false,
        error: 'Recording not found'
      };
    }

    // NOTE(review): proxyOptions is built but never handed to
    // createRemoteBrowserForRun below. The lookup is kept because a failing
    // lookup aborts scheduling; confirm whether the options should be forwarded.
    const proxyConfig = await getDecryptedProxyConfig(userId);
    let proxyOptions: any = {};

    if (proxyConfig.proxy_url) {
      proxyOptions = {
        server: proxyConfig.proxy_url,
        ...(proxyConfig.proxy_username && proxyConfig.proxy_password && {
          username: proxyConfig.proxy_username,
          password: proxyConfig.proxy_password,
        }),
      };
    }

    const browserId = createRemoteBrowserForRun(userId);
    createdBrowserId = browserId;

    const runId = uuid();
    const run = await Run.create({
      status: 'scheduled',
      name: recording.recording_meta.name,
      robotId: recording.id,
      robotMetaId: recording.recording_meta.id,
      startedAt: new Date().toLocaleString(),
      finishedAt: '',
      browserId,
      interpreterSettings: { maxConcurrency: 1, maxRepeats: 1, debug: true },
      log: '',
      runId,
      runByScheduleId: uuid(),
      serializableOutput: {},
      binaryOutput: {},
    });
    const plainRun = run.toJSON();

    return {
      browserId,
      runId: plainRun.runId,
    };
  } catch (e: unknown) {
    const message = e instanceof Error ? e.message : String(e);
    logger.log('error', `Error while scheduling a run with id: ${id} - ${message}`);

    // Nothing else knows about the browser yet, so it has to be released here.
    if (createdBrowserId !== undefined) {
      try {
        await destroyRemoteBrowser(createdBrowserId);
      } catch (cleanupError: unknown) {
        const cleanupMessage = cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
        logger.log('error', `Failed to destroy browser ${createdBrowserId} after scheduling error - ${cleanupMessage}`);
      }
    }

    return {
      success: false,
      error: message,
    };
  }
}
/**
 * Tallies the "item-0" entries of a run's outputs for analytics.
 *
 * Missing or malformed entries count as zero instead of throwing, so a
 * reporting problem cannot fail a run whose results were already stored.
 */
function countExtractedOutput(serializableOutput: unknown, binaryOutput: unknown) {
  const firstEntry = (output: unknown): unknown =>
    output !== null && typeof output === 'object'
      ? (output as Record<string, unknown>)['item-0']
      : undefined;

  const extractedScreenshotsCount = firstEntry(binaryOutput) ? 1 : 0;

  const firstSerialized = firstEntry(serializableOutput);
  const items: unknown[] = Array.isArray(firstSerialized) ? firstSerialized : [];
  const totalRowsExtracted = items.reduce<number>(
    (total, item) => total + (item == null ? 0 : Object.keys(item as object).length),
    0
  );

  return {
    totalRowsExtracted,
    extractedItemsCount: items.length,
    extractedScreenshotsCount,
  };
}

/**
 * Executes the stored run identified by `id` in its already-created remote
 * browser, uploads the binary output, stores the results on the Run row,
 * reports analytics and queues a Google Sheet update.
 *
 * @param id - `runId` of the Run row to execute.
 * @returns `true` when the run finished, `false` when any step threw (the Run
 *          row is then marked `failed`), or `{ success: false, error }` when
 *          the run or its recording cannot be found.
 */
async function executeRun(id: string) {
  try {
    const run = await Run.findOne({ where: { runId: id } });
    if (!run) {
      return {
        success: false,
        error: 'Run not found'
      };
    }

    const plainRun = run.toJSON();

    const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
    if (!recording) {
      return {
        success: false,
        error: 'Recording not found'
      };
    }

    // Persist the transition on the model; assigning to the toJSON() copy
    // would never reach the database.
    await run.update({ status: 'running' });

    const browser = browserPool.getRemoteBrowser(plainRun.browserId);
    if (!browser) {
      throw new Error('Could not access browser');
    }

    const currentPage = await browser.getCurrentPage();
    if (!currentPage) {
      throw new Error('Could not create a new page');
    }

    const interpretationInfo = await browser.interpreter.InterpretRecording(
      recording.recording, currentPage, plainRun.interpreterSettings);

    const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
    const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput);

    await destroyRemoteBrowser(plainRun.browserId);

    // Only the changed columns are sent; the model instance itself is not
    // spread into the update payload.
    await run.update({
      status: 'success',
      finishedAt: new Date().toLocaleString(),
      browserId: plainRun.browserId,
      log: interpretationInfo.log.join('\n'),
      serializableOutput: interpretationInfo.serializableOutput,
      binaryOutput: uploadedBinaryOutput,
    });

    const { totalRowsExtracted, extractedItemsCount, extractedScreenshotsCount } = countExtractedOutput(
      run.dataValues.serializableOutput,
      run.dataValues.binaryOutput
    );

    console.log(`Extracted Items Count: ${extractedItemsCount}`);
    console.log(`Extracted Screenshots Count: ${extractedScreenshotsCount}`);
    console.log(`Total Rows Extracted: ${totalRowsExtracted}`);

    capture(
      'maxun-oss-run-created-scheduled',
      {
        runId: id,
        created_at: new Date().toISOString(),
        status: 'success',
        totalRowsExtracted,
        extractedItemsCount,
        extractedScreenshotsCount,
      }
    );

    googleSheetUpdateTasks[id] = {
      robotId: plainRun.robotMetaId,
      runId: id,
      status: 'pending',
      retries: 5,
    };
    processGoogleSheetUpdates();

    return true;
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    logger.log('error', `Error while running a recording with id: ${id} - ${message}`);

    const run = await Run.findOne({ where: { runId: id } });
    if (run) {
      await run.update({
        status: 'failed',
        finishedAt: new Date().toLocaleString(),
      });
    }

    capture(
      'maxun-oss-run-created-scheduled',
      {
        runId: id,
        created_at: new Date().toISOString(),
        status: 'failed',
      }
    );

    return false;
  }
}
2024-09-12 21:01:46 +05:30
2024-10-10 03:03:14 +05:30
/**
 * Runs the stored run `id` and releases the remote browser when the run did
 * not complete.
 *
 * @param browserId - Remote browser to destroy when the run fails.
 * @param id - `runId` handed to `executeRun`.
 */
async function readyForRunHandler(browserId: string, id: string) {
  try {
    const interpretation = await executeRun(id);

    // executeRun reports "not found" with a truthy `{ success: false }` object,
    // so only the literal `true` may be treated as success.
    if (interpretation === true) {
      logger.log('info', `Interpretation of ${id} succeeded`);
    } else {
      logger.log('error', `Interpretation of ${id} failed`);
      await destroyRemoteBrowser(browserId);
    }

    resetRecordingState(browserId, id);
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`Error during readyForRunHandler: ${message}`);

    // The caller drops the returned promise, so a cleanup failure is logged
    // here rather than left as an unhandled rejection.
    try {
      await destroyRemoteBrowser(browserId);
    } catch (cleanupError: unknown) {
      const cleanupMessage = cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
      logger.error(`Error destroying browser ${browserId}: ${cleanupMessage}`);
    }
  }
}
2024-10-10 03:03:14 +05:30
/**
 * Placeholder hook invoked after a run has been handled.
 *
 * Intentionally a no-op: the arguments are passed by value, so reassigning
 * them inside this function could never change anything for the caller.
 *
 * @param browserId - Identifier of the browser the run used (unused).
 * @param id - Identifier of the run (unused).
 */
function resetRecordingState(browserId: string, id: string): void {
  // Mark both parameters as deliberately unused.
  void browserId;
  void id;
}
2024-09-12 21:01:46 +05:30
2024-10-10 03:22:04 +05:30
/**
 * Schedules a run of the recording `id` for `userId` and connects to the
 * browser's socket namespace; the run itself starts when the `ready-for-run`
 * event arrives.
 *
 * Failures are logged and swallowed, so the returned promise resolves either way.
 *
 * @param id - `recording_meta.id` of the robot to run.
 * @param userId - User on whose behalf the run is created.
 */
export async function handleRunRecording(id: string, userId: string) {
  try {
    // Validate before any browser or Run row is created for an unusable request.
    if (!userId) {
      throw new Error('userId is undefined');
    }

    const result = await createWorkflowAndStoreMetadata(id, userId);
    const { browserId, runId: newRunId } = result;

    if (!browserId || !newRunId) {
      // Surface the underlying reason instead of a generic message.
      const reason = result.error ? `: ${result.error}` : '';
      throw new Error(`browserId or runId is undefined${reason}`);
    }

    const socket = io(`${process.env.BACKEND_URL}/${browserId}`, {
      transports: ['websocket'],
      // NOTE(review): this turns off TLS certificate verification for the
      // connection; confirm this is acceptable outside local development.
      rejectUnauthorized: false
    });

    socket.on('ready-for-run', () => readyForRunHandler(browserId, newRunId));

    logger.log('info', `Running recording: ${id}`);

    socket.on('disconnect', () => {
      cleanupSocketListeners(socket, browserId, newRunId);
    });
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    logger.error(`Error running recording: ${message}`);
  }
}
2024-10-10 03:05:39 +05:30
/**
 * Detaches the `ready-for-run` listeners from `socket` and logs the cleanup.
 *
 * @param socket - Socket whose listeners are removed.
 * @param browserId - Browser identifier, used for the log message.
 * @param id - Run identifier, used for the log message.
 */
function cleanupSocketListeners(socket: Socket, browserId: string, id: string) {
  // Passing a freshly created arrow function to off() can never match the
  // listener that was registered, so remove every listener for the event.
  socket.off('ready-for-run');
  logger.log('info', `Cleaned up listeners for browserId: ${browserId}, runId: ${id}`);
}
// handleRunRecording is exported at its declaration; this additionally exposes the run-creation helper.
export { createWorkflowAndStoreMetadata };