Files
parcer/server/src/workflow-management/scheduler/index.ts

230 lines
6.9 KiB
TypeScript
Raw Normal View History

2024-09-11 11:30:39 +05:30
import { Queue, Worker } from 'bullmq';
import IORedis from 'ioredis';
2024-09-11 11:53:12 +05:30
import { deleteFile, readFile, readFiles, saveFile } from "../storage";
2024-09-12 00:57:01 +05:30
import { createRemoteBrowserForRun, destroyRemoteBrowser, getActiveBrowserId } from '../../browser-management/controller';
2024-09-12 17:41:49 +05:30
import { RemoteBrowser } from '../../browser-management/classes/RemoteBrowser';
2024-09-11 11:53:12 +05:30
import logger from '../../logger';
2024-09-12 21:01:46 +05:30
import { browserPool } from "../../server";
2024-09-11 11:53:12 +05:30
import fs from "fs";
import { uuid } from "uuidv4";
import { chromium } from "playwright";
2024-09-12 21:01:46 +05:30
import { io, Socket } from "socket.io-client";
2024-09-11 13:46:48 +05:30
/** Options for the Redis instance that backs the BullMQ queue and worker in this module. */
const redisOptions = {
  host: 'localhost',
  port: 6379,
  // Per-request retry limit is disabled; BullMQ's documentation asks for this
  // setting on connections that are handed to a Worker.
  maxRetriesPerRequest: null,
};

/** Reports that the Redis connection has been established. */
function onRedisConnect(): void {
  console.log('Connected to Redis!');
}

/** Reports any error emitted by the Redis connection. */
function onRedisError(err: Error): void {
  console.error('Redis connection error:', err);
}

/** Shared Redis connection used by both the queue and the worker below. */
const connection = new IORedis(redisOptions);
connection.on('connect', onRedisConnect);
connection.on('error', onRedisError);
2024-09-11 11:30:39 +05:30
// BullMQ queue named 'workflow'. It shares the Redis connection with the worker
// in this module, which consumes jobs under the same queue name.
const workflowQueue = new Queue('workflow', { connection });
2024-09-11 13:16:45 +05:30
/**
 * Processor for jobs on the 'workflow' queue.
 *
 * Reads `fileName` and `runId` from the job payload and hands them to
 * `handleRunRecording`. Any error is logged and then rethrown so that BullMQ
 * marks the job as failed.
 *
 * @param job - The job being processed; only its `data` payload is used.
 * @returns Whatever `handleRunRecording` resolves with.
 */
async function processWorkflowJob(job: { data: { fileName: string; runId: string } }) {
  const { fileName, runId } = job.data;
  try {
    return await handleRunRecording(fileName, runId);
  } catch (error) {
    logger.error('Error running workflow:', error);
    throw error;
  }
}

/** Worker that consumes the 'workflow' queue over the shared Redis connection. */
export const worker = new Worker('workflow', processWorkflowJob, { connection });
2024-09-12 17:41:49 +05:30
/**
 * Logs every job that finished successfully.
 *
 * The worker and the queue are intentionally left open here: both are
 * module-level singletons (the queue is exported), so closing them after a
 * completed job would stop every later job - including repeatable ones - from
 * ever being processed and would leave importers holding a closed queue.
 */
worker.on('completed', (job: { id?: string; data: { fileName: string; runId: string } }) => {
  logger.log(`info`, `Job ${job.id} completed for ${job.data.fileName}_${job.data.runId}`);
});
2024-09-12 17:41:49 +05:30
/**
 * Logs every job that failed.
 *
 * BullMQ may invoke `failed` listeners with `job` set to `undefined`, so the
 * job is checked before its fields are read.
 *
 * The worker and the queue are intentionally left open: a single failing job
 * must not shut down the module-level worker and the exported queue, otherwise
 * no later job could be processed.
 */
worker.on(
  'failed',
  (job: { id?: string; data: { fileName: string; runId: string } } | undefined, err: Error) => {
    const description = job
      ? `Job ${job.id} failed for ${job.data.fileName}_${job.data.runId}:`
      : 'A job failed but its data is no longer available:';
    logger.log(`error`, description, err);
  },
);
2024-09-12 21:01:46 +05:30
// Log the repeatable jobs already registered on the queue when this module loads.
// getRepeatableJobs() returns a promise, so the list is logged once it resolves
// (interpolating the promise itself would only print "[object Promise]"), and a
// failed lookup is logged instead of surfacing as an unhandled rejection.
const existingJobs = workflowQueue.getRepeatableJobs();
existingJobs
  .then((jobs: unknown) => {
    logger.log(`info`, `jobs ${JSON.stringify(jobs)}`);
  })
  .catch((error: unknown) => {
    logger.error('Error fetching repeatable jobs:', error);
  });
2024-09-12 19:42:02 +05:30
/**
 * Schedules a run of a stored recording.
 *
 * Creates a headless Chromium remote browser for the run and writes the
 * initial run metadata (status 'SCHEDULED') to
 * `../storage/runs/<fileName>_<runId>.json`.
 *
 * @param fileName - Name of the recording; used as the run name and in the file name.
 * @param runId - Identifier of the run; a new uuid is generated when it is falsy.
 * @returns `{ browserId, runId }` on success, or `{ success: false, error }`
 *          when any step throws (the error is logged, not rethrown).
 */
async function runWorkflow(fileName: string, runId: string) {
  const effectiveRunId = runId ? runId : uuid();
  const runFileName = `${fileName}_${effectiveRunId}.json`;

  try {
    const browserId = createRemoteBrowserForRun({
      browser: chromium,
      launchOptions: { headless: true },
    });

    // Key order is kept stable because this object is serialized as-is.
    const runMeta = {
      status: 'SCHEDULED',
      name: fileName,
      startedAt: new Date().toLocaleString(),
      finishedAt: '',
      duration: '',
      task: '', // Optionally set based on workflow
      browserId: browserId,
      interpreterSettings: { maxConcurrency: 1, maxRepeats: 1, debug: true },
      log: '',
      runId: effectiveRunId,
    };

    // Make sure the target directory exists before writing the metadata file.
    fs.mkdirSync('../storage/runs', { recursive: true });
    const serializedMeta = JSON.stringify(runMeta, null, 2);
    await saveFile(`../storage/runs/${runFileName}`, serializedMeta);

    logger.log('debug', `Scheduled run with name: ${runFileName}`);

    return {
      browserId,
      runId: effectiveRunId,
    };
  } catch (e) {
    // NOTE(review): if the failure happens after the remote browser was created,
    // the browser is not released here - confirm a caller cleans it up.
    const { message } = e as Error;
    logger.log('info', `Error while scheduling a run with name: ${runFileName}`);
    console.log(message);
    return {
      success: false,
      error: message,
    };
  }
}
2024-09-12 00:57:01 +05:30
/**
 * Executes a previously scheduled run of a recording.
 *
 * Loads the recording and the run metadata from storage, marks the run as
 * 'RUNNING', interprets the recording on the run's remote browser, destroys
 * that browser and finally stores the result, log, outputs and duration in the
 * run file.
 *
 * @param fileName - Name of the recording (without the `.waw.json` suffix).
 * @param runId - Identifier of the run whose metadata file is updated.
 * @returns `true` when the run was interpreted and its results were saved;
 *          `false` when a step threw and the run file was marked 'ERROR'.
 *          If updating the run file in the error path throws as well, the
 *          returned promise rejects.
 */
async function executeRun(fileName: string, runId: string) {
  // The run file is read and written through two spellings of the same
  // relative location; both are kept exactly as the storage helpers receive them.
  const runReadPath = `./../storage/runs/${fileName}_${runId}.json`;
  const runWritePath = `../storage/runs/${fileName}_${runId}.json`;

  // Serializes run metadata (pretty-printed) into the run file.
  const persistRun = (meta: unknown) => saveFile(runWritePath, JSON.stringify(meta, null, 2));

  try {
    const parsedRecording = JSON.parse(
      await readFile(`./../storage/recordings/${fileName}.waw.json`)
    );
    const parsedRun = JSON.parse(await readFile(runReadPath));

    parsedRun.status = 'RUNNING';
    await persistRun(parsedRun);

    const browser = browserPool.getRemoteBrowser(parsedRun.browserId);
    if (!browser) {
      throw new Error('Could not access browser');
    }

    const currentPage = await browser.getCurrentPage();
    if (!currentPage) {
      throw new Error('Could not create a new page');
    }

    const interpretationInfo = await browser.interpreter.InterpretRecording(
      parsedRecording.recording,
      currentPage,
      parsedRun.interpreterSettings
    );

    // NOTE(review): startedAt is parsed back with `new Date(...)`; if it was
    // stored via toLocaleString() the parse is locale-dependent - confirm.
    const elapsedMs = Date.now() - new Date(parsedRun.startedAt).getTime();
    const elapsedSeconds = Math.round(elapsedMs / 1000);
    let durationLabel: string;
    if (elapsedSeconds < 60) {
      durationLabel = `${elapsedSeconds} s`;
    } else {
      durationLabel = `${Math.floor(elapsedSeconds / 60)} m ${elapsedSeconds % 60} s`;
    }

    // The browser is released before the final metadata is written.
    await destroyRemoteBrowser(parsedRun.browserId);

    await persistRun({
      ...parsedRun,
      status: interpretationInfo.result,
      finishedAt: new Date().toLocaleString(),
      duration: durationLabel,
      browserId: null,
      log: interpretationInfo.log.join('\n'),
      serializableOutput: interpretationInfo.serializableOutput,
      binaryOutput: interpretationInfo.binaryOutput,
    });

    return true;
  } catch (error: any) {
    logger.log('info', `Error while running a recording with name: ${fileName}_${runId}.json`);
    console.log(error.message);

    // Record the failure in the run file so the run is not left as 'RUNNING'.
    const failedRun = JSON.parse(await readFile(runReadPath));
    failedRun.status = 'ERROR';
    failedRun.log += `\nError: ${error.message}`;
    await persistRun(failedRun);

    return false;
  }
}
2024-09-12 21:01:46 +05:30
2024-09-12 21:02:07 +05:30
/**
 * Runs the recording for a scheduled run and logs the outcome.
 *
 * When `executeRun` reports failure, or when anything in this handler throws,
 * the remote browser identified by `browserId` is destroyed.
 *
 * @param browserId - Id of the remote browser assigned to the run.
 * @param fileName - Name of the recording to interpret.
 * @param runId - Identifier of the run.
 */
async function readyForRunHandler(browserId: string, fileName: string, runId: string) {
  try {
    const succeeded = await executeRun(fileName, runId);

    if (!succeeded) {
      logger.log('error', `Interpretation of ${fileName} failed`);
      await destroyRemoteBrowser(browserId);
    } else {
      logger.log('info', `Interpretation of ${fileName} succeeded`);
    }

    resetRecordingState(browserId, fileName, runId);
  } catch (error: any) {
    logger.error(`Error during readyForRunHandler: ${error.message}`);
    await destroyRemoteBrowser(browserId);
  }
}
2024-09-12 21:02:27 +05:30
/**
 * Logs that the state associated with a finished run has been reset.
 *
 * The arguments are strings passed by value, so assigning to them inside this
 * function cannot clear anything in the caller; the function therefore only
 * reports which identifiers were involved. The values are logged as received,
 * so the message actually names the browser, recording and run.
 *
 * @param browserId - Id of the remote browser that served the run.
 * @param fileName - Name of the recording that was run.
 * @param runId - Identifier of the run.
 */
function resetRecordingState(browserId: string, fileName: string, runId: string): void {
  logger.log(`info`, `reset values for ${browserId}, ${fileName}, and ${runId}`);
}
2024-09-12 21:01:46 +05:30
/**
 * Schedules a run for a recording and wires up the socket listeners that
 * start the run.
 *
 * After `runWorkflow` has produced a browser id and run id, a websocket
 * connection to that browser's namespace is opened; the run starts when the
 * 'ready-for-run' event arrives and the listeners are cleaned up on disconnect.
 *
 * NOTE(review): the function resolves (with `undefined`) as soon as the
 * listeners are registered, not when the run finishes, and errors are logged
 * rather than rethrown - confirm callers do not rely on the outcome.
 *
 * @param fileName - Name of the recording to run.
 * @param runId - Requested run id; `runWorkflow` generates one when it is falsy.
 */
async function handleRunRecording(fileName: string, runId: string) {
  try {
    const { browserId, runId: newRunId } = await runWorkflow(fileName, runId);

    // runWorkflow returns neither id when scheduling failed.
    if (!browserId || !newRunId) {
      throw new Error('browserId or runId is undefined');
    }

    const namespaceUrl = `http://localhost:8080/${browserId}`;
    const socket = io(namespaceUrl, {
      transports: ['websocket'],
      rejectUnauthorized: false
    });

    const onReadyForRun = () => readyForRunHandler(browserId, fileName, newRunId);
    const onDisconnect = () => {
      cleanupSocketListeners(socket, browserId, newRunId);
    };

    socket.on('ready-for-run', onReadyForRun);
    logger.log('info', `Running recording: ${fileName}`);
    socket.on('disconnect', onDisconnect);
  } catch (error: any) {
    logger.error('Error running recording:', error);
  }
}
/**
 * Removes the 'ready-for-run' listeners from a run's socket and logs the cleanup.
 *
 * @param socket - Socket connected to the run's browser namespace.
 * @param browserId - Id of the remote browser, used for logging.
 * @param runId - Identifier of the run, used for logging.
 */
function cleanupSocketListeners(socket: Socket, browserId: string, runId: string) {
  // Remove by event name only: a callback created here would be a different
  // function object from the one that was registered, so passing it to off()
  // would never match and would leave the listener attached.
  socket.off('ready-for-run');
  logger.log('info', `Cleaned up listeners for browserId: ${browserId}, runId: ${runId}`);
}
2024-09-12 19:42:02 +05:30
// Public surface of this module besides `worker`: the queue on which workflow
// jobs are placed, and the function that schedules a single run.
export { workflowQueue, runWorkflow };