Files
parcer/server/src/workflow-management/scheduler/index.ts

177 lines
5.2 KiB
TypeScript
Raw Normal View History

2024-09-11 11:30:39 +05:30
import { Queue, Worker } from 'bullmq';
import IORedis from 'ioredis';
2024-09-11 11:53:12 +05:30
import { deleteFile, readFile, readFiles, saveFile } from "../storage";
2024-09-12 00:57:01 +05:30
import { createRemoteBrowserForRun, destroyRemoteBrowser, getActiveBrowserId } from '../../browser-management/controller';
2024-09-11 11:53:12 +05:30
import logger from '../../logger';
import { browserPool } from "../../server";
import fs from "fs";
import { uuid } from "uuidv4";
import { chromium } from "playwright";
2024-09-11 11:30:39 +05:30
2024-09-11 13:46:48 +05:30
// Redis connection settings used for the BullMQ queue and worker in this module.
// `maxRetriesPerRequest: null` is handed to ioredis as-is
// (NOTE(review): BullMQ's docs ask for this on worker connections — confirm).
const redisOptions = {
  host: 'localhost',
  port: 6379,
  maxRetriesPerRequest: null,
};

/** Logs once the Redis client reports a successful connection. */
function onRedisConnect(): void {
  console.log('Connected to Redis!');
}

/** Logs any error emitted by the Redis client. */
function onRedisError(err: Error): void {
  console.error('Redis connection error:', err);
}

const connection = new IORedis(redisOptions);
connection.on('connect', onRedisConnect);
connection.on('error', onRedisError);
2024-09-11 11:30:39 +05:30
/** BullMQ queue named 'workflow'; the worker below processes jobs from the queue of the same name. */
const workflowQueue = new Queue('workflow', { connection });

/**
 * Worker for the 'workflow' queue.
 *
 * Each job's data is expected to carry `fileName` and `runId`, which are
 * forwarded to `runWorkflow`. Whatever `runWorkflow` resolves with becomes the
 * job's return value; if it rejects, the error is logged and re-thrown so that
 * BullMQ marks the job as failed.
 */
export const worker = new Worker('workflow', async job => {
  const { fileName, runId } = job.data;
  try {
    // `return await` (not a bare `return`) so a rejection is caught and logged here.
    return await runWorkflow(fileName, runId);
  } catch (error) {
    console.error('Error running workflow:', error);
    throw error;
  }
}, { connection });

worker.on('completed', (job) => {
  console.log(`Job ${job.id} completed for ${job.data.fileName}_${job.data.runId}`);
});

worker.on('failed', (job, err) => {
  // BullMQ types this listener's `job` as possibly undefined, so guard before
  // dereferencing it; otherwise the failure handler itself would throw.
  if (!job) {
    console.error('Job <unknown> failed:', err);
    return;
  }
  console.error(`Job ${job.id} failed for ${job.data.fileName}_${job.data.runId}:`, err);
});
/**
 * Schedules a run of a stored recording and then executes it.
 *
 * Phase 1 (scheduling) creates a headless Chromium remote browser and writes a
 * `SCHEDULED` run record to `../storage/runs/<fileName>_<runId>.json`.
 * Phase 2 delegates to `executeRun`.
 *
 * @param fileName Name of the recording; also used as the run's `name`.
 * @param runId Identifier of the run. When falsy, a fresh uuid is generated.
 * @returns The result of `executeRun`, or `{ success: false, error }` when
 *   scheduling (or `executeRun` itself) throws. This function does not reject.
 */
async function runWorkflow(fileName: string, runId: string) {
  // Do not reassign the parameter; resolve the effective id once.
  const effectiveRunId = runId || uuid();
  const runFilePath = `../storage/runs/${fileName}_${effectiveRunId}.json`;

  // Tracked outside the try block so the catch handler can release the browser.
  let browserId: ReturnType<typeof createRemoteBrowserForRun> | undefined;

  // Phase 1: Scheduling
  try {
    browserId = createRemoteBrowserForRun({
      browser: chromium,
      launchOptions: { headless: true },
    });
    logger.log(`debug`, `Created browser with ID: ${browserId}`);

    const runMeta = {
      status: 'SCHEDULED',
      name: fileName,
      startedAt: new Date().toLocaleString(),
      finishedAt: '',
      duration: '',
      task: '', // Optionally set based on workflow
      browserId: browserId,
      interpreterSettings: { maxConcurrency: 1, maxRepeats: 1, debug: true },
      log: '',
      runId: effectiveRunId,
    };

    // Non-blocking equivalent of mkdirSync; `recursive` makes it a no-op if the directory exists.
    await fs.promises.mkdir('../storage/runs', { recursive: true });
    await saveFile(runFilePath, JSON.stringify(runMeta, null, 2));

    logger.log('debug', `Scheduled run with name: ${fileName}_${effectiveRunId}.json`);
    logger.log('debug', `Active in run : ${getActiveBrowserId()}`);

    // Phase 2: Running
    return await executeRun(fileName, effectiveRunId);
  } catch (e) {
    // Thrown values are not guaranteed to be Error instances.
    const message = e instanceof Error ? e.message : String(e);
    logger.log('info', `Error while scheduling a run with name: ${fileName}_${effectiveRunId}.json`);
    console.log(message);

    // Best-effort cleanup: without this, a browser created above would be
    // leaked whenever a later scheduling step throws.
    if (browserId !== undefined) {
      try {
        await destroyRemoteBrowser(browserId);
      } catch (cleanupError) {
        const cleanupMessage = cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
        logger.log('debug', `Could not destroy browser ${browserId} after failed scheduling: ${cleanupMessage}`);
      }
    }

    return {
      success: false,
      error: message,
    };
  }
}
2024-09-12 00:57:01 +05:30
/** Formats a whole number of seconds as "<s> s" (under a minute) or "<m> m <s> s". */
function formatRunDuration(totalSeconds: number): string {
  return totalSeconds < 60
    ? `${totalSeconds} s`
    : `${Math.floor(totalSeconds / 60)} m ${totalSeconds % 60} s`;
}

/**
 * Whole seconds between the run's recorded start and `nowMs`.
 *
 * The run record stores `startedAt` as a `toLocaleString()` value, which
 * `Date` may fail to parse or may parse with day and month swapped. When the
 * parsed value is invalid or lies in the future, `fallbackStartMs` is used.
 */
function elapsedRunSeconds(
  startedAt: string | number | null | undefined,
  fallbackStartMs: number,
  nowMs: number,
): number {
  const parsedMs = startedAt == null ? NaN : new Date(startedAt).getTime();
  const startMs = Number.isFinite(parsedMs) && parsedMs <= nowMs ? parsedMs : fallbackStartMs;
  return Math.round((nowMs - startMs) / 1000);
}

/**
 * Executes a previously scheduled run.
 *
 * Reads the recording (`<fileName>.waw.json`) and the run record, marks the
 * run `RUNNING`, interprets the recording in the run's remote browser, then
 * destroys the browser and writes the final record (status, duration, log and
 * outputs).
 *
 * On any failure the run's browser is destroyed (best effort), the run record
 * is marked `ERROR` with the message appended to its log (best effort), and a
 * failure object is returned. This function does not reject.
 *
 * @param fileName Name of the recording.
 * @param runId Identifier of the run whose record should be executed.
 * @returns `{ browserId, runId, success: true }` on success, otherwise
 *   `{ runId, success: false, error }`.
 */
async function executeRun(fileName: string, runId: string) {
  const runFilePath = `../storage/runs/${fileName}_${runId}.json`;
  const executionStartMs = Date.now();

  // Id of the browser that still needs to be destroyed if something goes wrong.
  let liveBrowserId: string | undefined;

  try {
    // Read the recording from storage
    const recording = await readFile(`./../storage/recordings/${fileName}.waw.json`);
    const parsedRecording = JSON.parse(recording);

    // Read the run from storage
    const run = await readFile(runFilePath);
    const parsedRun = JSON.parse(run);
    liveBrowserId = parsedRun.browserId;

    // Update status to RUNNING
    parsedRun.status = 'RUNNING';
    await saveFile(runFilePath, JSON.stringify(parsedRun, null, 2));

    // Interpret the run in active browser
    logger.log('debug', `Active in exec : ${getActiveBrowserId()}`);
    const browser = browserPool.getRemoteBrowser(parsedRun.browserId);
    if (!browser) {
      throw new Error('Could not access browser');
    }

    const currentPage = await browser.getCurrentPage();
    if (!currentPage) {
      throw new Error('Could not create a new page');
    }

    const interpretationInfo = await browser.interpreter.InterpretRecording(
      parsedRecording.recording, currentPage, parsedRun.interpreterSettings);

    const durString = formatRunDuration(
      elapsedRunSeconds(parsedRun.startedAt, executionStartMs, Date.now()),
    );

    await destroyRemoteBrowser(parsedRun.browserId);
    liveBrowserId = undefined; // already released; the catch handler must not retry

    const updatedRunMeta = {
      ...parsedRun,
      status: interpretationInfo.result,
      finishedAt: new Date().toLocaleString(),
      duration: durString,
      browserId: null,
      log: interpretationInfo.log.join('\n'),
      serializableOutput: interpretationInfo.serializableOutput,
      binaryOutput: interpretationInfo.binaryOutput,
    };
    await saveFile(runFilePath, JSON.stringify(updatedRunMeta, null, 2));

    return {
      browserId: parsedRun.browserId,
      runId: runId,
      success: true,
    };
  } catch (error: unknown) {
    const message = error instanceof Error ? error.message : String(error);
    logger.log('info', `Error while running a recording with name: ${fileName}_${runId}.json`);
    console.log(message);

    // Release the browser so a failed run does not leak it.
    if (liveBrowserId) {
      try {
        await destroyRemoteBrowser(liveBrowserId);
      } catch (cleanupError) {
        const cleanupMessage = cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
        logger.log('debug', `Could not destroy browser ${liveBrowserId} after failed run: ${cleanupMessage}`);
      }
    }

    // Update run status to ERROR. Guarded: the original failure may be that
    // the run file is missing or unparsable, in which case this would throw
    // again and hide the real error from the caller.
    try {
      const errorRun = await readFile(runFilePath);
      const parsedErrorRun = JSON.parse(errorRun);
      parsedErrorRun.status = 'ERROR';
      parsedErrorRun.log = `${parsedErrorRun.log ?? ''}\nError: ${message}`;
      await saveFile(runFilePath, JSON.stringify(parsedErrorRun, null, 2));
    } catch (persistError) {
      const persistMessage = persistError instanceof Error ? persistError.message : String(persistError);
      logger.log('info', `Could not persist ERROR status for run ${fileName}_${runId}.json: ${persistMessage}`);
    }

    return {
      runId: runId,
      success: false,
      error: message,
    };
  }
}
2024-09-11 11:53:41 +05:30
// Exported in addition to `worker`: the 'workflow' queue and the run entry point.
export { workflowQueue, runWorkflow };