2024-07-31 22:46:19 +05:30
|
|
|
import Interpreter, { WorkflowFile } from "maxun-core";
|
2024-06-05 04:24:49 +05:30
|
|
|
import logger from "../../logger";
|
|
|
|
|
import { Socket } from "socket.io";
|
2025-11-30 17:41:44 +05:30
|
|
|
import { Page } from "playwright-core";
|
2024-06-05 04:24:49 +05:30
|
|
|
import { InterpreterSettings } from "../../types";
|
2024-11-25 20:57:38 +05:30
|
|
|
import { decrypt } from "../../utils/auth";
|
2025-09-10 00:21:43 +05:30
|
|
|
import Run from "../../models/Run";
|
2024-11-25 20:57:38 +05:30
|
|
|
|
|
|
|
|
/**
|
2025-01-09 19:08:29 +05:30
|
|
|
* Decrypts any encrypted inputs in the workflow. If checkLimit is true, it will also handle the limit validation for scrapeList action.
|
2024-11-25 20:57:38 +05:30
|
|
|
* @param workflow The workflow to decrypt.
|
2025-01-09 19:08:29 +05:30
|
|
|
* @param checkLimit If true, it will handle the limit validation for scrapeList action.
|
2024-11-25 20:57:38 +05:30
|
|
|
*/
|
2025-01-09 19:08:29 +05:30
|
|
|
function processWorkflow(workflow: WorkflowFile, checkLimit: boolean = false): WorkflowFile {
|
|
|
|
|
const processedWorkflow = JSON.parse(JSON.stringify(workflow)) as WorkflowFile;
|
2024-11-25 20:57:38 +05:30
|
|
|
|
2025-01-09 19:08:29 +05:30
|
|
|
processedWorkflow.workflow.forEach((pair) => {
|
2024-11-25 20:57:38 +05:30
|
|
|
pair.what.forEach((action) => {
|
2025-01-09 19:08:29 +05:30
|
|
|
// Handle limit validation for scrapeList action
|
|
|
|
|
if (action.action === 'scrapeList' && checkLimit && Array.isArray(action.args) && action.args.length > 0) {
|
|
|
|
|
const scrapeConfig = action.args[0];
|
|
|
|
|
if (scrapeConfig && typeof scrapeConfig === 'object' && 'limit' in scrapeConfig) {
|
|
|
|
|
if (typeof scrapeConfig.limit === 'number' && scrapeConfig.limit > 5) {
|
|
|
|
|
scrapeConfig.limit = 5;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
// Handle decryption for type and press actions
|
2024-12-04 16:55:42 +05:30
|
|
|
if ((action.action === 'type' || action.action === 'press') && Array.isArray(action.args) && action.args.length > 1) {
|
2024-11-25 20:57:38 +05:30
|
|
|
try {
|
|
|
|
|
const encryptedValue = action.args[1];
|
|
|
|
|
if (typeof encryptedValue === 'string') {
|
|
|
|
|
const decryptedValue = decrypt(encryptedValue);
|
|
|
|
|
action.args[1] = decryptedValue;
|
|
|
|
|
} else {
|
|
|
|
|
logger.log('error', 'Encrypted value is not a string');
|
|
|
|
|
action.args[1] = '';
|
|
|
|
|
}
|
|
|
|
|
} catch (error: unknown) {
|
|
|
|
|
const errorMessage = error instanceof Error ? error.message : String(error);
|
|
|
|
|
logger.log('error', `Failed to decrypt input value: ${errorMessage}`);
|
|
|
|
|
action.args[1] = '';
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
});
|
|
|
|
|
|
2025-01-09 19:08:29 +05:30
|
|
|
return processedWorkflow;
|
2024-11-25 20:57:38 +05:30
|
|
|
}
|
2024-06-05 04:24:49 +05:30
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* This class implements the main interpretation functions.
|
|
|
|
|
* It holds some information about the current interpretation process and
|
|
|
|
|
* registers to some events to allow the client (frontend) to interact with the interpreter.
|
2024-07-31 22:46:19 +05:30
|
|
|
* It uses the [maxun-core](https://www.npmjs.com/package/maxun-core)
|
2024-06-05 04:24:49 +05:30
|
|
|
* library to interpret the workflow.
|
|
|
|
|
* @category WorkflowManagement
|
|
|
|
|
*/
|
|
|
|
|
export class WorkflowInterpreter {
|
|
|
|
|
  /**
   * Socket.io socket instance enabling communication with the client (frontend) side.
   * @private
   */
  private socket: Socket;

  /**
   * True if the interpretation is paused.
   */
  public interpretationIsPaused: boolean = false;

  /**
   * The instance of the {@link Interpreter} class used to interpret the workflow.
   * From maxun-core.
   * @private
   */
  private interpreter: Interpreter | null = null;

  /**
   * An id of the currently interpreted pair in the workflow.
   * @private
   */
  private activeId: number | null = null;

  /**
   * An array of debug messages emitted by the {@link Interpreter}.
   */
  public debugMessages: string[] = [];

  /**
   * Storage for different types of serializable data, keyed first by action
   * type (scrapeSchema / scrapeList) and then by action name.
   */
  public serializableDataByType: {
    scrapeSchema: Record<string, any>;
    scrapeList: Record<string, any>;
    [key: string]: any;
  } = {
    scrapeSchema: {},
    scrapeList: {},
  };

  // Name of the action currently being processed; set via debugChannel.setActionName.
  private currentActionName: string | null = null;

  /**
   * Track the current action type being processed
   */
  private currentActionType: string | null = null;

  /**
   * An array of all the binary data extracted from the run.
   */
  public binaryData: { name: string; mimeType: string; data: string }[] = [];

  /**
   * Track current scrapeList index
   */
  private currentScrapeListIndex: number = 0;

  /**
   * Track action counts to generate unique names
   */
  private actionCounts: Record<string, number> = {};

  /**
   * Track used action names to prevent duplicates
   */
  private usedActionNames: Set<string> = new Set();

  /**
   * Current run ID for real-time persistence
   */
  private currentRunId: string | null = null;

  /**
   * Batched persistence system for performance optimization
   */
  private persistenceBuffer: Array<{
    actionType: string;
    data: any;
    listIndex?: number;
    timestamp: number;
    creditValidated: boolean;
  }> = [];

  // Pending timer for the delayed batch flush (see scheduleBatchFlush).
  private persistenceTimer: NodeJS.Timeout | null = null;
  // Pending timer for retrying a failed flush.
  private persistenceRetryTimer: NodeJS.Timeout | null = null;
  // Buffer size that triggers an immediate flush.
  private readonly BATCH_SIZE = 5;
  // Delay (ms) before a scheduled batch flush fires.
  private readonly BATCH_TIMEOUT = 3000;
  // Maximum number of flush retries before giving up.
  private readonly MAX_PERSISTENCE_RETRIES = 3;
  // True while a flush is actively writing to the database.
  private persistenceInProgress = false;
  // Number of flush retries attempted so far.
  private persistenceRetryCount = 0;

  /**
   * An array of id's of the pairs from the workflow that are about to be paused.
   * As "breakpoints".
   * @private
   */
  private breakpoints: boolean[] = [];

  /**
   * Callback to resume the interpretation after a pause.
   * @private
   */
  private interpretationResume: (() => void) | null = null;
|
|
|
|
|
|
2024-06-05 04:28:43 +05:30
|
|
|
/**
|
|
|
|
|
* A public constructor taking a socket instance for communication with the client.
|
|
|
|
|
* @param socket Socket.io socket instance enabling communication with the client (frontend) side.
|
2025-09-10 00:21:43 +05:30
|
|
|
* @param runId Optional run ID for real-time data persistence
|
2024-06-05 04:28:43 +05:30
|
|
|
* @constructor
|
|
|
|
|
*/
|
2025-09-10 00:21:43 +05:30
|
|
|
constructor(socket: Socket, runId?: string) {
|
2024-06-05 04:28:43 +05:30
|
|
|
this.socket = socket;
|
2025-09-10 00:21:43 +05:30
|
|
|
this.currentRunId = runId || null;
|
2024-06-05 04:28:43 +05:30
|
|
|
}
|
|
|
|
|
|
2025-11-29 15:06:03 +05:30
|
|
|
/**
|
|
|
|
|
* Removes pausing-related socket listeners to prevent memory leaks
|
|
|
|
|
* Must be called before re-registering listeners or during cleanup
|
|
|
|
|
* @private
|
|
|
|
|
*/
|
|
|
|
|
private removePausingListeners(): void {
|
|
|
|
|
try {
|
|
|
|
|
this.socket.removeAllListeners('pause');
|
|
|
|
|
this.socket.removeAllListeners('resume');
|
|
|
|
|
this.socket.removeAllListeners('step');
|
|
|
|
|
this.socket.removeAllListeners('breakpoints');
|
|
|
|
|
logger.log('debug', 'Removed pausing socket listeners');
|
|
|
|
|
} catch (error: any) {
|
|
|
|
|
logger.warn(`Error removing pausing listeners: ${error.message}`);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2024-06-05 04:29:55 +05:30
|
|
|
/**
|
|
|
|
|
* Subscribes to the events that are used to control the interpretation.
|
|
|
|
|
* The events are pause, resume, step and breakpoints.
|
|
|
|
|
* Step is used to interpret a single pair and pause on the other matched pair.
|
|
|
|
|
* @returns void
|
|
|
|
|
*/
|
|
|
|
|
public subscribeToPausing = () => {
|
2025-11-29 15:06:03 +05:30
|
|
|
this.removePausingListeners();
|
|
|
|
|
|
2024-06-05 04:29:55 +05:30
|
|
|
this.socket.on('pause', () => {
|
|
|
|
|
this.interpretationIsPaused = true;
|
|
|
|
|
});
|
2024-06-05 04:30:26 +05:30
|
|
|
this.socket.on('resume', () => {
|
|
|
|
|
this.interpretationIsPaused = false;
|
|
|
|
|
if (this.interpretationResume) {
|
|
|
|
|
this.interpretationResume();
|
|
|
|
|
this.socket.emit('log', '----- The interpretation has been resumed -----', false);
|
|
|
|
|
} else {
|
2024-06-05 04:36:22 +05:30
|
|
|
logger.log('debug', "Resume called but no resume function is set");
|
2024-06-05 04:30:26 +05:30
|
|
|
}
|
|
|
|
|
});
|
2024-06-05 04:30:44 +05:30
|
|
|
this.socket.on('step', () => {
|
|
|
|
|
if (this.interpretationResume) {
|
|
|
|
|
this.interpretationResume();
|
|
|
|
|
} else {
|
|
|
|
|
logger.log('debug', "Step called but no resume function is set");
|
|
|
|
|
}
|
|
|
|
|
});
|
2024-06-05 04:30:59 +05:30
|
|
|
this.socket.on('breakpoints', (data: boolean[]) => {
|
|
|
|
|
logger.log('debug', "Setting breakpoints: " + data);
|
|
|
|
|
this.breakpoints = data
|
|
|
|
|
});
|
2024-06-05 04:29:55 +05:30
|
|
|
}
|
2024-06-05 04:32:28 +05:30
|
|
|
|
|
|
|
|
  /**
   * Sets up the instance of {@link Interpreter} and interprets
   * the workflow inside the recording editor.
   * Cleans up this interpreter instance after the interpretation is finished.
   * @param workflow The workflow to interpret.
   * @param page The page instance used to interact with the browser.
   * @param updatePageOnPause A callback to update the page after a pause.
   * @param settings Interpreter settings; `settings.params` is extracted and removed.
   * @returns {Promise<void>}
   */
  public interpretRecordingInEditor = async (
    workflow: WorkflowFile,
    page: Page,
    updatePageOnPause: (page: Page) => void,
    settings: InterpreterSettings,
  ) => {
    const params = settings.params ? settings.params : null;
    // NOTE(review): this mutates the caller's settings object — confirm no caller reuses it.
    delete settings.params;

    // Decrypt inputs and clamp scrapeList limits (checkLimit = true in editor mode).
    const processedWorkflow = processWorkflow(workflow, true);

    const options = {
      ...settings,
      mode: 'editor',
      debugChannel: {
        // Mirrors the currently interpreted pair id to the client.
        activeId: (id: any) => {
          this.activeId = id;
          this.socket.emit('activePairId', id);
        },
        // Collects timestamped debug output and forwards it to the client log.
        debugMessage: (msg: any) => {
          this.debugMessages.push(`[${new Date().toLocaleString()}] ` + msg);
          this.socket.emit('log', msg)
        },
        // Tracks which action type produced the next serializable callback.
        setActionType: (type: string) => {
          this.currentActionType = type;
        }
      },
      // Receives extracted data; persists it and forwards it to the client.
      serializableCallback: async (data: any) => {
        if (this.currentActionType === 'scrapeSchema') {
          // Normalize to a non-empty array before persisting.
          const cumulativeScrapeSchemaData = Array.isArray(data) && data.length > 0 ? data : [data];

          if (cumulativeScrapeSchemaData.length > 0) {
            await this.persistDataToDatabase('scrapeSchema', cumulativeScrapeSchemaData);
          }

          // Emit the raw array, or wrap a single item into an array.
          if (Array.isArray(data) && data.length > 0) {
            this.socket.emit('serializableCallback', {
              type: 'captureText',
              data
            });
          } else {
            this.socket.emit('serializableCallback', {
              type: 'captureText',
              data : [data]
            });
          }
        } else if (this.currentActionType === 'scrapeList') {
          if (data && Array.isArray(data) && data.length > 0) {
            // Use the current index for persistence
            await this.persistDataToDatabase('scrapeList', data, this.currentScrapeListIndex);
          }

          this.socket.emit('serializableCallback', {
            type: 'captureList',
            data
          });
        }
      },
      // Receives screenshot data; stores, persists, and forwards it.
      binaryCallback: async (data: string, mimetype: string) => {
        // For editor mode, we don't have the name yet, so use a timestamp-based name
        const binaryItem = {
          name: `Screenshot ${Date.now()}`,
          mimeType: mimetype,
          data: JSON.stringify(data)
        };
        this.binaryData.push(binaryItem);

        // Persist binary data to database
        await this.persistBinaryDataToDatabase(binaryItem);

        this.socket.emit('binaryCallback', {
          data,
          mimetype,
          type: 'captureScreenshot'
        });
      }
    }

    const interpreter = new Interpreter(processedWorkflow, options);
    this.interpreter = interpreter;

    // 'flag' fires between pairs: honor breakpoints and pause requests.
    interpreter.on('flag', async (page, resume) => {
      if (this.activeId !== null && this.breakpoints[this.activeId]) {
        logger.log('debug', `breakpoint hit id: ${this.activeId}`);
        this.socket.emit('breakpointHit');
        this.interpretationIsPaused = true;
      }

      if (this.interpretationIsPaused) {
        // Stash the resume callback so the 'resume'/'step' socket events can continue.
        this.interpretationResume = resume;
        logger.log('debug', `Paused inside of flag: ${page.url()}`);
        updatePageOnPause(page);
        this.socket.emit('log', '----- The interpretation has been paused -----', false);
      } else {
        resume();
      }
    });

    this.socket.emit('log', '----- Starting the interpretation -----', false);

    const status = await interpreter.run(page, params);

    this.socket.emit('log', `----- The interpretation finished with status: ${status} -----`, false);

    logger.log('debug', `Interpretation finished`);

    // Flush any remaining data in persistence buffer before completing
    await this.flushPersistenceBuffer();

    // Reset per-run state and notify the client.
    this.interpreter = null;
    this.socket.emit('activePairId', -1);
    this.interpretationIsPaused = false;
    this.interpretationResume = null;
    this.socket.emit('finished');
  };
|
|
|
|
|
|
2024-06-05 04:33:07 +05:30
|
|
|
/**
|
|
|
|
|
* Stops the current process of the interpretation of the workflow.
|
|
|
|
|
* @returns {Promise<void>}
|
|
|
|
|
*/
|
|
|
|
|
public stopInterpretation = async () => {
|
|
|
|
|
if (this.interpreter) {
|
|
|
|
|
logger.log('info', 'Stopping the interpretation.');
|
2025-09-10 00:21:43 +05:30
|
|
|
|
|
|
|
|
this.interpreter.abort();
|
|
|
|
|
logger.log('info', 'maxun-core interpreter aborted - data collection stopped immediately');
|
|
|
|
|
|
2024-06-05 04:33:07 +05:30
|
|
|
await this.interpreter.stop();
|
|
|
|
|
this.socket.emit('log', '----- The interpretation has been stopped -----', false);
|
2025-09-28 22:53:52 +05:30
|
|
|
await this.clearState();
|
2024-06-05 04:33:07 +05:30
|
|
|
} else {
|
|
|
|
|
logger.log('error', 'Cannot stop: No active interpretation.');
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2025-09-28 22:53:52 +05:30
|
|
|
  /**
   * Resets all per-run state. Order matters:
   * 1) flush any buffered data, 2) cancel pending timers, 3) abort/stop the
   * interpreter, 4) remove socket listeners, 5) zero out all fields.
   * Safe to call when no interpretation is active.
   */
  public clearState = async (): Promise<void> => {
    // Flush remaining buffered data before anything is torn down.
    if (this.persistenceBuffer.length > 0) {
      try {
        await this.flushPersistenceBuffer();
        logger.log('debug', 'Successfully flushed final persistence buffer during cleanup');
      } catch (error: any) {
        logger.log('error', `Failed to flush final persistence buffer: ${error.message}`);
      }
    }

    // Cancel any pending batch-flush timer.
    if (this.persistenceTimer) {
      clearTimeout(this.persistenceTimer);
      this.persistenceTimer = null;
    }

    // Cancel any pending retry timer.
    if (this.persistenceRetryTimer) {
      clearTimeout(this.persistenceRetryTimer);
      this.persistenceRetryTimer = null;
    }

    // Abort/stop the interpreter if one is still running.
    if (this.interpreter) {
      try {
        if (!this.interpreter.getIsAborted()) {
          this.interpreter.abort();
        }
        await this.interpreter.stop();
        logger.log('debug', 'mx-cloud interpreter properly stopped during cleanup');

        // cleanup() may not exist on older interpreter versions.
        if (typeof this.interpreter.cleanup === 'function') {
          await this.interpreter.cleanup();
          logger.log('debug', 'mx-cloud interpreter cleanup completed');
        }
      } catch (error: any) {
        logger.log('warn', `Error stopping mx-cloud interpreter during cleanup: ${error.message}`);
      }
    }

    // Detach pause/resume/step/breakpoints handlers to avoid leaks.
    this.removePausingListeners();

    // Reset all per-run fields to their initial values.
    this.debugMessages = [];
    this.interpretationIsPaused = false;
    this.activeId = null;
    this.interpreter = null;
    this.breakpoints = [];
    this.interpretationResume = null;
    this.currentActionType = null;
    this.currentActionName = null;
    this.serializableDataByType = {
      scrapeSchema: {},
      scrapeList: {},
    };
    this.binaryData = [];
    this.currentScrapeListIndex = 0;
    this.actionCounts = {};
    this.usedActionNames = new Set();
    this.currentRunId = null;
    this.persistenceBuffer = [];
    this.persistenceInProgress = false;
    this.persistenceRetryCount = 0;
  }
|
|
|
|
|
|
2025-09-10 00:21:43 +05:30
|
|
|
  /**
   * Sets the current run ID for real-time persistence.
   * Subsequent serializable/binary callbacks will persist against this run.
   * @param runId The run ID to set
   */
  public setRunId = (runId: string): void => {
    this.currentRunId = runId;
    logger.log('debug', `Set run ID for real-time persistence: ${runId}`);
  };
|
|
|
|
|
|
2025-11-07 13:56:42 +05:30
|
|
|
/**
|
|
|
|
|
* Generates a unique action name for data storage
|
|
|
|
|
* @param actionType The type of action (scrapeList, scrapeSchema, etc.)
|
|
|
|
|
* @param providedName Optional name provided by the action
|
|
|
|
|
* @returns A unique action name
|
|
|
|
|
*/
|
|
|
|
|
private getUniqueActionName = (actionType: string, providedName?: string | null): string => {
|
|
|
|
|
if (providedName && providedName.trim() !== '' && !this.usedActionNames.has(providedName)) {
|
|
|
|
|
this.usedActionNames.add(providedName);
|
|
|
|
|
return providedName;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!this.actionCounts[actionType]) {
|
|
|
|
|
this.actionCounts[actionType] = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let uniqueName: string;
|
|
|
|
|
let counter = this.actionCounts[actionType];
|
|
|
|
|
|
|
|
|
|
do {
|
|
|
|
|
counter++;
|
|
|
|
|
if (actionType === 'scrapeList') {
|
|
|
|
|
uniqueName = `List ${counter}`;
|
|
|
|
|
} else if (actionType === 'scrapeSchema') {
|
|
|
|
|
uniqueName = `Text ${counter}`;
|
|
|
|
|
} else if (actionType === 'screenshot') {
|
|
|
|
|
uniqueName = `Screenshot ${counter}`;
|
|
|
|
|
} else {
|
|
|
|
|
uniqueName = `${actionType} ${counter}`;
|
|
|
|
|
}
|
|
|
|
|
} while (this.usedActionNames.has(uniqueName));
|
|
|
|
|
|
|
|
|
|
this.actionCounts[actionType] = counter;
|
|
|
|
|
this.usedActionNames.add(uniqueName);
|
|
|
|
|
return uniqueName;
|
|
|
|
|
};
|
|
|
|
|
|
2025-09-10 00:21:43 +05:30
|
|
|
/**
|
2025-09-28 22:53:52 +05:30
|
|
|
* Persists extracted data to database with intelligent batching for performance
|
|
|
|
|
* Falls back to immediate persistence for critical operations
|
2025-09-10 00:21:43 +05:30
|
|
|
* @private
|
|
|
|
|
*/
|
|
|
|
|
private persistDataToDatabase = async (actionType: string, data: any, listIndex?: number): Promise<void> => {
|
|
|
|
|
if (!this.currentRunId) {
|
2025-09-28 22:53:52 +05:30
|
|
|
logger.log('debug', 'No run ID available for persistence');
|
2025-09-10 00:21:43 +05:30
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
2025-09-28 22:53:52 +05:30
|
|
|
this.addToPersistenceBatch(actionType, data, listIndex, true);
|
2025-09-10 00:21:43 +05:30
|
|
|
|
2025-09-28 22:53:52 +05:30
|
|
|
if (actionType === 'scrapeSchema' || this.persistenceBuffer.length >= this.BATCH_SIZE) {
|
|
|
|
|
await this.flushPersistenceBuffer();
|
|
|
|
|
} else {
|
|
|
|
|
this.scheduleBatchFlush();
|
2025-09-10 00:21:43 +05:30
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
  /**
   * Persists binary data (screenshots) to database in real-time.
   * Reads the run's existing binaryOutput, adds this item under a
   * collision-free key, and writes the merged object back.
   * @param binaryItem The named, base64/JSON-encoded binary payload to store.
   * @private
   */
  private persistBinaryDataToDatabase = async (binaryItem: { name: string; mimeType: string; data: string }): Promise<void> => {
    if (!this.currentRunId) {
      logger.log('debug', 'No run ID available for binary data persistence');
      return;
    }

    try {
      const run = await Run.findOne({ where: { runId: this.currentRunId } });
      if (!run) {
        logger.log('warn', `Run not found for binary data persistence: ${this.currentRunId}`);
        return;
      }

      // Deep-copy the stored output so we never mutate the model's own object.
      const currentBinaryOutput =
        run.binaryOutput && typeof run.binaryOutput === 'object'
          ? JSON.parse(JSON.stringify(run.binaryOutput))
          : {};

      // Fall back to a positional name when the item has no usable name.
      const baseName = binaryItem.name?.trim() || `Screenshot ${Object.keys(currentBinaryOutput).length + 1}`;

      // Append " (n)" until the key is free.
      let uniqueName = baseName;
      let counter = 1;
      while (currentBinaryOutput[uniqueName]) {
        uniqueName = `${baseName} (${counter++})`;
      }

      const updatedBinaryOutput = {
        ...currentBinaryOutput,
        [uniqueName]: binaryItem,
      };

      await run.update({
        binaryOutput: updatedBinaryOutput
      });

      logger.log('debug', `Persisted binary data for run ${this.currentRunId}: ${binaryItem.name} (${binaryItem.mimeType})`);
    } catch (error: any) {
      // Persistence is best-effort: log and continue rather than failing the run.
      logger.log('error', `Failed to persist binary data in real-time for run ${this.currentRunId}: ${error.message}`);
    }
  };
|
|
|
|
|
|
2024-06-05 04:34:36 +05:30
|
|
|
  /**
   * Interprets the recording as a run.
   * Decrypts the workflow, runs it with persistence/emit callbacks wired in,
   * and returns the collected logs and outputs.
   * @param workflow The workflow to interpret.
   * @param page The page instance used to interact with the browser.
   * @param updatePageOnPause A callback to update the page after a pause.
   * @param settings The settings to use for the interpretation; `settings.params` is extracted and removed.
   * @returns Collected log, run status, scrapeSchema/scrapeList outputs, and binary output.
   */
  public InterpretRecording = async (
    workflow: WorkflowFile,
    page: Page,
    updatePageOnPause: (page: Page) => void,
    settings: InterpreterSettings
  ) => {
    const params = settings.params ? settings.params : null;
    // NOTE(review): this mutates the caller's settings object — confirm no caller reuses it.
    delete settings.params;

    // Decrypt inputs only; no limit clamping in run mode (checkLimit defaults to false).
    const processedWorkflow = processWorkflow(workflow);

    // NOTE(review): mergedScrapeSchema is never used below — dead local, candidate for removal.
    let mergedScrapeSchema = {};

    const options = {
      ...settings,
      debugChannel: {
        // Mirrors the currently interpreted pair id to the client.
        activeId: (id: any) => {
          this.activeId = id;
          this.socket.emit('activePairId', id);
        },
        // Collects timestamped debug output and forwards it to the client.
        debugMessage: (msg: any) => {
          this.debugMessages.push(`[${new Date().toLocaleString()}] ` + msg);
          this.socket.emit('debugMessage', msg)
        },
        // Tracks which action type produced the next serializable callback.
        setActionType: (type: string) => {
          this.currentActionType = type;
        },
        incrementScrapeListIndex: () => {
          this.currentScrapeListIndex++;
        },
        // Tracks the display name for the action currently producing data.
        setActionName: (name: string) => {
          this.currentActionName = name;
        },
      },
      // Normalizes extracted data, records it per action name, persists it,
      // and forwards it to the client.
      serializableCallback: async (data: any) => {
        try {
          if (!data || typeof data !== "object") return;

          let typeKey = this.currentActionType || "";

          if (this.currentActionType === "scrapeList") {
            typeKey = "scrapeList";
          } else if (this.currentActionType === "scrapeSchema") {
            typeKey = "scrapeSchema";
          }

          // Unwrap payloads nested under a type-named key.
          if (typeKey === "scrapeList" && data.scrapeList) {
            data = data.scrapeList;
          } else if (typeKey === "scrapeSchema" && data.scrapeSchema) {
            data = data.scrapeSchema;
          }

          // For scrapeList objects, the action name is the (last) object key
          // and the payload is that key's value.
          let actionName = "";
          if (typeKey === "scrapeList" && data && typeof data === "object" && !Array.isArray(data)) {
            const keys = Object.keys(data);
            if (keys.length === 1) {
              actionName = keys[0];
              data = data[actionName];
            } else if (keys.length > 1) {
              actionName = keys[keys.length - 1];
              data = data[actionName];
            }
          }

          // Fall back to the debug-channel name, then to a generated one.
          if (!actionName) {
            actionName = this.currentActionName || "";
            if (typeKey === "scrapeList" && !actionName) {
              actionName = this.getUniqueActionName(typeKey, "");
            }
          }

          // Normalize the payload to an array where possible.
          const flattened = Array.isArray(data)
            ? data
            : (
              data?.List ??
              (data && typeof data === "object"
                ? Object.values(data).flat?.() ?? data
                : [])
            );

          if (!this.serializableDataByType[typeKey]) {
            this.serializableDataByType[typeKey] = {};
          }

          this.serializableDataByType[typeKey][actionName] = flattened;

          await this.persistDataToDatabase(typeKey, {
            [actionName]: flattened,
          });

          this.socket.emit("serializableCallback", {
            type: typeKey,
            name: actionName,
            data: flattened,
          });
        } catch (err: any) {
          logger.log('error', `serializableCallback handler failed: ${err.message}`);
        }
      },
      // Receives a named screenshot buffer; stores, persists, and forwards it.
      binaryCallback: async (payload: { name: string; data: Buffer; mimeType: string }) => {
        try {
          const { name, data, mimeType } = payload;

          const base64Data = data.toString("base64");
          const uniqueName = this.getUniqueActionName('screenshot', name);

          const binaryItem = {
            name: uniqueName,
            mimeType,
            data: base64Data
          };

          this.binaryData.push(binaryItem);

          await this.persistBinaryDataToDatabase(binaryItem);

          this.socket.emit("binaryCallback", {
            name: uniqueName,
            data: base64Data,
            mimeType
          });
        } catch (err: any) {
          logger.log("error", `binaryCallback handler failed: ${err.message}`);
        }
      }
    }

    const interpreter = new Interpreter(processedWorkflow, options);
    this.interpreter = interpreter;

    // 'flag' fires between pairs: honor breakpoints and pause requests.
    interpreter.on('flag', async (page, resume) => {
      if (this.activeId !== null && this.breakpoints[this.activeId]) {
        logger.log('debug', `breakpoint hit id: ${this.activeId}`);
        this.socket.emit('breakpointHit');
        this.interpretationIsPaused = true;
      }

      if (this.interpretationIsPaused) {
        // Stash the resume callback so the 'resume'/'step' socket events can continue.
        this.interpretationResume = resume;
        logger.log('debug', `Paused inside of flag: ${page.url()}`);
        updatePageOnPause(page);
        this.socket.emit('log', '----- The interpretation has been paused -----', false);
      } else {
        resume();
      }
    });

    const status = await interpreter.run(page, params);

    // Ensure everything buffered is written before assembling the result.
    await this.flushPersistenceBuffer();

    // Structure the output to maintain separate data for each action type
    const result = {
      log: this.debugMessages,
      result: status,
      scrapeSchemaOutput: this.serializableDataByType.scrapeSchema,
      scrapeListOutput: this.serializableDataByType.scrapeList,
      binaryOutput: this.binaryData.reduce<Record<string, { data: string; mimeType: string }>>((acc, item) => {
        const key = item.name || `Screenshot ${Object.keys(acc).length + 1}`;
        acc[key] = { data: item.data, mimeType: item.mimeType };
        return acc;
      }, {})
    }

    logger.log('debug', `Interpretation finished`);
    return result;
  }
|
|
|
|
|
|
2024-06-05 04:35:02 +05:30
|
|
|
/**
|
|
|
|
|
* Returns true if an interpretation is currently running.
|
|
|
|
|
* @returns {boolean}
|
|
|
|
|
*/
|
|
|
|
|
public interpretationInProgress = () => {
|
|
|
|
|
return this.interpreter !== null;
|
|
|
|
|
};
|
|
|
|
|
|
2024-06-05 04:35:23 +05:30
|
|
|
/**
|
|
|
|
|
* Updates the socket used for communication with the client (frontend).
|
|
|
|
|
* @param socket Socket.io socket instance enabling communication with the client (frontend) side.
|
|
|
|
|
* @returns void
|
|
|
|
|
*/
|
2024-06-05 04:36:22 +05:30
|
|
|
public updateSocket = (socket: Socket): void => {
|
2024-06-05 04:35:23 +05:30
|
|
|
this.socket = socket;
|
|
|
|
|
this.subscribeToPausing();
|
|
|
|
|
};
|
2025-09-28 22:53:52 +05:30
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Adds data to persistence buffer for batched processing
|
|
|
|
|
* @private
|
|
|
|
|
*/
|
|
|
|
|
private addToPersistenceBatch(actionType: string, data: any, listIndex?: number, creditValidated: boolean = false): void {
|
|
|
|
|
this.persistenceBuffer.push({
|
|
|
|
|
actionType,
|
|
|
|
|
data,
|
|
|
|
|
listIndex,
|
|
|
|
|
timestamp: Date.now(),
|
|
|
|
|
creditValidated
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
logger.log('debug', `Added ${actionType} to persistence buffer (${this.persistenceBuffer.length} items)`);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Schedules a batched flush if not already scheduled
|
|
|
|
|
* @private
|
|
|
|
|
*/
|
|
|
|
|
private scheduleBatchFlush(): void {
|
|
|
|
|
if (!this.persistenceTimer && !this.persistenceInProgress) {
|
|
|
|
|
this.persistenceTimer = setTimeout(async () => {
|
|
|
|
|
await this.flushPersistenceBuffer();
|
|
|
|
|
}, this.BATCH_TIMEOUT);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/**
|
|
|
|
|
* Flushes persistence buffer to database in a single transaction
|
2025-09-29 20:08:17 +05:30
|
|
|
* @public - Made public to allow external flush before socket emission
|
2025-09-28 22:53:52 +05:30
|
|
|
*/
|
2025-09-29 20:08:17 +05:30
|
|
|
public async flushPersistenceBuffer(): Promise<void> {
|
2025-09-28 22:53:52 +05:30
|
|
|
if (this.persistenceBuffer.length === 0 || this.persistenceInProgress || !this.currentRunId) {
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (this.persistenceTimer) {
|
|
|
|
|
clearTimeout(this.persistenceTimer);
|
|
|
|
|
this.persistenceTimer = null;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
this.persistenceInProgress = true;
|
|
|
|
|
const batchToProcess = [...this.persistenceBuffer];
|
|
|
|
|
this.persistenceBuffer = [];
|
|
|
|
|
|
|
|
|
|
try {
|
|
|
|
|
const sequelize = require('../../storage/db').default;
|
|
|
|
|
await sequelize.transaction(async (transaction: any) => {
|
|
|
|
|
const run = await Run.findOne({
|
|
|
|
|
where: { runId: this.currentRunId! },
|
|
|
|
|
transaction
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
if (!run) {
|
|
|
|
|
logger.log('warn', `Run not found for batched persistence: ${this.currentRunId}`);
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const currentSerializableOutput = run.serializableOutput ?
|
|
|
|
|
JSON.parse(JSON.stringify(run.serializableOutput)) :
|
|
|
|
|
{ scrapeSchema: [], scrapeList: [] };
|
2025-10-21 00:43:08 +05:30
|
|
|
|
|
|
|
|
if (Array.isArray(currentSerializableOutput.scrapeList)) {
|
|
|
|
|
currentSerializableOutput.scrapeList = {};
|
|
|
|
|
}
|
|
|
|
|
if (Array.isArray(currentSerializableOutput.scrapeSchema)) {
|
|
|
|
|
currentSerializableOutput.scrapeSchema = {};
|
|
|
|
|
}
|
2025-09-28 22:53:52 +05:30
|
|
|
|
|
|
|
|
let hasUpdates = false;
|
|
|
|
|
|
2025-10-21 00:43:08 +05:30
|
|
|
const mergeLists = (target: Record<string, any>, updates: Record<string, any>) => {
|
|
|
|
|
for (const [key, val] of Object.entries(updates)) {
|
|
|
|
|
const flattened = Array.isArray(val)
|
|
|
|
|
? val
|
|
|
|
|
: (val?.List ?? (val && typeof val === 'object' ? Object.values(val).flat?.() ?? val : []));
|
|
|
|
|
target[key] = flattened;
|
|
|
|
|
}
|
|
|
|
|
};
|
|
|
|
|
|
2025-09-28 22:53:52 +05:30
|
|
|
for (const item of batchToProcess) {
|
|
|
|
|
if (item.actionType === 'scrapeSchema') {
|
2025-10-21 00:43:08 +05:30
|
|
|
if (!currentSerializableOutput.scrapeSchema || typeof currentSerializableOutput.scrapeSchema !== 'object') {
|
|
|
|
|
currentSerializableOutput.scrapeSchema = {};
|
|
|
|
|
}
|
|
|
|
|
mergeLists(currentSerializableOutput.scrapeSchema, item.data);
|
2025-09-28 22:53:52 +05:30
|
|
|
hasUpdates = true;
|
2025-10-21 00:43:08 +05:30
|
|
|
} else if (item.actionType === 'scrapeList') {
|
|
|
|
|
if (!currentSerializableOutput.scrapeList || typeof currentSerializableOutput.scrapeList !== 'object') {
|
|
|
|
|
currentSerializableOutput.scrapeList = {};
|
2025-09-28 22:53:52 +05:30
|
|
|
}
|
2025-10-21 00:43:08 +05:30
|
|
|
mergeLists(currentSerializableOutput.scrapeList, item.data);
|
2025-09-28 22:53:52 +05:30
|
|
|
hasUpdates = true;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (hasUpdates) {
|
|
|
|
|
await run.update({
|
|
|
|
|
serializableOutput: currentSerializableOutput
|
|
|
|
|
}, { transaction });
|
|
|
|
|
|
|
|
|
|
logger.log('debug', `Batched persistence: Updated run ${this.currentRunId} with ${batchToProcess.length} items`);
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
2025-09-28 23:01:23 +05:30
|
|
|
this.persistenceRetryCount = 0;
|
|
|
|
|
|
2025-09-28 22:53:52 +05:30
|
|
|
} catch (error: any) {
|
|
|
|
|
logger.log('error', `Failed to flush persistence buffer for run ${this.currentRunId}: ${error.message}`);
|
|
|
|
|
|
2025-09-28 23:01:23 +05:30
|
|
|
if (!this.persistenceRetryCount) {
|
|
|
|
|
this.persistenceRetryCount = 0;
|
|
|
|
|
}
|
|
|
|
|
|
2025-11-29 15:06:03 +05:30
|
|
|
if (this.persistenceRetryCount < this.MAX_PERSISTENCE_RETRIES) {
|
2025-09-28 23:01:23 +05:30
|
|
|
this.persistenceBuffer.unshift(...batchToProcess);
|
|
|
|
|
this.persistenceRetryCount++;
|
2025-09-28 22:53:52 +05:30
|
|
|
|
2025-09-28 23:01:23 +05:30
|
|
|
const backoffDelay = Math.min(5000 * Math.pow(2, this.persistenceRetryCount), 30000);
|
2025-11-29 15:06:03 +05:30
|
|
|
|
|
|
|
|
if (this.persistenceRetryTimer) {
|
|
|
|
|
clearTimeout(this.persistenceRetryTimer);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
this.persistenceRetryTimer = setTimeout(async () => {
|
|
|
|
|
this.persistenceRetryTimer = null;
|
2025-09-28 23:01:23 +05:30
|
|
|
await this.flushPersistenceBuffer();
|
|
|
|
|
}, backoffDelay);
|
|
|
|
|
|
2025-11-29 15:06:03 +05:30
|
|
|
logger.log('warn', `Scheduling persistence retry ${this.persistenceRetryCount}/${this.MAX_PERSISTENCE_RETRIES} in ${backoffDelay}ms`);
|
2025-09-28 23:01:23 +05:30
|
|
|
} else {
|
|
|
|
|
logger.log('error', `Max persistence retries exceeded for run ${this.currentRunId}, dropping ${batchToProcess.length} items`);
|
|
|
|
|
this.persistenceRetryCount = 0;
|
|
|
|
|
}
|
2025-09-28 22:53:52 +05:30
|
|
|
} finally {
|
|
|
|
|
this.persistenceInProgress = false;
|
2025-09-29 18:26:55 +05:30
|
|
|
|
|
|
|
|
if (this.persistenceBuffer.length > 0 && !this.persistenceTimer) {
|
|
|
|
|
this.scheduleBatchFlush();
|
|
|
|
|
}
|
2025-09-28 22:53:52 +05:30
|
|
|
}
|
|
|
|
|
};
|
2024-06-05 04:34:36 +05:30
|
|
|
}
|