feat: persist data to db
This commit is contained in:
@@ -4,6 +4,7 @@ import { Socket } from "socket.io";
|
|||||||
import { Page } from "playwright";
|
import { Page } from "playwright";
|
||||||
import { InterpreterSettings } from "../../types";
|
import { InterpreterSettings } from "../../types";
|
||||||
import { decrypt } from "../../utils/auth";
|
import { decrypt } from "../../utils/auth";
|
||||||
|
import Run from "../../models/Run";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decrypts any encrypted inputs in the workflow. If checkLimit is true, it will also handle the limit validation for scrapeList action.
|
* Decrypts any encrypted inputs in the workflow. If checkLimit is true, it will also handle the limit validation for scrapeList action.
|
||||||
@@ -112,6 +113,11 @@ export class WorkflowInterpreter {
|
|||||||
*/
|
*/
|
||||||
private currentScrapeListIndex: number = 0;
|
private currentScrapeListIndex: number = 0;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Current run ID for real-time persistence
|
||||||
|
*/
|
||||||
|
private currentRunId: string | null = null;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* An array of id's of the pairs from the workflow that are about to be paused.
|
* An array of id's of the pairs from the workflow that are about to be paused.
|
||||||
* As "breakpoints".
|
* As "breakpoints".
|
||||||
@@ -128,10 +134,12 @@ export class WorkflowInterpreter {
|
|||||||
/**
|
/**
|
||||||
* A public constructor taking a socket instance for communication with the client.
|
* A public constructor taking a socket instance for communication with the client.
|
||||||
* @param socket Socket.io socket instance enabling communication with the client (frontend) side.
|
* @param socket Socket.io socket instance enabling communication with the client (frontend) side.
|
||||||
|
* @param runId Optional run ID for real-time data persistence
|
||||||
* @constructor
|
* @constructor
|
||||||
*/
|
*/
|
||||||
constructor(socket: Socket) {
|
constructor(socket: Socket, runId?: string) {
|
||||||
this.socket = socket;
|
this.socket = socket;
|
||||||
|
this.currentRunId = runId || null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -202,8 +210,14 @@ export class WorkflowInterpreter {
|
|||||||
this.currentActionType = type;
|
this.currentActionType = type;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
serializableCallback: (data: any) => {
|
serializableCallback: async (data: any) => {
|
||||||
if (this.currentActionType === 'scrapeSchema') {
|
if (this.currentActionType === 'scrapeSchema') {
|
||||||
|
const cumulativeScrapeSchemaData = Array.isArray(data) && data.length > 0 ? data : [data];
|
||||||
|
|
||||||
|
if (cumulativeScrapeSchemaData.length > 0) {
|
||||||
|
await this.persistDataToDatabase('scrapeSchema', cumulativeScrapeSchemaData);
|
||||||
|
}
|
||||||
|
|
||||||
if (Array.isArray(data) && data.length > 0) {
|
if (Array.isArray(data) && data.length > 0) {
|
||||||
this.socket.emit('serializableCallback', {
|
this.socket.emit('serializableCallback', {
|
||||||
type: 'captureText',
|
type: 'captureText',
|
||||||
@@ -216,13 +230,24 @@ export class WorkflowInterpreter {
|
|||||||
});
|
});
|
||||||
}
|
}
|
||||||
} else if (this.currentActionType === 'scrapeList') {
|
} else if (this.currentActionType === 'scrapeList') {
|
||||||
|
if (data && Array.isArray(data) && data.length > 0) {
|
||||||
|
// Use the current index for persistence
|
||||||
|
await this.persistDataToDatabase('scrapeList', data, this.currentScrapeListIndex);
|
||||||
|
}
|
||||||
|
|
||||||
this.socket.emit('serializableCallback', {
|
this.socket.emit('serializableCallback', {
|
||||||
type: 'captureList',
|
type: 'captureList',
|
||||||
data
|
data
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
binaryCallback: (data: string, mimetype: string) => {
|
binaryCallback: async (data: string, mimetype: string) => {
|
||||||
|
const binaryItem = { mimetype, data: JSON.stringify(data) };
|
||||||
|
this.binaryData.push(binaryItem);
|
||||||
|
|
||||||
|
// Persist binary data to database
|
||||||
|
await this.persistBinaryDataToDatabase(binaryItem);
|
||||||
|
|
||||||
this.socket.emit('binaryCallback', {
|
this.socket.emit('binaryCallback', {
|
||||||
data,
|
data,
|
||||||
mimetype,
|
mimetype,
|
||||||
@@ -272,6 +297,10 @@ export class WorkflowInterpreter {
|
|||||||
public stopInterpretation = async () => {
|
public stopInterpretation = async () => {
|
||||||
if (this.interpreter) {
|
if (this.interpreter) {
|
||||||
logger.log('info', 'Stopping the interpretation.');
|
logger.log('info', 'Stopping the interpretation.');
|
||||||
|
|
||||||
|
this.interpreter.abort();
|
||||||
|
logger.log('info', 'maxun-core interpreter aborted - data collection stopped immediately');
|
||||||
|
|
||||||
await this.interpreter.stop();
|
await this.interpreter.stop();
|
||||||
this.socket.emit('log', '----- The interpretation has been stopped -----', false);
|
this.socket.emit('log', '----- The interpretation has been stopped -----', false);
|
||||||
this.clearState();
|
this.clearState();
|
||||||
@@ -294,8 +323,115 @@ export class WorkflowInterpreter {
|
|||||||
};
|
};
|
||||||
this.binaryData = [];
|
this.binaryData = [];
|
||||||
this.currentScrapeListIndex = 0;
|
this.currentScrapeListIndex = 0;
|
||||||
|
this.currentRunId = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Sets the current run ID for real-time persistence.
|
||||||
|
* @param runId The run ID to set
|
||||||
|
*/
|
||||||
|
public setRunId = (runId: string): void => {
|
||||||
|
this.currentRunId = runId;
|
||||||
|
logger.log('debug', `Set run ID for real-time persistence: ${runId}`);
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Persists data to database in real-time during interpretation
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
private persistDataToDatabase = async (actionType: string, data: any, listIndex?: number): Promise<void> => {
|
||||||
|
if (!this.currentRunId) {
|
||||||
|
logger.log('debug', 'No run ID available for real-time persistence');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const run = await Run.findOne({ where: { runId: this.currentRunId } });
|
||||||
|
|
||||||
|
if (!run) {
|
||||||
|
logger.log('warn', `Run not found for real-time persistence: ${this.currentRunId}`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const currentSerializableOutput = run.serializableOutput ?
|
||||||
|
JSON.parse(JSON.stringify(run.serializableOutput)) :
|
||||||
|
{ scrapeSchema: [], scrapeList: [] };
|
||||||
|
|
||||||
|
if (actionType === 'scrapeSchema') {
|
||||||
|
const newSchemaData = Array.isArray(data) ? data : [data];
|
||||||
|
const updatedOutput = {
|
||||||
|
...currentSerializableOutput,
|
||||||
|
scrapeSchema: newSchemaData
|
||||||
|
};
|
||||||
|
|
||||||
|
await run.update({
|
||||||
|
serializableOutput: updatedOutput
|
||||||
|
});
|
||||||
|
|
||||||
|
logger.log('debug', `Persisted scrapeSchema data for run ${this.currentRunId}: ${newSchemaData.length} items`);
|
||||||
|
|
||||||
|
} else if (actionType === 'scrapeList' && typeof listIndex === 'number') {
|
||||||
|
if (!Array.isArray(currentSerializableOutput.scrapeList)) {
|
||||||
|
currentSerializableOutput.scrapeList = [];
|
||||||
|
}
|
||||||
|
|
||||||
|
const updatedList = [...currentSerializableOutput.scrapeList];
|
||||||
|
updatedList[listIndex] = data;
|
||||||
|
|
||||||
|
const updatedOutput = {
|
||||||
|
...currentSerializableOutput,
|
||||||
|
scrapeList: updatedList
|
||||||
|
};
|
||||||
|
|
||||||
|
await run.update({
|
||||||
|
serializableOutput: updatedOutput
|
||||||
|
});
|
||||||
|
|
||||||
|
logger.log('debug', `Persisted scrapeList data for run ${this.currentRunId} at index ${listIndex}: ${Array.isArray(data) ? data.length : 'N/A'} items`);
|
||||||
|
}
|
||||||
|
} catch (error: any) {
|
||||||
|
logger.log('error', `Failed to persist data in real-time for run ${this.currentRunId}: ${error.message}`);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Persists binary data to database in real-time
|
||||||
|
* @private
|
||||||
|
*/
|
||||||
|
private persistBinaryDataToDatabase = async (binaryItem: { mimetype: string, data: string }): Promise<void> => {
|
||||||
|
if (!this.currentRunId) {
|
||||||
|
logger.log('debug', 'No run ID available for binary data persistence');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
const run = await Run.findOne({ where: { runId: this.currentRunId } });
|
||||||
|
if (!run) {
|
||||||
|
logger.log('warn', `Run not found for binary data persistence: ${this.currentRunId}`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
const currentBinaryOutput = run.binaryOutput ?
|
||||||
|
JSON.parse(JSON.stringify(run.binaryOutput)) :
|
||||||
|
{};
|
||||||
|
|
||||||
|
const uniqueKey = `item-${Date.now()}-${Object.keys(currentBinaryOutput).length}`;
|
||||||
|
|
||||||
|
const updatedBinaryOutput = {
|
||||||
|
...currentBinaryOutput,
|
||||||
|
[uniqueKey]: binaryItem
|
||||||
|
};
|
||||||
|
|
||||||
|
await run.update({
|
||||||
|
binaryOutput: updatedBinaryOutput
|
||||||
|
});
|
||||||
|
|
||||||
|
logger.log('debug', `Persisted binary data for run ${this.currentRunId}: ${binaryItem.mimetype}`);
|
||||||
|
} catch (error: any) {
|
||||||
|
logger.log('error', `Failed to persist binary data in real-time for run ${this.currentRunId}: ${error.message}`);
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interprets the recording as a run.
|
* Interprets the recording as a run.
|
||||||
* @param workflow The workflow to interpret.
|
* @param workflow The workflow to interpret.
|
||||||
@@ -333,7 +469,7 @@ export class WorkflowInterpreter {
|
|||||||
this.currentScrapeListIndex++;
|
this.currentScrapeListIndex++;
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
serializableCallback: (data: any) => {
|
serializableCallback: async (data: any) => {
|
||||||
if (this.currentActionType === 'scrapeSchema') {
|
if (this.currentActionType === 'scrapeSchema') {
|
||||||
if (Array.isArray(data) && data.length > 0) {
|
if (Array.isArray(data) && data.length > 0) {
|
||||||
mergedScrapeSchema = { ...mergedScrapeSchema, ...data[0] };
|
mergedScrapeSchema = { ...mergedScrapeSchema, ...data[0] };
|
||||||
@@ -342,14 +478,29 @@ export class WorkflowInterpreter {
|
|||||||
mergedScrapeSchema = { ...mergedScrapeSchema, ...data };
|
mergedScrapeSchema = { ...mergedScrapeSchema, ...data };
|
||||||
this.serializableDataByType.scrapeSchema.push([data]);
|
this.serializableDataByType.scrapeSchema.push([data]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Persist the cumulative scrapeSchema data
|
||||||
|
const cumulativeScrapeSchemaData = Object.keys(mergedScrapeSchema).length > 0 ? [mergedScrapeSchema] : [];
|
||||||
|
if (cumulativeScrapeSchemaData.length > 0) {
|
||||||
|
await this.persistDataToDatabase('scrapeSchema', cumulativeScrapeSchemaData);
|
||||||
|
}
|
||||||
} else if (this.currentActionType === 'scrapeList') {
|
} else if (this.currentActionType === 'scrapeList') {
|
||||||
|
if (data && Array.isArray(data) && data.length > 0) {
|
||||||
|
// Use the current index for persistence
|
||||||
|
await this.persistDataToDatabase('scrapeList', data, this.currentScrapeListIndex);
|
||||||
|
}
|
||||||
this.serializableDataByType.scrapeList[this.currentScrapeListIndex] = data;
|
this.serializableDataByType.scrapeList[this.currentScrapeListIndex] = data;
|
||||||
}
|
}
|
||||||
|
|
||||||
this.socket.emit('serializableCallback', data);
|
this.socket.emit('serializableCallback', data);
|
||||||
},
|
},
|
||||||
binaryCallback: async (data: string, mimetype: string) => {
|
binaryCallback: async (data: string, mimetype: string) => {
|
||||||
this.binaryData.push({ mimetype, data: JSON.stringify(data) });
|
const binaryItem = { mimetype, data: JSON.stringify(data) };
|
||||||
|
this.binaryData.push(binaryItem);
|
||||||
|
|
||||||
|
// Persist binary data to database
|
||||||
|
await this.persistBinaryDataToDatabase(binaryItem);
|
||||||
|
|
||||||
this.socket.emit('binaryCallback', { data, mimetype });
|
this.socket.emit('binaryCallback', { data, mimetype });
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user