Files
parcer/server/src/workflow-management/classes/Generator.ts

182 lines
6.0 KiB
TypeScript
Raw Normal View History

2024-06-07 23:19:43 +05:30
import { Action, ActionType, Coordinates, TagName } from "../../types";
import { WhereWhatPair, WorkflowFile } from '@wbr-project/wbr-interpret';
import logger from "../../logger";
import { Socket } from "socket.io";
import { Page } from "playwright";
import {
getElementInformation,
getRect,
getSelectors,
isRuleOvershadowing,
selectorAlreadyInWorkflow
} from "../selector";
import { CustomActions } from "../../../../src/shared/types";
import { workflow } from "../../routes";
import { saveFile } from "../storage";
import fs from "fs";
import { getBestSelectorForAction } from "../utils";
import { browserPool } from "../../server";
2024-06-07 23:24:18 +05:30
/**
 * State that the generator carries over from one recorded interaction to the next.
 */
interface PersistedGeneratedData {
// Selector written into `pair.where.selectors` when a 'customAction' decision is accepted.
lastUsedSelector: string;
// Index received via the 'activeIndex' socket event; read as the insertion position for new pairs. null when unset.
lastIndex: number|null;
// Not read in the visible code — presumably the type of the most recent action; TODO confirm.
lastAction: string;
}
2024-06-07 23:19:43 +05:30
/**
 * Descriptive metadata kept alongside a recorded workflow.
 * NOTE(review): the field meanings below are inferred from their names only —
 * confirm against the code that actually writes them.
 */
interface MetaData {
// Presumably the name the workflow is saved under.
name: string;
// Presumably the creation timestamp, stored as a string; format not visible here.
create_date: string;
// Presumably the number of where-what pairs in the workflow.
pairs: number;
// Presumably the timestamp of the last modification, stored as a string.
update_date: string;
// Presumably the names of the workflow's parameters.
params: string[],
}
2024-06-07 23:26:57 +05:30
/**
* Workflow generator is used to transform the user's interactions into automatically
* generated, correct workflows, using internal state persistence and
* heuristic generative algorithms.
* This class also takes care of the selector generation.
* @category WorkflowManagement
*/
2024-06-07 23:26:08 +05:30
export class WorkflowGenerator {
2024-06-07 23:27:11 +05:30
/**
* The socket used to communicate with the client.
* @private
*/
2024-06-07 23:26:08 +05:30
// Assigned once in the constructor; the generator's event handlers are registered on this same socket.
private socket : Socket;
2024-06-07 23:27:11 +05:30
/**
* The public constructor of the WorkflowGenerator.
* Takes socket for communication as a parameter and registers some important events on it.
* @param socket The socket used to communicate with the client.
* @constructor
*/
2024-06-07 23:26:08 +05:30
public constructor(socket: Socket) {
  // Store the socket first, then attach every generator-related listener to it.
  this.socket = socket;
  this.registerEventHandlers(this.socket);
}
2024-06-07 23:28:15 +05:30
/**
* The current workflow being recorded.
* @private
*/
2024-06-07 23:28:05 +05:30
// Starts empty; replaced by a fresh empty record on the 'new-recording' event and
// filled with pairs by addPairToWorkflowAndNotifyClient.
private workflowRecord: WorkflowFile = {
workflow: [],
};
2024-06-07 23:28:15 +05:30
/**
* Metadata of the currently recorded workflow.
* @private
*/
2024-06-07 23:28:05 +05:30
// Initialised with empty placeholder values; nothing in the visible code reads or updates it yet.
private recordingMeta: MetaData = {
name: '',
create_date: '',
pairs: 0,
update_date: '',
params: [],
}
2024-06-07 23:44:04 +05:30
/**
* The persistent data from the whole workflow generation process.
* Used for correct generation of other user inputs.
* @private
*/
private generatedData: PersistedGeneratedData = {
// Read by the 'decision' handler for 'customAction' pairs; never assigned elsewhere in the visible code.
lastUsedSelector: '',
// Overwritten by the 'activeIndex' event; null means "no index selected" and is treated as 0 on insertion.
lastIndex: null,
// Not referenced anywhere else in the visible code.
lastAction: '',
}
2024-06-07 23:53:17 +05:30
/**
* Registers the event handlers for all generator-related events on the socket.
* @param socket The socket used to communicate with the client.
* @private
*/
2024-06-07 23:45:50 +05:30
private registerEventHandlers = (socket: Socket) => {
  // Turns whatever was thrown into a loggable string.
  const describeError = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  // 'save': persist the recorded workflow under the file name chosen by the client.
  // Nothing awaits an async listener, so failures are caught and logged here instead
  // of surfacing as unhandled promise rejections.
  socket.on('save', async (fileName: string) => {
    logger.log('debug', `Saving workflow ${fileName}`);
    try {
      await this.saveNewWorkflow(fileName);
    } catch (error: unknown) {
      logger.log('error', `Failed to save workflow ${fileName}: ${describeError(error)}`);
    }
  });

  // 'new-recording': discard everything recorded so far and start from an empty workflow.
  socket.on('new-recording', () => {
    this.workflowRecord = { workflow: [] };
  });

  // 'activeIndex': remember the index the client reports. The value is parsed as a
  // base-10 integer; an unparsable value is stored as null rather than NaN.
  socket.on('activeIndex', (data) => {
    const parsedIndex = Number.parseInt(data, 10);
    this.generatedData.lastIndex = Number.isNaN(parsedIndex) ? null : parsedIndex;
  });

  // 'decision': the client answered a prompt about a pending pair. The pair is added to
  // the workflow only when there is an active browser with a current page.
  socket.on('decision', async (payload) => {
    try {
      const { pair, actionType, decision } = payload;
      const id = browserPool.getActiveBrowserId();
      if (!id) {
        return;
      }
      const currentPage = browserPool.getRemoteBrowser(id)?.getCurrentPage();
      // An accepted custom action is bound to the last selector the generator used.
      if (decision && actionType === 'customAction') {
        pair.where.selectors = [this.generatedData.lastUsedSelector];
      }
      if (currentPage) {
        await this.addPairToWorkflowAndNotifyClient(pair, currentPage);
      }
    } catch (error: unknown) {
      logger.log('error', `Failed to process decision: ${describeError(error)}`);
    }
  });

  // 'updatePair': replace the pair at the given index with the one sent by the client.
  socket.on('updatePair', (data) => {
    this.updatePairInWorkflow(data.index, data.pair);
  });
};
2024-06-07 23:28:05 +05:30
/**
 * Adds a newly recorded where-what pair to the workflow.
 *
 * - If a rule with the same first selector already exists, the pair's actions are
 *   appended to that rule.
 * - Otherwise the pair is handed to `handleOverShadowing`; when that does not handle
 *   it, the pair is inserted as a new rule at the position given by
 *   `generatedData.lastIndex` (0 when no index is set).
 *
 * NOTE(review): despite its name, this method does not emit anything on the socket
 * in its current form.
 *
 * @param pair The recorded where-what pair. Its `what` array may be extended in place
 * with a `waitForLoadState` step.
 * @param page The page passed on to the overshadowing check.
 * @private
 */
private addPairToWorkflowAndNotifyClient = async(pair: WhereWhatPair, page: Page) => {
  // Appends waitForLoadState('networkidle') for a better success rate of automatically
  // recorded workflows. Skipped when the first action already is a load-state wait or a
  // key press, and when the pair has no actions at all (the original code threw there).
  const appendLoadStateWait = (): void => {
    const firstAction = pair.what[0]?.action;
    if (firstAction === undefined || firstAction === 'waitForLoadState' || firstAction === 'press') {
      return;
    }
    pair.what.push({
      action: 'waitForLoadState',
      args: ['networkidle'],
    });
  };

  // Check whether a pair with the same where conditions is already present in the workflow.
  const primarySelector = pair.where.selectors && pair.where.selectors[0];
  if (primarySelector) {
    const match = selectorAlreadyInWorkflow(primarySelector, this.workflowRecord.workflow);
    if (match) {
      // The where conditions match an existing rule: merge the new actions into it.
      const matchedIndex = this.workflowRecord.workflow.indexOf(match);
      appendLoadStateWait();
      this.workflowRecord.workflow[matchedIndex].what =
        this.workflowRecord.workflow[matchedIndex].what.concat(pair.what);
      logger.log('info', `Pushed ${JSON.stringify(this.workflowRecord.workflow[matchedIndex])} to workflow pair`);
      return;
    }
  }

  // The where conditions are not in the workflow yet, so check them for possible
  // overshadowing of other rules. `||` (not `??`) is deliberate: it maps both null and
  // a NaN index to 0.
  const handled = await this.handleOverShadowing(pair, page, this.generatedData.lastIndex || 0);
  if (handled) {
    logger.log('debug',
      ` ${JSON.stringify(this.workflowRecord.workflow[this.generatedData.lastIndex || 0])} added action to workflow pair`);
    return;
  }

  appendLoadStateWait();

  // Capture the insertion position BEFORE lastIndex is adjusted, so the log line below
  // reports where the pair actually went rather than the already-updated index.
  const insertAt = this.generatedData.lastIndex || 0;
  if (this.generatedData.lastIndex === 0) {
    this.generatedData.lastIndex = null;
    // we want to have the most specific selectors at the beginning of the workflow
    this.workflowRecord.workflow.unshift(pair);
  } else {
    this.workflowRecord.workflow.splice(insertAt, 0, pair);
    if (this.generatedData.lastIndex) {
      this.generatedData.lastIndex = this.generatedData.lastIndex - 1;
    }
  }
  logger.log('info',
    `${JSON.stringify(pair)}: Added to workflow file on index: ${insertAt}`);
};
2024-06-07 23:26:08 +05:30
}