Files
parcer/server/src/workflow-management/classes/Generator.ts

211 lines
7.7 KiB
TypeScript
Raw Normal View History

2024-06-07 23:19:43 +05:30
import { Action, ActionType, Coordinates, TagName } from "../../types";
import { WhereWhatPair, WorkflowFile } from '@wbr-project/wbr-interpret';
import logger from "../../logger";
import { Socket } from "socket.io";
import { Page } from "playwright";
import {
getElementInformation,
getRect,
getSelectors,
isRuleOvershadowing,
selectorAlreadyInWorkflow
} from "../selector";
import { CustomActions } from "../../../../src/shared/types";
import { workflow } from "../../routes";
import { saveFile } from "../storage";
import fs from "fs";
import { getBestSelectorForAction } from "../utils";
import { browserPool } from "../../server";
2024-06-07 23:24:18 +05:30
/**
 * State the generator keeps between individual user interactions
 * while a workflow is being recorded.
 */
interface PersistedGeneratedData {
  /** Selector remembered from a previous step of the recording. */
  lastUsedSelector: string;
  /** Workflow index remembered from a previous step, or `null` when none is set. */
  lastIndex: number | null;
  /** Name of the action remembered from a previous step. */
  lastAction: string;
}
2024-06-07 23:19:43 +05:30
/**
 * Descriptive information stored alongside a recorded workflow.
 */
interface MetaData {
  /** Name of the recording. */
  name: string;
  /** Creation date, kept as a string. */
  create_date: string;
  /** Pair count of the workflow, kept as a number. */
  pairs: number;
  /** Date of the last update, kept as a string. */
  update_date: string;
  /** Parameter names associated with the workflow. */
  params: string[];
}
2024-06-07 23:26:57 +05:30
/**
 * Workflow generator transforms the user's interactions into an automatically
 * generated workflow, using internal state persistence and heuristic
 * generative algorithms.
 * This class also takes care of the selector generation.
 * @category WorkflowManagement
 */
export class WorkflowGenerator {
  /**
   * The socket used to communicate with the client.
   * @private
   */
  private socket: Socket;

  /**
   * The current workflow being recorded.
   * @private
   */
  private workflowRecord: WorkflowFile = {
    workflow: [],
  };

  /**
   * Metadata of the currently recorded workflow.
   * @private
   */
  private recordingMeta: MetaData = {
    name: '',
    create_date: '',
    pairs: 0,
    update_date: '',
    params: [],
  };

  /**
   * The persistent data from the whole workflow generation process.
   * Used for correct generation of other user inputs.
   * @private
   */
  private generatedData: PersistedGeneratedData = {
    lastUsedSelector: '',
    lastIndex: null,
    lastAction: '',
  };

  /**
   * The public constructor of the WorkflowGenerator.
   * Takes the socket for communication as a parameter and registers the
   * generator-related event handlers on it.
   * @param socket The socket used to communicate with the client.
   * @constructor
   */
  public constructor(socket: Socket) {
    this.socket = socket;
    this.registerEventHandlers(socket);
  }

  /**
   * Registers the event handlers for all generator-related events on the socket.
   *
   * The asynchronous handlers catch and log their own failures: nothing awaits
   * the promise returned by a socket listener, so an error escaping from it
   * would surface as an unhandled promise rejection.
   *
   * @param socket The socket used to communicate with the client.
   * @private
   */
  private registerEventHandlers = (socket: Socket) => {
    socket.on('save', async (fileName: string) => {
      logger.log('debug', `Saving workflow ${fileName}`);
      try {
        await this.saveNewWorkflow(fileName);
      } catch (error: unknown) {
        logger.log('error', `Saving workflow ${fileName} failed: ${WorkflowGenerator.describeError(error)}`);
      }
    });

    // Starting a new recording discards every pair recorded so far.
    socket.on('new-recording', () => {
      this.workflowRecord = {
        workflow: [],
      };
    });

    socket.on('activeIndex', (data) => {
      // Explicit radix; a payload that is not a number clears the index
      // instead of storing NaN.
      const parsedIndex = parseInt(data, 10);
      this.generatedData.lastIndex = Number.isNaN(parsedIndex) ? null : parsedIndex;
    });

    socket.on('decision', async ({ pair, actionType, decision }) => {
      try {
        const id = browserPool.getActiveBrowserId();
        if (!id) {
          return;
        }
        const activeBrowser = browserPool.getRemoteBrowser(id);
        const currentPage = activeBrowser?.getCurrentPage();
        // On a positive decision a custom action is bound to the last used selector.
        if (decision && actionType === 'customAction') {
          pair.where.selectors = [this.generatedData.lastUsedSelector];
        }
        if (currentPage) {
          await this.addPairToWorkflowAndNotifyClient(pair, currentPage);
        }
      } catch (error: unknown) {
        logger.log('error', `Handling decision failed: ${WorkflowGenerator.describeError(error)}`);
      }
    });

    socket.on('updatePair', (data) => {
      this.updatePairInWorkflow(data.index, data.pair);
    });
  };

  /**
   * Turns a caught value of unknown type into a loggable message.
   * @param error The value caught in a catch clause.
   * @returns The error message for Error instances, otherwise the stringified value.
   * @private
   */
  private static describeError(error: unknown): string {
    return error instanceof Error ? error.message : String(error);
  }

  /**
   * Appends a waitForLoadState('networkidle') action to the pair unless the
   * pair's first action is already a waitForLoadState or a press action.
   * Added for a better success rate of automatically recorded workflows.
   * @param pair The pair whose what part is extended in place.
   * @private
   */
  private appendLoadStateWait(pair: WhereWhatPair): void {
    const firstAction = pair.what[0].action;
    if (firstAction !== 'waitForLoadState' && firstAction !== 'press') {
      pair.what.push({
        action: 'waitForLoadState',
        args: ['networkidle'],
      });
    }
  }

  /**
   * Adds a newly generated pair to the workflow and notifies the client about it by
   * sending the updated workflow through the socket.
   *
   * Checks some conditions for the correct addition of the pair.
   * 1. The pair's first where selector is already in the workflow.
   *    If so, the what part of the pair is appended to the matching pair.
   * 2. Otherwise the pair is handed to the over-shadowing handling. When that does
   *    not handle the pair, the pair is inserted at the last active index
   *    (or at the beginning of the workflow when no index is set).
   *
   * A [waitForLoadState](https://playwright.dev/docs/api/class-frame#frame-wait-for-load-state)
   * action waiting for networkidle is appended to the pair's actions unless its
   * first action is already waitForLoadState or press.
   *
   * @param pair The pair to add to the workflow.
   * @param page The page to use for the state checking.
   * @private
   * @returns {Promise<void>}
   */
  private addPairToWorkflowAndNotifyClient = async (pair: WhereWhatPair, page: Page) => {
    let matched = false;
    // validate if a pair with the same where conditions is already present in the workflow
    const firstSelector = pair.where.selectors?.[0];
    if (firstSelector) {
      const match = selectorAlreadyInWorkflow(firstSelector, this.workflowRecord.workflow);
      if (match) {
        // if a match of where conditions is found, the new action is added into the matched rule
        const matchedIndex = this.workflowRecord.workflow.indexOf(match);
        this.appendLoadStateWait(pair);
        this.workflowRecord.workflow[matchedIndex].what =
          this.workflowRecord.workflow[matchedIndex].what.concat(pair.what);
        logger.log('info', `Pushed ${JSON.stringify(this.workflowRecord.workflow[matchedIndex])} to workflow pair`);
        matched = true;
      }
    }
    // if the where conditions of the pair are not already in the workflow, we need to validate
    // the where conditions for possible overshadowing of different rules and handle the cases
    // according to the recording logic
    if (!matched) {
      const handled = await this.handleOverShadowing(pair, page, this.generatedData.lastIndex || 0);
      if (!handled) {
        this.appendLoadStateWait(pair);
        // Remember where the pair goes BEFORE lastIndex is adjusted below, so the
        // log reports the real insertion index. `||` also maps NaN to 0.
        const insertIndex = this.generatedData.lastIndex || 0;
        if (this.generatedData.lastIndex === 0) {
          this.generatedData.lastIndex = null;
          // we want to have the most specific selectors at the beginning of the workflow
          this.workflowRecord.workflow.unshift(pair);
        } else {
          this.workflowRecord.workflow.splice(insertIndex, 0, pair);
          if (this.generatedData.lastIndex) {
            this.generatedData.lastIndex = this.generatedData.lastIndex - 1;
          }
        }
        logger.log('info',
          `${JSON.stringify(pair)}: Added to workflow file on index: ${insertIndex}`);
      } else {
        logger.log('debug',
          ` ${JSON.stringify(this.workflowRecord.workflow[this.generatedData.lastIndex || 0])} added action to workflow pair`);
      }
    }
    this.socket.emit('workflow', this.workflowRecord);
    logger.log('info', `Workflow emitted`);
  };

  /**
   * Handles a click of the user on the page.
   * NOTE(review): the implementation is incomplete - it only builds the where
   * condition from the page url and does not record any pair yet.
   * @param coordinates The coordinates of the click (currently unused).
   * @param page The page the click happened on.
   */
  public onClick = async (coordinates: Coordinates, page: Page) => {
    const where: WhereWhatPair["where"] = { url: this.getBestUrl(page.url()) };
  };
}