diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts new file mode 100644 index 00000000..2634635d --- /dev/null +++ b/server/src/workflow-management/classes/Generator.ts @@ -0,0 +1,783 @@ +import { Action, ActionType, Coordinates, TagName } from "../../types"; +import { WhereWhatPair, WorkflowFile } from '@wbr-project/wbr-interpret'; +import logger from "../../logger"; +import { Socket } from "socket.io"; +import { Page } from "playwright"; +import { + getElementInformation, + getRect, + getSelectors, + isRuleOvershadowing, + selectorAlreadyInWorkflow +} from "../selector"; +import { CustomActions } from "../../../../src/shared/types"; +import { workflow } from "../../routes"; +import { saveFile } from "../storage"; +import fs from "fs"; +import { getBestSelectorForAction } from "../utils"; +import { browserPool } from "../../server"; + +interface PersistedGeneratedData { + lastUsedSelector: string; + lastIndex: number | null; + lastAction: string; +} + +interface MetaData { + name: string; + create_date: string; + pairs: number; + update_date: string; + params: string[], +} + +/** + * Workflow generator is used to transform the user's interactions into an automatically + * generated correct workflows, using the ability of internal state persistence and + * heuristic generative algorithms. + * This class also takes care of the selector generation. + * @category WorkflowManagement + */ +export class WorkflowGenerator { + + /** + * The socket used to communicate with the client. + * @private + */ + private socket: Socket; + + /** + * The public constructor of the WorkflowGenerator. + * Takes socket for communication as a parameter and registers some important events on it. + * @param socket The socket used to communicate with the client. + * @constructor + */ + public constructor(socket: Socket) { + this.socket = socket; + this.registerEventHandlers(socket); + } + + /** + * The current workflow being recorded. + * @private + */ + private workflowRecord: WorkflowFile = { + workflow: [], + }; + + /** + * Metadata of the currently recorded workflow. + * @private + */ + private recordingMeta: MetaData = { + name: '', + create_date: '', + pairs: 0, + update_date: '', + params: [], + } + + /** + * The persistent data from the whole workflow generation process. + * Used for correct generation of other user inputs. + * @private + */ + private generatedData: PersistedGeneratedData = { + lastUsedSelector: '', + lastIndex: null, + lastAction: '', + } + + /** + * Registers the event handlers for all generator-related events on the socket. + * @param socket The socket used to communicate with the client. + * @private + */ + private registerEventHandlers = (socket: Socket) => { + socket.on('save', async (fileName: string) => { + logger.log('debug', `Saving workflow ${fileName}`); + await this.saveNewWorkflow(fileName) + }); + socket.on('new-recording', () => this.workflowRecord = { + workflow: [], + }); + socket.on('activeIndex', (data) => this.generatedData.lastIndex = parseInt(data)); + socket.on('decision', async ({ pair, actionType, decision }) => { + const id = browserPool.getActiveBrowserId(); + if (id) { + const activeBrowser = browserPool.getRemoteBrowser(id); + const currentPage = activeBrowser?.getCurrentPage(); + if (decision) { + switch (actionType) { + case 'customAction': + pair.where.selectors = [this.generatedData.lastUsedSelector]; + break; + default: break; + } + } + if (currentPage) { + await this.addPairToWorkflowAndNotifyClient(pair, currentPage); + } + } + }) + socket.on('updatePair', (data) => { + this.updatePairInWorkflow(data.index, data.pair); + }) + }; + + /** + * Adds a newly generated pair to the workflow and notifies the client about it by + * sending the updated workflow through socket. + * + * Checks some conditions for the correct addition of the pair. + * 1. The pair's action selector is already in the workflow as a different pair's where selector + * If so, the what part of the pair is added to the pair with the same where selector. + * 2. The pair's where selector is located on the page at the same time as another pair's where selector, + * having the same url. This state is called over-shadowing an already existing pair. + * If so, the pair is merged with the previous over-shadowed pair - what part is attached and + * new selector added to the where selectors. In case the over-shadowed pair is further down the + * workflow array, the new pair is added to the beginning of the workflow array. + * + * This function also makes sure to add a waitForLoadState and a generated flag + * action after every new action or pair added. The [waitForLoadState](https://playwright.dev/docs/api/class-frame#frame-wait-for-load-state) + * action waits for the networkidle event to be fired, + * and the generated flag action is used for making pausing the interpretation possible. + * + * @param pair The pair to add to the workflow. + * @param page The page to use for the state checking. + * @private + * @returns {Promise} + */ + private addPairToWorkflowAndNotifyClient = async (pair: WhereWhatPair, page: Page) => { + let matched = false; + // validate if a pair with the same where conditions is already present in the workflow + if (pair.where.selectors && pair.where.selectors[0]) { + const match = selectorAlreadyInWorkflow(pair.where.selectors[0], this.workflowRecord.workflow); + if (match) { + // if a match of where conditions is found, the new action is added into the matched rule + const matchedIndex = this.workflowRecord.workflow.indexOf(match); + if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') { + pair.what.push({ + action: 'waitForLoadState', + args: ['networkidle'], + }) + } + this.workflowRecord.workflow[matchedIndex].what = this.workflowRecord.workflow[matchedIndex].what.concat(pair.what); + logger.log('info', `Pushed ${JSON.stringify(this.workflowRecord.workflow[matchedIndex])} to workflow pair`); + matched = true; + } + } + // is the where conditions of the pair are not already in the workflow, we need to validate the where conditions + // for possible overshadowing of different rules and handle cases according to the recording logic + if (!matched) { + const handled = await this.handleOverShadowing(pair, page, this.generatedData.lastIndex || 0); + if (!handled) { + //adding waitForLoadState with networkidle, for better success rate of automatically recorded workflows + if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') { + pair.what.push({ + action: 'waitForLoadState', + args: ['networkidle'], + }) + } + if (this.generatedData.lastIndex === 0) { + this.generatedData.lastIndex = null; + // we want to have the most specific selectors at the beginning of the workflow + this.workflowRecord.workflow.unshift(pair); + } else { + this.workflowRecord.workflow.splice(this.generatedData.lastIndex || 0, 0, pair); + if (this.generatedData.lastIndex) { + this.generatedData.lastIndex = this.generatedData.lastIndex - 1; + } + } + logger.log('info', + `${JSON.stringify(pair)}: Added to workflow file on index: ${this.generatedData.lastIndex || 0}`); + } else { + logger.log('debug', + ` ${JSON.stringify(this.workflowRecord.workflow[this.generatedData.lastIndex || 0])} added action to workflow pair`); + } + } + this.socket.emit('workflow', this.workflowRecord); + logger.log('info', `Workflow emitted`); + }; + + /** + * Generates a pair for the click event. + * @param coordinates The coordinates of the click event. + * @param page The page to use for obtaining the needed data. + * @returns {Promise} + */ + public onClick = async (coordinates: Coordinates, page: Page) => { + let where: WhereWhatPair["where"] = { url: this.getBestUrl(page.url()) }; + const selector = await this.generateSelector(page, coordinates, ActionType.Click); + logger.log('debug', `Element's selector: ${selector}`); + //const element = await getElementMouseIsOver(page, coordinates); + //logger.log('debug', `Element: ${JSON.stringify(element, null, 2)}`); + if (selector) { + where.selectors = [selector]; + } + const pair: WhereWhatPair = { + where, + what: [{ + action: 'click', + args: [selector], + }], + } + if (selector) { + this.generatedData.lastUsedSelector = selector; + this.generatedData.lastAction = 'click'; + } + await this.addPairToWorkflowAndNotifyClient(pair, page); + }; + + /** + * Generates a pair for the change url event. + * @param newUrl The new url to be changed to. + * @param page The page to use for obtaining the needed data. + * @returns {Promise} + */ + public onChangeUrl = async (newUrl: string, page: Page) => { + this.generatedData.lastUsedSelector = ''; + const pair: WhereWhatPair = { + where: { url: this.getBestUrl(page.url()) }, + what: [ + { + action: 'goto', + args: [newUrl], + } + ], + } + await this.addPairToWorkflowAndNotifyClient(pair, page); + }; + + /** + * Generates a pair for the keypress event. + * @param key The key to be pressed. + * @param coordinates The coordinates of the keypress event. + * @param page The page to use for obtaining the needed data. + * @returns {Promise} + */ + public onKeyboardInput = async (key: string, coordinates: Coordinates, page: Page) => { + let where: WhereWhatPair["where"] = { url: this.getBestUrl(page.url()) }; + const selector = await this.generateSelector(page, coordinates, ActionType.Keydown); + if (selector) { + where.selectors = [selector]; + } + const pair: WhereWhatPair = { + where, + what: [{ + action: 'press', + args: [selector, key], + }], + } + if (selector) { + this.generatedData.lastUsedSelector = selector; + this.generatedData.lastAction = 'press'; + } + await this.addPairToWorkflowAndNotifyClient(pair, page); + }; + + /** + * Generates a pair for the custom action event. + * @param action The type of the custom action. + * @param settings The settings of the custom action. + * @param page The page to use for obtaining the needed data. + */ + public customAction = async (action: CustomActions, settings: any, page: Page) => { + const pair: WhereWhatPair = { + where: { url: this.getBestUrl(page.url()) }, + what: [{ + action, + args: settings ? Array.isArray(settings) ? settings : [settings] : [], + }], + } + + if (this.generatedData.lastUsedSelector) { + this.socket.emit('decision', { + pair, actionType: 'customAction', + lastData: { + selector: this.generatedData.lastUsedSelector, + action: this.generatedData.lastAction, + } + }); + } else { + await this.addPairToWorkflowAndNotifyClient(pair, page); + } + }; + + /** + * Returns the currently generated workflow. + * @returns {WorkflowFile} + */ + public getWorkflowFile = () => { + return this.workflowRecord; + }; + + /** + * Removes a pair from the currently generated workflow. + * @param index The index of the pair to be removed. + * @returns void + */ + public removePairFromWorkflow = (index: number) => { + if (index <= this.workflowRecord.workflow.length && index >= 0) { + this.workflowRecord.workflow.splice(this.workflowRecord.workflow.length - (index + 1), 1); + logger.log('debug', `pair ${index}: Removed from workflow file.`); + } else { + logger.log('error', `Delete pair ${index}: Index out of range.`); + } + }; + + /** + * Adds a new pair to the currently generated workflow. + * @param index The index on which the pair should be added. + * @param pair The pair to be added. + * @returns void + */ + public addPairToWorkflow = (index: number, pair: WhereWhatPair) => { + if (index === this.workflowRecord.workflow.length) { + this.workflowRecord.workflow.unshift(pair); + logger.log('debug', `pair ${index}: Added to workflow file.`); + } else if (index < this.workflowRecord.workflow.length && index >= 0) { + this.workflowRecord.workflow.splice( + this.workflowRecord.workflow.length - index, 0, pair); + } else { + logger.log('error', `Add pair ${index}: Index out of range.`); + } + }; + + /** + * Updates a pair in the currently generated workflow. + * @param index The index of the pair to be updated. + * @param pair The pair to be used as a replacement. + * @returns void + */ + public updatePairInWorkflow = (index: number, pair: WhereWhatPair) => { + if (index <= this.workflowRecord.workflow.length && index >= 0) { + this.workflowRecord.workflow[this.workflowRecord.workflow.length - (index + 1)] = pair; + } else { + logger.log('error', `Update pair ${index}: Index out of range.`); + } + }; + + /** + * Updates the socket used for communication with the client. + * @param socket The socket to be used for communication. + * @returns void + */ + public updateSocket = (socket: Socket): void => { + this.socket = socket; + this.registerEventHandlers(socket); + }; + + /** + * Returns the currently generated workflow without all the generated flag actions. + * @param workflow The workflow for removing the generated flag actions from. + * @private + * @returns {WorkflowFile} + */ + private removeAllGeneratedFlags = (workflow: WorkflowFile): WorkflowFile => { + for (let i = 0; i < workflow.workflow.length; i++) { + if ( + workflow.workflow[i].what[0] && + workflow.workflow[i].what[0].action === 'flag' && + workflow.workflow[i].what[0].args?.includes('generated')) { + workflow.workflow[i].what.splice(0, 1); + } + } + return workflow; + }; + + /** + * Adds generated flag actions to the workflow's pairs' what conditions. + * @param workflow The workflow for adding the generated flag actions from. + * @private + * @returns {WorkflowFile} + */ + public AddGeneratedFlags = (workflow: WorkflowFile): WorkflowFile => { + const copy = JSON.parse(JSON.stringify(workflow)); + for (let i = 0; i < workflow.workflow.length; i++) { + copy.workflow[i].what.unshift({ + action: 'flag', + args: ['generated'], + }); + } + return copy; + }; + + /** + * Enables to update the generated workflow file. + * Adds a generated flag action for possible pausing during the interpretation. + * Used for loading a recorded workflow to already initialized Generator. + * @param workflowFile The workflow file to be used as a replacement for the current generated workflow. + * @returns void + */ + public updateWorkflowFile = (workflowFile: WorkflowFile, meta: MetaData) => { + this.recordingMeta = meta; + const params = this.checkWorkflowForParams(workflowFile); + if (params) { + this.recordingMeta.params = params; + } + this.workflowRecord = workflowFile; + } + + /** + * Creates a recording metadata and stores the curren workflow + * with the metadata to the file system. + * @param fileName The name of the file. + * @returns {Promise} + */ + public saveNewWorkflow = async (fileName: string) => { + const recording = this.optimizeWorkflow(this.workflowRecord); + try { + this.recordingMeta = { + name: fileName, + create_date: this.recordingMeta.create_date || new Date().toLocaleString(), + pairs: recording.workflow.length, + update_date: new Date().toLocaleString(), + params: this.getParams() || [], + } + fs.mkdirSync('../storage/recordings', { recursive: true }) + await saveFile( + `../storage/recordings/${fileName}.waw.json`, + JSON.stringify({ recording_meta: this.recordingMeta, recording }, null, 2) + ); + } + catch (e) { + const { message } = e as Error; + logger.log('warn', `Cannot save the file to the local file system`) + } + this.socket.emit('fileSaved'); + } + + /** + * Uses a system of functions to generate a correct and unique css selector + * according to the action being performed. + * @param page The page to be used for obtaining the information and selector. + * @param coordinates The coordinates of the element. + * @param action The action for which the selector is being generated. + * @private + * @returns {Promise} + */ + private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType) => { + const elementInfo = await getElementInformation(page, coordinates); + const bestSelector = getBestSelectorForAction( + { + type: action, + tagName: elementInfo?.tagName as TagName || '', + inputType: undefined, + value: undefined, + selectors: await getSelectors(page, coordinates) || {}, + timestamp: 0, + isPassword: false, + hasOnlyText: elementInfo?.hasOnlyText || false, + } as Action, + ); + return bestSelector; + } + + /** + * Generates data for highlighting the element on client side and emits the + * highlighter event to the client. + * @param page The page to be used for obtaining data. + * @param coordinates The coordinates of the element. + * @returns {Promise} + */ + public generateDataForHighlighter = async (page: Page, coordinates: Coordinates) => { + const rect = await getRect(page, coordinates); + const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click); + if (rect) { + this.socket.emit('highlighter', { rect, selector: displaySelector }); + } + } + + /** + * Notifies the client about the change of the url if navigation + * happens after some performed action. + * @param url The new url. + * @param fromNavBar Whether the navigation is from the simulated browser's navbar or not. + * @returns void + */ + public notifyUrlChange = (url: string) => { + if (this.socket) { + this.socket.emit('urlChanged', url); + } + } + + /** + * Notifies the client about the new tab if popped-up + * @param page The page to be used for obtaining data. + * @param pageIndex The index of the page. + * @returns void + */ + public notifyOnNewTab = (page: Page, pageIndex: number) => { + if (this.socket) { + page.on('close', () => { + this.socket.emit('tabHasBeenClosed', pageIndex); + }) + const parsedUrl = new URL(page.url()); + const host = parsedUrl.hostname?.match(/\b(?!www\.)[a-zA-Z0-9]+/g)?.join('.'); + this.socket.emit('newTab', host ? host : 'new tab') + } + } + + /** + * Generates a pair for navigating to the previous page. + * This function alone adds the pair to the workflow and notifies the client. + * It's safe to always add a go back action to the first rule in the workflow and do not check + * general conditions for adding a pair to the workflow. + * @param newUrl The previous page's url. + * @returns void + */ + public onGoBack = (newUrl: string) => { + //it's safe to always add a go back action to the first rule in the workflow + this.workflowRecord.workflow[0].what.push({ + action: 'goBack', + args: [{ waitUntil: 'commit' }], + }); + this.notifyUrlChange(newUrl); + this.socket.emit('workflow', this.workflowRecord); + } + + /** + * Generates a pair for navigating to the next page. + * This function alone adds the pair to the workflow and notifies the client. + * It's safe to always add a go forward action to the first rule in the workflow and do not check + * general conditions for adding a pair to the workflow. + * @param newUrl The next page's url. + * @returns void + */ + public onGoForward = (newUrl: string) => { + //it's safe to always add a go forward action to the first rule in the workflow + this.workflowRecord.workflow[0].what.push({ + action: 'goForward', + args: [{ waitUntil: 'commit' }], + }); + this.notifyUrlChange(newUrl); + this.socket.emit('workflow', this.workflowRecord); + } + + /** + * Checks and returns possible pairs that would get over-shadowed by the pair + * from the current workflow. + * @param pair The pair that could be over-shadowing. + * @param page The page to be used for checking the visibility and accessibility of the selectors. + * @private + * @returns {Promise} + */ + private IsOverShadowingAction = async (pair: WhereWhatPair, page: Page) => { + type possibleOverShadow = { + index: number; + isOverShadowing: boolean; + } + + const possibleOverShadow: possibleOverShadow[] = []; + const haveSameUrl = this.workflowRecord.workflow + .filter((p, index) => { + if (p.where.url === pair.where.url) { + possibleOverShadow.push({ index: index, isOverShadowing: false }); + return true; + } else { + return false; + } + }); + + if (haveSameUrl.length !== 0) { + for (let i = 0; i < haveSameUrl.length; i++) { + //@ts-ignore + if (haveSameUrl[i].where.selectors && haveSameUrl[i].where.selectors.length > 0) { + //@ts-ignore + const isOverShadowing = await isRuleOvershadowing(haveSameUrl[i].where.selectors, page); + if (isOverShadowing) { + possibleOverShadow[i].isOverShadowing = true; + } + } + } + } + return possibleOverShadow; + } + + + /** + * General over-shadowing handler. + * Checks for possible over-shadowed pairs and if found, + * adds the pair to the workflow in the correct way. + * @param pair The pair that could be over-shadowing. + * @param page The page to be used for checking the visibility and accessibility of the selectors. + * @private + * @returns {Promise} + */ + private handleOverShadowing = async (pair: WhereWhatPair, page: Page, index: number): Promise => { + const overShadowing = (await this.IsOverShadowingAction(pair, page)) + .filter((p) => p.isOverShadowing); + if (overShadowing.length !== 0) { + for (const overShadowedAction of overShadowing) { + if (overShadowedAction.index === index) { + if (pair.where.selectors) { + for (const selector of pair.where.selectors) { + if (this.workflowRecord.workflow[index].where.selectors?.includes(selector)) { + break; + } else { + // add new selector to the where part of the overshadowing pair + this.workflowRecord.workflow[index].where.selectors?.push(selector); + } + } + } + // push the action automatically to the first/the closest rule which would be overShadowed + this.workflowRecord.workflow[index].what = + this.workflowRecord.workflow[index].what.concat(pair.what); + return true; + } else { + // notify client about overshadowing a further rule + return false; + } + } + } + return false; + } + + /** + * Returns the best possible url representation for a where condition according to the heuristics. + * @param url The url to be checked and possibly replaced. + * @private + * @returns {string | {$regex: string}} + */ + private getBestUrl = (url: string) => { + const parsedUrl = new URL(url); + const protocol = parsedUrl.protocol === 'https:' || parsedUrl.protocol === 'http:' ? `${parsedUrl.protocol}//` : parsedUrl.protocol; + const regex = new RegExp(/(?=.*[A-Z])/g) + // remove all params with uppercase letters, they are most likely dynamically generated + // also escapes all regex characters from the params + const search = parsedUrl.search + .replace(/[.*+?^${}()|[\]\\]/g, '\\$&') + .split('&').map((param, index) => { + if (!regex.test(param)) { + return param; + } else { + return '.*'; + } + }) + .join('&'); + let bestUrl; + if (search) { + bestUrl = { + $regex: `^${protocol}${parsedUrl.host}${parsedUrl.pathname}${search}${parsedUrl.hash}` + } + } else { + bestUrl = `${protocol}${parsedUrl.host}${parsedUrl.pathname}${parsedUrl.hash}`; + } + return bestUrl; + } + + /** + * Returns parameters if present in the workflow or null. + * @param workflow The workflow to be checked. + */ + private checkWorkflowForParams = (workflow: WorkflowFile): string[] | null => { + // for now the where condition cannot have any params, so we're checking only what part of the pair + // where only the args part of what condition can have a parameter + for (const pair of workflow.workflow) { + for (const condition of pair.what) { + if (condition.args) { + const params: any[] = []; + condition.args.forEach((arg) => { + if (arg.$param) { + params.push(arg.$param); + } + }) + if (params.length !== 0) { + return params; + } + } + } + } + return null; + } + + /** + * A function for workflow optimization once finished. + * @param workflow The workflow to be optimized. + */ + private optimizeWorkflow = (workflow: WorkflowFile) => { + + // replace a sequence of press actions by a single fill action + let input = { + selector: '', + value: '', + actionCounter: 0, + }; + + const pushTheOptimizedAction = (pair: WhereWhatPair, index: number) => { + if (input.value.length === 1) { + // when only one press action is present, keep it and add a waitForLoadState action + pair.what.splice(index + 1, 0, { + action: 'waitForLoadState', + args: ['networkidle'], + }) + } else { + // when more than one press action is present, add a type action + pair.what.splice(index - input.actionCounter, input.actionCounter, { + action: 'type', + args: [input.selector, input.value], + }, { + action: 'waitForLoadState', + args: ['networkidle'], + }); + } + } + + + for (const pair of workflow.workflow) { + pair.what.forEach((condition, index) => { + if (condition.action === 'press') { + if (condition.args && condition.args[1]) { + if (!input.selector) { + input.selector = condition.args[0]; + } + if (input.selector === condition.args[0]) { + input.actionCounter++; + if (condition.args[1].length === 1) { + input.value = input.value + condition.args[1]; + } else if (condition.args[1] === 'Backspace') { + input.value = input.value.slice(0, -1); + } else if (condition.args[1] !== 'Shift') { + pushTheOptimizedAction(pair, index); + pair.what.splice(index + 1, 0, { + action: 'waitForLoadState', + args: ['networkidle'], + }) + input = { selector: '', value: '', actionCounter: 0 }; + } + } else { + pushTheOptimizedAction(pair, index); + input = { + selector: condition.args[0], + value: condition.args[1], + actionCounter: 1, + }; + } + } + } else { + if (input.value.length !== 0) { + pushTheOptimizedAction(pair, index); + // clear the input + input = { selector: '', value: '', actionCounter: 0 }; + } + } + }); + } + return workflow; + } + + /** + * Returns workflow params from the stored metadata. + */ + public getParams = (): string[] | null => { + return this.checkWorkflowForParams(this.workflowRecord); + } + + /** + * Clears the last generated data index. + */ + public clearLastIndex = () => { + this.generatedData.lastIndex = null; + } +}