Files
parcer/server/src/workflow-management/classes/Generator.ts

1072 lines
36 KiB
TypeScript
Raw Normal View History

2024-12-18 18:17:50 +05:30
import { Action, ActionType, Coordinates, TagName, DatePickerEventData } from "../../types";
2024-07-31 22:46:19 +05:30
import { WhereWhatPair, WorkflowFile } from 'maxun-core';
2024-06-07 23:19:43 +05:30
import logger from "../../logger";
import { Socket } from "socket.io";
import { Page } from "playwright";
import {
getElementInformation,
getRect,
getSelectors,
2024-09-02 22:06:48 +05:30
getChildSelectors,
getNonUniqueSelectors,
2024-06-07 23:19:43 +05:30
isRuleOvershadowing,
selectorAlreadyInWorkflow
} from "../selector";
import { CustomActions } from "../../../../src/shared/types";
import { workflow } from "../../routes";
import Robot from "../../models/Robot";
import Run from "../../models/Run";
2024-06-07 23:19:43 +05:30
import { saveFile } from "../storage";
import fs from "fs";
import { getBestSelectorForAction } from "../utils";
import { browserPool } from "../../server";
2024-10-08 20:48:18 +05:30
import { uuid } from "uuidv4";
2024-10-29 03:46:13 +05:30
import { capture } from "../../utils/analytics"
import { encrypt } from "../../utils/auth";
2024-06-07 23:19:43 +05:30
2024-06-07 23:24:18 +05:30
/**
 * State the generator keeps between recorded user actions.
 */
interface PersistedGeneratedData {
  /** Selector generated for the most recent click or keypress; '' when there is none. */
  lastUsedSelector: string;
  /** Name of the most recently recorded selector-based action ('click' or 'press'). */
  lastAction: string;
  /** Workflow position reported by the client through the `activeIndex` event, or null. */
  lastIndex: number | null;
  /** Tag name associated with the last used selector. */
  lastUsedSelectorTagName: string;
  /** Inner text associated with the last used selector. */
  lastUsedSelectorInnerText: string;
}
2024-06-07 23:19:43 +05:30
/**
 * Metadata stored next to a recorded workflow.
 */
interface MetaData {
  /** Identifier of the recording. */
  id: string;
  /** Name given to the recording when it is saved. */
  name: string;
  /** Number of where-what pairs in the saved workflow. */
  pairs: number;
  /** Parameter names found in the workflow. */
  params: string[];
  /** Creation time as a display string. */
  createdAt: string;
  /** Last update time as a display string. */
  updatedAt: string;
}
2024-06-07 23:26:57 +05:30
/**
* Workflow generator is used to transform the user's interactions into an automatically
* generated correct workflows, using the ability of internal state persistence and
* heuristic generative algorithms.
* This class also takes care of the selector generation.
* @category WorkflowManagement
*/
2024-06-07 23:26:08 +05:30
export class WorkflowGenerator {
2024-06-07 23:27:11 +05:30
/**
* The socket used to communicate with the client.
* @private
*/
2024-06-08 00:46:59 +05:30
private socket: Socket;
2024-06-07 23:26:08 +05:30
2024-08-08 06:26:27 +05:30
/**
 * Whether the client is currently in `getList` mode (getList is one of the
 * custom actions from maxun-core). Selects which selector strategy is used.
 */
private getList = false;
/** List selector last received from the client through the `listSelector` event. */
private listSelector = '';
/** Pagination flag last received from the client through the `setPaginationMode` event. */
private paginationMode = false;
2024-06-07 23:27:11 +05:30
/**
 * Creates a generator bound to the given client socket and subscribes to all
 * socket events the generator reacts to.
 * @param socket The socket used to communicate with the client.
 * @constructor
 */
public constructor(socket: Socket) {
  this.socket = socket;
  this.registerEventHandlers(this.socket);
  this.initializeSocketListeners();
}
2024-06-07 23:28:15 +05:30
/**
 * The workflow that is currently being recorded; starts out empty.
 * @private
 */
private workflowRecord: WorkflowFile = { workflow: [] };
2024-06-07 23:28:15 +05:30
/**
 * Metadata describing the workflow that is currently being recorded.
 * All fields start blank and are filled in when a workflow is loaded or saved.
 * @private
 */
private recordingMeta: MetaData = {
  id: '',
  name: '',
  pairs: 0,
  params: [],
  createdAt: '',
  updatedAt: '',
};
2024-06-07 23:44:04 +05:30
/**
 * State persisted across the whole generation session; later actions are
 * generated relative to what is stored here.
 * @private
 */
private generatedData: PersistedGeneratedData = {
  lastAction: '',
  lastIndex: null,
  lastUsedSelector: '',
  lastUsedSelectorInnerText: '',
  lastUsedSelectorTagName: '',
};
2024-08-08 06:27:02 +05:30
/**
 * Subscribes to the client events that switch the generator's selector modes
 * (`setGetList`, `listSelector`, `setPaginationMode`) on the current socket.
 * Each handler copies the received value onto the matching private field.
 */
private initializeSocketListeners() {
  const { socket } = this;
  socket.on('setGetList', ({ getList }: { getList: boolean }) => {
    this.getList = getList;
  });
  socket.on('listSelector', ({ selector }: { selector: string }) => {
    this.listSelector = selector;
  });
  socket.on('setPaginationMode', ({ pagination }: { pagination: boolean }) => {
    this.paginationMode = pagination;
  });
}
2024-06-07 23:53:17 +05:30
/**
 * Registers the generator's workflow-related event handlers on the socket:
 * `save`, `new-recording`, `activeIndex`, `decision` and `updatePair`.
 * @param socket The socket used to communicate with the client.
 * @private
 */
private registerEventHandlers = (socket: Socket) => {
  socket.on('save', ({ fileName, userId }) => {
    logger.log('debug', `Saving workflow ${fileName} for user ID ${userId}`);
    this.saveNewWorkflow(fileName, userId);
  });

  // A new recording starts from an empty workflow.
  socket.on('new-recording', () => {
    this.workflowRecord = { workflow: [] };
  });

  socket.on('activeIndex', (data) => {
    this.generatedData.lastIndex = parseInt(data);
  });

  socket.on('decision', async ({ pair, actionType, decision }) => {
    const activeBrowserId = browserPool.getActiveBrowserId();
    const isRejectedCustomAction =
      Boolean(activeBrowserId) && !decision && actionType === 'customAction';
    if (!isRejectedCustomAction) {
      return;
    }
    // The client declined to use the last selector: drop it from the received pair.
    // NOTE(review): the modified pair is not written back to the workflow here.
    pair.where.selectors = pair.where.selectors.filter(
      (selector: string) => selector !== this.generatedData.lastUsedSelector
    );
  });

  socket.on('updatePair', ({ index, pair }) => {
    this.updatePairInWorkflow(index, pair);
  });
};
/**
 * Returns the selectors of a scrapeSchema definition that are currently
 * visible on the page, in schema order.
 * A selector whose visibility check rejects is treated as not visible.
 * @param page The page on which visibility is checked.
 * @param schema The schema whose field selectors are inspected.
 * @returns The subset of schema selectors that `page.isVisible` reports as visible.
 */
private async getSelectorsForSchema(page: Page, schema: Record<string, { selector: string }>): Promise<string[]> {
  const candidates = Object.values(schema).map(({ selector }) => selector);
  const visibleSelectors: string[] = [];
  for (const candidate of candidates) {
    const visible = await page.isVisible(candidate).catch(() => false);
    if (!visible) {
      continue;
    }
    visibleSelectors.push(candidate);
  }
  return visibleSelectors;
}
/**
 * Adds a newly generated pair to the workflow and sends the updated workflow
 * to the client through the socket.
 *
 * The pair is placed by the first rule that applies:
 * 1. Its first where-selector already belongs to a pair in the workflow: the
 *    pair's actions are appended to that existing pair.
 * 2. `handleOverShadowing` reports that it merged the pair into an existing one.
 * 3. Otherwise the pair is inserted at the position given by the last active
 *    index (the beginning when no index is set).
 *
 * For a `scrapeSchema` pair, the schema selectors currently visible on the page
 * are first appended to the pair's where-selectors.
 * Unless the pair's first action is `waitForLoadState` or `press`, a
 * [waitForLoadState](https://playwright.dev/docs/api/class-frame#frame-wait-for-load-state)
 * action waiting for `networkidle` is appended before the pair is stored (rules 1 and 3).
 *
 * @param pair The pair to add to the workflow.
 * @param page The page to use for the state checking.
 * @private
 * @returns {Promise<void>}
 */
private addPairToWorkflowAndNotifyClient = async (pair: WhereWhatPair, page: Page) => {
  // Appends the trailing network-idle wait unless the leading action forbids it.
  const appendNetworkIdleWait = () => {
    const leadingAction = pair.what[0].action;
    if (leadingAction === 'waitForLoadState' || leadingAction === 'press') {
      return;
    }
    pair.what.push({
      action: 'waitForLoadState',
      args: ['networkidle'],
    });
  };

  // Enhance the where condition of scrapeSchema pairs with the visible schema selectors.
  if (pair.what[0].action === 'scrapeSchema') {
    const schema = pair.what[0]?.args?.[0];
    if (schema) {
      const schemaSelectors = await this.getSelectorsForSchema(page, schema);
      pair.where.selectors = [...(pair.where.selectors || []), ...schemaSelectors];
    }
  }

  const primarySelector = pair.where.selectors?.[0];
  const existingPair = primarySelector
    ? selectorAlreadyInWorkflow(primarySelector, this.workflowRecord.workflow)
    : undefined;

  if (existingPair) {
    // Rule 1: extend the pair that already owns this selector.
    const targetIndex = this.workflowRecord.workflow.indexOf(existingPair);
    appendNetworkIdleWait();
    this.workflowRecord.workflow[targetIndex].what =
      this.workflowRecord.workflow[targetIndex].what.concat(pair.what);
  } else {
    const merged = await this.handleOverShadowing(pair, page, this.generatedData.lastIndex || 0);
    if (!merged) {
      // Rule 3: insert as a new pair at the active position.
      appendNetworkIdleWait();
      const activeIndex = this.generatedData.lastIndex;
      if (activeIndex === 0) {
        this.generatedData.lastIndex = null;
        this.workflowRecord.workflow.unshift(pair);
      } else {
        this.workflowRecord.workflow.splice(activeIndex || 0, 0, pair);
        if (activeIndex) {
          this.generatedData.lastIndex = activeIndex - 1;
        }
      }
    }
  }

  // Emit the updated workflow to the client
  this.socket.emit('workflow', this.workflowRecord);
  logger.log('info', `Workflow emitted`);
};
2024-12-18 18:17:50 +05:30
/**
 * Applies a date chosen by the client to the page and records it as a `fill` pair.
 * A failure to fill the page is logged; the pair is recorded regardless.
 * @param page The page on which the value is filled.
 * @param data The target selector and the chosen date value.
 * @returns {Promise<void>}
 */
public onDateSelection = async (page: Page, data: DatePickerEventData) => {
  const { selector: targetSelector, value: chosenValue } = data;

  try {
    await page.fill(targetSelector, chosenValue);
  } catch (error) {
    console.error("Failed to fill date value:", error);
  }

  const where = { url: this.getBestUrl(page.url()) };
  const fillAction = { action: 'fill', args: [targetSelector, chosenValue] };
  const pair: WhereWhatPair = { where, what: [fillAction] };

  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
/**
 * Applies a dropdown option chosen by the client to the page and records it
 * as a `selectOption` pair.
 * A failure to select the option on the page is logged; the pair is recorded regardless.
 * @param page The page on which the option is selected.
 * @param data The `<select>` selector and the chosen option value.
 * @returns {Promise<void>}
 */
public onDropdownSelection = async (page: Page, data: { selector: string, value: string }) => {
  const { selector, value } = data;

  try {
    await page.selectOption(selector, value);
  } catch (error) {
    // The message previously said "fill date value", copied from the date handler.
    console.error("Failed to select dropdown option:", error);
  }

  const pair: WhereWhatPair = {
    where: { url: this.getBestUrl(page.url()) },
    what: [{
      action: 'selectOption',
      args: [selector, value],
    }],
  };

  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
/**
 * Applies a time chosen by the client to the page and records it as a `fill` pair.
 * A failure to fill the page is logged; the pair is recorded regardless.
 * @param page The page on which the value is filled.
 * @param data The target selector and the chosen time value.
 * @returns {Promise<void>}
 */
public onTimeSelection = async (page: Page, data: { selector: string, value: string }) => {
  const { selector: targetSelector, value: chosenValue } = data;

  try {
    await page.fill(targetSelector, chosenValue);
  } catch (error) {
    console.error("Failed to set time value:", error);
  }

  const where = { url: this.getBestUrl(page.url()) };
  const fillAction = { action: 'fill', args: [targetSelector, chosenValue] };
  const pair: WhereWhatPair = { where, what: [fillAction] };

  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
/**
 * Applies a datetime-local value chosen by the client to the page and records
 * it as a `fill` pair.
 * A failure to fill the page is logged; the pair is recorded regardless.
 * @param page The page on which the value is filled.
 * @param data The target selector and the chosen datetime-local value.
 * @returns {Promise<void>}
 */
public onDateTimeLocalSelection = async (page: Page, data: { selector: string, value: string }) => {
  const { selector: targetSelector, value: chosenValue } = data;

  try {
    await page.fill(targetSelector, chosenValue);
  } catch (error) {
    console.error("Failed to fill datetime-local value:", error);
  }

  const where = { url: this.getBestUrl(page.url()) };
  const fillAction = { action: 'fill', args: [targetSelector, chosenValue] };
  const pair: WhereWhatPair = { where, what: [fillAction] };

  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
2024-06-08 00:08:51 +05:30
/**
 * Parses the `<option>` children out of a `<select>` element's inner HTML.
 *
 * Boolean attributes are detected by presence, so `disabled`, `disabled=""`
 * and `disabled="disabled"` are all recognised. An option without a `value`
 * attribute falls back to its text, matching how the DOM reports
 * `option.value` for such options.
 * @param innerHTML The inner HTML of the `<select>` element.
 * @returns One descriptor per option, in document order.
 */
private parseSelectOptions(innerHTML: string) {
  return innerHTML
    .split('<option')
    .slice(1) // Remove everything before the first <option
    .map((optionHtml) => {
      // Only the opening tag is inspected for attributes, never the option text.
      const tagEnd = optionHtml.indexOf('>');
      const openingTag = tagEnd === -1 ? optionHtml : optionHtml.slice(0, tagEnd);
      // Blank out quoted values so attribute names are never matched inside a value.
      const attributeNames = openingTag.replace(/"[^"]*"/g, '""');
      const hasAttribute = (name: string) =>
        new RegExp(`\\s${name}(?=[\\s=/]|$)`, 'i').test(attributeNames);

      // Requiring leading whitespace avoids matching e.g. data-value="...".
      const valueMatch = openingTag.match(/\svalue="([^"]*)"/i);

      // Extract text content between > and the next tag.
      const textMatch = optionHtml.match(/>([^<]*)</);
      const text = textMatch
        ? textMatch[1]
          .replace(/\n/g, '') // Remove all newlines
          .replace(/\s+/g, ' ') // Replace multiple spaces with single space
          .trim()
        : '';

      return {
        value: valueMatch ? valueMatch[1] : text,
        text,
        disabled: hasAttribute('disabled'),
        selected: hasAttribute('selected'),
      };
    });
}

/**
 * Generates a pair for the click event.
 *
 * Clicks on a `<select>` or on a date / time / datetime-local input are not
 * recorded directly: the client is asked to show the matching picker overlay
 * and the method returns without adding a pair.
 * @param coordinates The coordinates of the click event.
 * @param page The page to use for obtaining the needed data.
 * @returns {Promise<void>}
 */
public onClick = async (coordinates: Coordinates, page: Page) => {
  const where: WhereWhatPair["where"] = { url: this.getBestUrl(page.url()) };
  const selector = await this.generateSelector(page, coordinates, ActionType.Click);
  logger.log('debug', `Element's selector: ${selector}`);

  const elementInfo = await getElementInformation(page, coordinates, '', false);
  logger.log('debug', `Element info: ${JSON.stringify(elementInfo)}`);

  // Select dropdown: let the client render the options.
  if (elementInfo?.tagName === 'SELECT' && elementInfo.innerHTML) {
    this.socket.emit('showDropdown', {
      coordinates,
      selector,
      options: this.parseSelectOptions(elementInfo.innerHTML),
    });
    return;
  }

  // Date-like inputs: let the client render the matching picker.
  if (elementInfo?.tagName === 'INPUT') {
    // A Map (not an object literal) so input types like "constructor" cannot hit the prototype.
    const pickerEvents = new Map<string, string>([
      ['date', 'showDatePicker'],
      ['time', 'showTimePicker'],
      ['datetime-local', 'showDateTimePicker'],
    ]);
    const inputType = elementInfo?.attributes?.type;
    const pickerEvent = typeof inputType === 'string' ? pickerEvents.get(inputType) : undefined;
    if (pickerEvent) {
      this.socket.emit(pickerEvent, {
        coordinates,
        selector
      });
      return;
    }
  }

  if (selector) {
    where.selectors = [selector];
  }
  const pair: WhereWhatPair = {
    where,
    what: [{
      action: 'click',
      args: [selector],
    }],
  }
  if (selector) {
    // Remembered so a following custom action can refer to the clicked element.
    this.generatedData.lastUsedSelector = selector;
    this.generatedData.lastAction = 'click';
  }
  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
2024-06-08 00:09:49 +05:30
/**
 * Generates a `goto` pair for a url change and clears the last used selector.
 * @param newUrl The new url to be changed to.
 * @param page The page to use for obtaining the needed data.
 * @returns {Promise<void>}
 */
public onChangeUrl = async (newUrl: string, page: Page) => {
  this.generatedData.lastUsedSelector = '';

  const where = { url: this.getBestUrl(page.url()) };
  const gotoAction = { action: 'goto', args: [newUrl] };
  const pair: WhereWhatPair = { where, what: [gotoAction] };

  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
2024-06-08 00:11:36 +05:30
/**
 * Generates a `press` pair for the keypress event.
 * The key is passed through `encrypt` before it is stored in the pair.
 * @param key The key to be pressed.
 * @param coordinates The coordinates of the keypress event.
 * @param page The page to use for obtaining the needed data.
 * @returns {Promise<void>}
 */
public onKeyboardInput = async (key: string, coordinates: Coordinates, page: Page) => {
  const where: WhereWhatPair["where"] = { url: this.getBestUrl(page.url()) };
  const selector = await this.generateSelector(page, coordinates, ActionType.Keydown);
  const hasSelector = Boolean(selector);

  if (hasSelector) {
    where.selectors = [selector];
  }

  const pressAction = { action: 'press', args: [selector, encrypt(key)] };
  const pair: WhereWhatPair = { where, what: [pressAction] };

  if (hasSelector) {
    this.generatedData.lastUsedSelector = selector;
    this.generatedData.lastAction = 'press';
  }

  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
2024-09-23 17:57:50 +05:30
/**
 * Returns the tag name and inner text of the first element matching the selector.
 * Used by customAction to fill the data of the decision modal.
 * @param page The page on which the element is looked up.
 * @param selector The selector of the element to describe.
 * @returns The element's `tagName` and `innerText`, or empty strings when
 * no element matches.
 */
private async getLastUsedSelectorInfo(page: Page, selector: string) {
  const elementHandle = await page.$(selector);
  if (!elementHandle) {
    return { tagName: '', innerText: '' };
  }

  try {
    const tagName = await elementHandle.evaluate(el => (el as HTMLElement).tagName);
    // TODO: based on tagName, send data. Always innerText won't hold true. For now, can roll.
    const innerText = await elementHandle.evaluate(el => (el as HTMLElement).innerText);
    return { tagName, innerText };
  } finally {
    // Release the handle so it stops referencing the element. Cleanup is
    // best-effort and must not mask an error thrown by evaluate.
    await elementHandle.dispose().catch(() => undefined);
  }
}
2024-06-08 00:12:59 +05:30
/**
 * Generates a pair for the custom action event and adds it to the workflow.
 * When a last used selector is stored, a `decision` event describing that
 * element is emitted to the client afterwards.
 * @param action The type of the custom action.
 * @param settings The settings of the custom action; a non-array value is
 * wrapped into a single-element args array, a falsy value yields no args.
 * @param page The page to use for obtaining the needed data.
 */
public customAction = async (action: CustomActions, settings: any, page: Page) => {
  const where = { url: this.getBestUrl(page.url()) };

  let args: any[];
  if (!settings) {
    args = [];
  } else if (Array.isArray(settings)) {
    args = settings;
  } else {
    args = [settings];
  }

  const pair: WhereWhatPair = { where, what: [{ action, args }] };
  await this.addPairToWorkflowAndNotifyClient(pair, page);

  if (!this.generatedData.lastUsedSelector) {
    return;
  }

  const { tagName, innerText } = await this.getLastUsedSelectorInfo(page, this.generatedData.lastUsedSelector);
  this.socket.emit('decision', {
    pair, actionType: 'customAction',
    lastData: {
      selector: this.generatedData.lastUsedSelector,
      action: this.generatedData.lastAction,
      tagName,
      innerText,
    }
  });
};
2024-06-08 00:14:16 +05:30
/**
 * Gives access to the workflow recorded so far (the live object, not a copy).
 * @returns {WorkflowFile}
 */
public getWorkflowFile = (): WorkflowFile => this.workflowRecord;
2024-06-08 00:15:11 +05:30
/**
 * Removes a pair from the currently generated workflow.
 * Indices are counted from the end of the workflow array: index 0 refers to
 * the last array element.
 * @param index The index of the pair to be removed; must satisfy
 * `0 <= index < workflow.length`, otherwise an error is logged and nothing is removed.
 * @returns void
 */
public removePairFromWorkflow = (index: number) => {
  const { workflow } = this.workflowRecord;
  // Strict upper bound: index === length would map to position -1, and
  // splice(-1, 1) silently removes the last array element.
  if (index >= 0 && index < workflow.length) {
    workflow.splice(workflow.length - (index + 1), 1);
    logger.log('debug', `pair ${index}: Removed from workflow file.`);
  } else {
    logger.log('error', `Delete pair ${index}: Index out of range.`);
  }
};
2024-06-08 00:16:11 +05:30
/**
 * Adds a new pair to the currently generated workflow.
 * Indices are counted from the end of the workflow array: the pair is inserted
 * at array position `workflow.length - index`, so `index === workflow.length`
 * puts it at the very beginning.
 * @param index The index on which the pair should be added; must satisfy
 * `0 <= index <= workflow.length`, otherwise an error is logged.
 * @param pair The pair to be added.
 * @returns void
 */
public addPairToWorkflow = (index: number, pair: WhereWhatPair) => {
  const { workflow } = this.workflowRecord;
  if (index >= 0 && index <= workflow.length) {
    // For index === length this is position 0, i.e. the same as unshift.
    workflow.splice(workflow.length - index, 0, pair);
    logger.log('debug', `pair ${index}: Added to workflow file.`);
  } else {
    logger.log('error', `Add pair ${index}: Index out of range.`);
  }
};
2024-06-08 00:17:10 +05:30
/**
 * Updates a pair in the currently generated workflow.
 * Indices are counted from the end of the workflow array: index 0 refers to
 * the last array element.
 * @param index The index of the pair to be updated; must satisfy
 * `0 <= index < workflow.length`, otherwise an error is logged and nothing changes.
 * @param pair The pair to be used as a replacement.
 * @returns void
 */
public updatePairInWorkflow = (index: number, pair: WhereWhatPair) => {
  const { workflow } = this.workflowRecord;
  // Strict upper bound: index === length would write to workflow[-1], which
  // adds a stray property instead of replacing a pair.
  if (index >= 0 && index < workflow.length) {
    workflow[workflow.length - (index + 1)] = pair;
  } else {
    logger.log('error', `Update pair ${index}: Index out of range.`);
  }
};
2024-06-08 00:18:06 +05:30
/**
 * Updates the socket used for communication with the client and registers
 * the same listeners on it that the constructor registers.
 * @param socket The socket to be used for communication.
 * @returns void
 */
public updateSocket = (socket: Socket): void => {
  this.socket = socket;
  this.registerEventHandlers(socket);
  // Without this the new socket would never deliver setGetList / listSelector /
  // setPaginationMode, leaving the selector modes frozen at their old values.
  this.initializeSocketListeners();
};
/**
 * Strips the leading generated flag action from every pair of the workflow.
 * Only the first action of a pair is inspected; the workflow is modified in place.
 * @param workflow The workflow for removing the generated flag actions from.
 * @private
 * @returns {WorkflowFile} The same workflow object that was passed in.
 */
private removeAllGeneratedFlags = (workflow: WorkflowFile): WorkflowFile => {
  for (const pair of workflow.workflow) {
    const leading = pair.what[0];
    const isGeneratedFlag =
      leading &&
      leading.action === 'flag' &&
      leading.args?.includes('generated');
    if (isGeneratedFlag) {
      pair.what.shift();
    }
  }
  return workflow;
};
2024-06-08 00:21:48 +05:30
/**
 * Returns a deep copy (via JSON round-trip) of the workflow in which every
 * pair starts with a generated flag action. The input workflow is left untouched.
 * @param workflow The workflow to which the generated flag actions are added.
 * @returns {WorkflowFile}
 */
public AddGeneratedFlags = (workflow: WorkflowFile): WorkflowFile => {
  const flagged = JSON.parse(JSON.stringify(workflow));
  for (const pair of flagged.workflow) {
    pair.what.unshift({
      action: 'flag',
      args: ['generated'],
    });
  }
  return flagged;
};
2024-06-08 00:22:24 +05:30
/**
 * Replaces the generator's workflow and metadata with the given ones.
 * Used for loading a recorded workflow into an already initialized Generator.
 * When the workflow contains parameters, they overwrite `meta.params`
 * (on the passed-in metadata object itself).
 * @param workflowFile The workflow file to be used as a replacement for the current generated workflow.
 * @param meta The metadata to be used as a replacement for the current metadata.
 * @returns void
 */
public updateWorkflowFile = (workflowFile: WorkflowFile, meta: MetaData) => {
  this.recordingMeta = meta;

  const detectedParams = this.checkWorkflowForParams(workflowFile);
  if (detectedParams) {
    this.recordingMeta.params = detectedParams;
  }

  this.workflowRecord = workflowFile;
}
2024-06-08 00:23:00 +05:30
/**
 * Optimizes the current workflow, creates the recording metadata and stores
 * both as a new Robot record for the given user.
 * Any failure is logged as a warning; `fileSaved` is emitted to the client
 * in every case.
 * @param fileName The name under which the recording is saved.
 * @param userId The id of the user owning the recording.
 * @returns {Promise<void>}
 */
public saveNewWorkflow = async (fileName: string, userId: number) => {
  try {
    // Kept inside the try block: this method is invoked without await from the
    // socket handler, so an escaping error would be an unhandled rejection.
    const recording = this.optimizeWorkflow(this.workflowRecord);
    this.recordingMeta = {
      name: fileName,
      id: uuid(),
      createdAt: this.recordingMeta.createdAt || new Date().toLocaleString(),
      pairs: recording.workflow.length,
      updatedAt: new Date().toLocaleString(),
      params: this.getParams() || [],
    }
    const robot = await Robot.create({
      userId,
      recording_meta: this.recordingMeta,
      recording,
    });
    capture(
      'maxun-oss-robot-created',
      {
        robot_meta: robot.recording_meta,
        recording: robot.recording,
      }
    )

    logger.log('info', `Robot saved with id: ${robot.id}`);
  } catch (e) {
    // No destructuring of `e` here: a thrown null/undefined would make the
    // catch block itself throw.
    logger.log('warn', `Cannot save the file to the local file system ${e}`)
  }
  this.socket.emit('fileSaved');
}
2024-06-08 00:26:29 +05:30
/**
 * Generates the selector for the element at the given coordinates.
 *
 * Candidate selectors come from `getNonUniqueSelectors` in getList mode and
 * from `getSelectors` otherwise. In pagination mode all available candidates
 * are joined with commas in a fixed priority order; otherwise the choice is
 * delegated to `getBestSelectorForAction`.
 * @param page The page to be used for obtaining the information and selector.
 * @param coordinates The coordinates of the element.
 * @param action The action for which the selector is being generated.
 * @private
 * @returns {Promise<string|null>}
 */
private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType) => {
  const elementInfo = await getElementInformation(page, coordinates, this.listSelector, this.getList);
  const candidates = (this.getList === true)
    ? await getNonUniqueSelectors(page, coordinates, this.listSelector)
    : await getSelectors(page, coordinates);

  if (this.paginationMode && candidates) {
    // Highest priority first; unavailable candidates are dropped before joining.
    const byPriority = [
      candidates?.iframeSelector?.full,
      candidates?.shadowSelector?.full,
      candidates?.testIdSelector,
      candidates?.id,
      candidates?.hrefSelector,
      candidates?.accessibilitySelector,
      candidates?.attrSelector,
      candidates?.generalSelector
    ];
    return byPriority
      .filter(candidate => candidate != null)
      .join(',');
  }

  const actionDescriptor = {
    type: action,
    tagName: elementInfo?.tagName as TagName || '',
    inputType: undefined,
    value: undefined,
    selectors: candidates || {},
    timestamp: 0,
    isPassword: false,
    hasOnlyText: elementInfo?.hasOnlyText || false,
  } as Action;
  return getBestSelectorForAction(actionDescriptor);
}
/**
 * Collects the data needed to highlight the element at the given coordinates
 * and emits it to the client as a `highlighter` event.
 * Nothing is emitted when no rectangle can be obtained for the element.
 * In getList mode with a list selector set, the list's child selectors are
 * included in the event.
 * @param page The page to be used for obtaining data.
 * @param coordinates The coordinates of the element.
 * @returns {Promise<void>}
 */
public generateDataForHighlighter = async (page: Page, coordinates: Coordinates) => {
  const rect = await getRect(page, coordinates, this.listSelector, this.getList);
  const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click);
  const elementInfo = await getElementInformation(page, coordinates, this.listSelector, this.getList);
  if (!rect) {
    return;
  }

  // Shadow DOM specific information, only present for shadow roots.
  const shadowInfo = elementInfo?.isShadowRoot
    ? { mode: elementInfo.shadowRootMode, content: elementInfo.shadowRootContent }
    : null;
  const payload = {
    rect,
    selector: displaySelector,
    elementInfo,
    shadowInfo,
  };

  if (this.getList === true && this.listSelector !== '') {
    const childSelectors = await getChildSelectors(page, this.listSelector || '');
    this.socket.emit('highlighter', { ...payload, childSelectors })
    return;
  }
  this.socket.emit('highlighter', { ...payload });
}
2024-06-08 00:32:49 +05:30
/**
 * Emits a `urlChanged` event with the new url to the client.
 * Does nothing when no socket is set.
 * @param url The new url.
 * @returns void
 */
public notifyUrlChange = (url: string) => {
  if (!this.socket) {
    return;
  }
  this.socket.emit('urlChanged', url);
}
2024-06-08 00:33:54 +05:30
/**
 * Notifies the client about a newly opened tab and arranges for a
 * `tabHasBeenClosed` event to be emitted when that page closes.
 * The tab label is derived from the page's hostname; 'new tab' is used when
 * no label can be derived.
 * @param page The page to be used for obtaining data.
 * @param pageIndex The index of the page.
 * @returns void
 */
public notifyOnNewTab = (page: Page, pageIndex: number) => {
  if (!this.socket) {
    return;
  }

  page.on('close', () => {
    this.socket.emit('tabHasBeenClosed', pageIndex);
  })

  const { hostname } = new URL(page.url());
  const tabLabel = hostname?.match(/\b(?!www\.)[a-zA-Z0-9]+/g)?.join('.');
  this.socket.emit('newTab', tabLabel || 'new tab')
}
2024-06-08 00:37:13 +05:30
/**
 * Records a navigation to the previous page.
 * The `goBack` action is appended to the first rule in the workflow without
 * the general pair-addition checks. When the workflow is empty there is no
 * rule to attach to: a warning is logged and nothing is recorded.
 * The client is notified about the url change and receives the workflow in
 * both cases.
 * @param newUrl The previous page's url.
 * @returns void
 */
public onGoBack = (newUrl: string) => {
  const firstPair = this.workflowRecord.workflow[0];
  if (firstPair) {
    firstPair.what.push({
      action: 'goBack',
      args: [{ waitUntil: 'commit' }],
    });
  } else {
    // Previously this dereferenced workflow[0] unconditionally and threw.
    logger.log('warn', 'Go back: the workflow is empty, the action was not recorded.');
  }
  this.notifyUrlChange(newUrl);
  this.socket.emit('workflow', this.workflowRecord);
}
2024-06-08 00:38:28 +05:30
/**
 * Records a navigation to the next page.
 * The `goForward` action is appended to the first rule in the workflow without
 * the general pair-addition checks. When the workflow is empty there is no
 * rule to attach to: a warning is logged and nothing is recorded.
 * The client is notified about the url change and receives the workflow in
 * both cases.
 * @param newUrl The next page's url.
 * @returns void
 */
public onGoForward = (newUrl: string) => {
  const firstPair = this.workflowRecord.workflow[0];
  if (firstPair) {
    firstPair.what.push({
      action: 'goForward',
      args: [{ waitUntil: 'commit' }],
    });
  } else {
    // Previously this dereferenced workflow[0] unconditionally and threw.
    logger.log('warn', 'Go forward: the workflow is empty, the action was not recorded.');
  }
  this.notifyUrlChange(newUrl);
  this.socket.emit('workflow', this.workflowRecord);
}
2024-06-08 00:36:54 +05:30
2024-06-08 00:40:42 +05:30
/**
 * Lists the workflow pairs whose where-url equals the given pair's url and
 * marks those whose where-selectors `isRuleOvershadowing` reports as
 * over-shadowing on the page.
 * NOTE(review): urls are compared with `===`; `getBestUrl` can return a
 * `{ $regex }` object, and two such objects only compare equal when they are
 * the same instance — confirm this is intended.
 * @param pair The pair that could be over-shadowing.
 * @param page The page to be used for checking the visibility and accessibility of the selectors.
 * @private
 * @returns {Promise<PossibleOverShadow[]>} One entry per same-url pair, in workflow order.
 */
private IsOverShadowingAction = async (pair: WhereWhatPair, page: Page) => {
  type possibleOverShadow = {
    index: number;
    isOverShadowing: boolean;
  }

  // Collect the same-url pairs together with their result entries up front.
  const tracked: { existing: WhereWhatPair; verdict: possibleOverShadow }[] = [];
  this.workflowRecord.workflow.forEach((existing, index) => {
    if (existing.where.url === pair.where.url) {
      tracked.push({ existing, verdict: { index, isOverShadowing: false } });
    }
  });

  for (const { existing, verdict } of tracked) {
    const selectors = existing.where.selectors;
    if (!selectors || selectors.length === 0) {
      continue;
    }
    if (await isRuleOvershadowing(selectors, page)) {
      verdict.isOverShadowing = true;
    }
  }

  return tracked.map(({ verdict }) => verdict);
}
2024-06-08 00:41:39 +05:30
/**
 * General over-shadowing handler.
 * Merges the pair into the workflow pair at `index` when that pair is the
 * first over-shadowed pair found; otherwise leaves the workflow untouched.
 * NOTE(review): only the first over-shadowed pair is ever considered, and
 * selector merging stops at the first selector that is already present —
 * confirm both are intended.
 * @param pair The pair that could be over-shadowing.
 * @param page The page to be used for checking the visibility and accessibility of the selectors.
 * @param index The workflow index the pair is expected to be merged into.
 * @private
 * @returns {Promise<boolean>} True when the pair was merged into the workflow.
 */
private handleOverShadowing = async (pair: WhereWhatPair, page: Page, index: number): Promise<boolean> => {
  const candidates = await this.IsOverShadowingAction(pair, page);
  const firstOverShadowed = candidates.find((candidate) => candidate.isOverShadowing);
  if (!firstOverShadowed || firstOverShadowed.index !== index) {
    return false;
  }

  const target = this.workflowRecord.workflow[index];
  for (const selector of pair.where.selectors ?? []) {
    if (target.where.selectors?.includes(selector)) {
      break;
    }
    // add new selector to the where part of the overshadowing pair
    target.where.selectors?.push(selector);
  }

  // push the action automatically to the first/the closest rule which would be overShadowed
  target.what = target.what.concat(pair.what);
  return true;
}
2024-06-08 00:42:55 +05:30
/**
 * Returns the best possible url representation for a where condition according to the heuristics.
 *
 * A url without a query string is returned as a plain string. A url with a
 * query string is returned as a `{ $regex }` pattern anchored at the start, in
 * which every query parameter containing an uppercase letter (most likely
 * dynamically generated) is replaced by `.*`. All literal parts of the
 * pattern — protocol, host, path, remaining parameters and hash — are
 * regex-escaped so characters such as `.` or `+` only match themselves.
 * @param url The url to be checked and possibly replaced.
 * @private
 * @returns {string | {$regex: string}}
 */
private getBestUrl = (url: string) => {
  const escapeRegExp = (text: string) => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');

  const parsedUrl = new URL(url);
  const protocol = parsedUrl.protocol === 'https:' || parsedUrl.protocol === 'http:' ? `${parsedUrl.protocol}//` : parsedUrl.protocol;
  const base = `${protocol}${parsedUrl.host}${parsedUrl.pathname}`;

  // remove all params with uppercase letters, they are most likely dynamically generated
  // (a non-global regex literal keeps the test stateless)
  const search = escapeRegExp(parsedUrl.search)
    .split('&')
    .map((param) => (/[A-Z]/.test(param) ? '.*' : param))
    .join('&');

  if (search) {
    return {
      $regex: `^${escapeRegExp(base)}${search}${escapeRegExp(parsedUrl.hash)}`
    };
  }
  return `${base}${parsedUrl.hash}`;
}
2024-06-08 00:43:34 +05:30
/**
 * Returns the parameters of the first action in the workflow that has any,
 * or null when no action has parameters.
 * Only the args of the what part are inspected; for now the where condition
 * cannot have any params.
 * NOTE(review): parameters of later actions are not collected once one action
 * with parameters has been found — confirm this is intended.
 * @param workflow The workflow to be checked.
 * @returns The `$param` values of the first action that has any, or null.
 */
private checkWorkflowForParams = (workflow: WorkflowFile): string[] | null => {
  for (const pair of workflow.workflow) {
    for (const condition of pair.what) {
      if (!condition.args) {
        continue;
      }
      const params: any[] = [];
      for (const arg of condition.args) {
        // args may contain null (e.g. a click recorded without a selector),
        // so the property access has to be null-safe.
        const param = arg?.$param;
        if (param) {
          params.push(param);
        }
      }
      if (params.length !== 0) {
        return params;
      }
    }
  }
  return null;
}
2024-06-08 00:44:00 +05:30
/**
 * A function for workflow optimization once finished.
 * Replaces a sequence of press actions on the same selector by a single type
 * action followed by a waitForLoadState action. The workflow is modified in
 * place and returned.
 * NOTE(review): `onKeyboardInput` stores `encrypt(key)` as the second press
 * argument, while this function compares that argument against raw key names
 * and lengths — confirm the values seen here are what is expected.
 * @param workflow The workflow to be optimized.
 * @returns The same workflow object that was passed in.
 */
private optimizeWorkflow = (workflow: WorkflowFile) => {
  // Buffer describing the run of press actions collected so far.
  let input = {
    selector: '',
    value: '',
    actionCounter: 0,
  };

  // Writes the buffered run into the pair's actions, relative to position `index`.
  const flushBufferedInput = (pair: WhereWhatPair, index: number) => {
    if (input.value.length === 1) {
      // when only one press action is present, keep it and add a waitForLoadState action
      pair.what.splice(index + 1, 0, {
        action: 'waitForLoadState',
        args: ['networkidle'],
      });
      return;
    }
    // when more than one press action is present, add a type action
    pair.what.splice(index - input.actionCounter, input.actionCounter, {
      action: 'type',
      args: [input.selector, encrypt(input.value)],
    }, {
      action: 'waitForLoadState',
      args: ['networkidle'],
    });
  };

  for (const pair of workflow.workflow) {
    pair.what.forEach((condition, index) => {
      if (condition.action !== 'press') {
        // Any other action ends the current run of press actions.
        if (input.value.length !== 0) {
          flushBufferedInput(pair, index);
          // clear the input
          input = { selector: '', value: '', actionCounter: 0 };
        }
        return;
      }

      if (!(condition.args && condition.args[1])) {
        return;
      }

      if (!input.selector) {
        input.selector = condition.args[0];
      }

      if (input.selector !== condition.args[0]) {
        // A press on a different selector starts a new run.
        flushBufferedInput(pair, index);
        input = {
          selector: condition.args[0],
          value: condition.args[1],
          actionCounter: 1,
        };
        return;
      }

      input.actionCounter++;
      const pressedKey = condition.args[1];
      if (pressedKey.length === 1) {
        input.value = input.value + pressedKey;
      } else if (pressedKey === 'Backspace') {
        input.value = input.value.slice(0, -1);
      } else if (pressedKey !== 'Shift') {
        flushBufferedInput(pair, index);
        pair.what.splice(index + 1, 0, {
          action: 'waitForLoadState',
          args: ['networkidle'],
        });
        input = { selector: '', value: '', actionCounter: 0 };
      }
    });
  }
  return workflow;
}
2024-06-08 00:44:33 +05:30
/**
 * Returns the params found in the currently recorded workflow, or null when
 * there are none.
 */
public getParams = (): string[] | null => {
  const detectedParams = this.checkWorkflowForParams(this.workflowRecord);
  return detectedParams;
}
2024-06-08 00:44:00 +05:30
2024-06-08 00:44:47 +05:30
/**
 * Resets the last active workflow index to null.
 */
public clearLastIndex = (): void => {
  const { generatedData } = this;
  generatedData.lastIndex = null;
}
2024-12-05 23:19:55 +05:30
}