Merge pull request #208 from getmaxun/store-inputs

feat: handle browser link navigation
This commit is contained in:
Karishma Shukla
2024-12-04 21:53:10 +05:30
committed by GitHub
5 changed files with 207 additions and 36 deletions

View File

@@ -22,6 +22,7 @@ import { getBestSelectorForAction } from "../utils";
import { browserPool } from "../../server";
import { uuid } from "uuidv4";
import { capture } from "../../utils/analytics"
import { encrypt } from "../../utils/auth";
interface PersistedGeneratedData {
lastUsedSelector: string;
@@ -159,6 +160,55 @@ export class WorkflowGenerator {
})
};
/**
 * Checks whether the list selector of a scrapeList action can be used on the current page.
 * Only `config.listSelector` is inspected; the remaining config properties are ignored here.
 * @param page The current Playwright Page object.
 * @param config The scrapeList configuration object.
 * @returns {Promise<string[]>} An array holding the list selector when it is visible, otherwise an empty array.
 */
private async getSelectorsForScrapeList(page: Page, config: {
  listSelector: string;
  fields: any;
  limit?: number;
  pagination: any;
}): Promise<string[]> {
  const selector = config.listSelector;
  // Nothing to verify when the configuration carries no list selector.
  if (!selector) {
    return [];
  }
  // A rejected visibility probe is treated the same as "not visible".
  const visible = await page.isVisible(selector).catch(() => false);
  if (!visible) {
    logger.log('warn', `List selector ${selector} is not visible on the page.`);
    return [];
  }
  logger.log('debug', `List selector ${selector} is actionable.`);
  return [selector];
}
/**
 * Collects the field selectors of a scrapeSchema action that are currently visible on the page.
 * Entries without a non-empty string selector are skipped. The visibility probes are independent,
 * so they are issued together instead of one after another; the result keeps the schema's order.
 * @param page The current Playwright Page object.
 * @param schema The scrapeSchema configuration object, mapping field names to their selector.
 * @returns {Promise<string[]>} Array of the selectors that are visible on the page.
 */
private async getSelectorsForSchema(page: Page, schema: Record<string, { selector: string }>): Promise<string[]> {
  const candidates = Object.values(schema)
    .map((field) => field?.selector)
    .filter((selector): selector is string => typeof selector === 'string' && selector.length > 0);
  // A rejected visibility probe is treated the same as "not visible".
  const visibility = await Promise.all(
    candidates.map((selector) => page.isVisible(selector).catch(() => false)),
  );
  return candidates.filter((_, index) => visibility[index]);
}
/**
* Adds a newly generated pair to the workflow and notifies the client about it by
* sending the updated workflow through socket.
@@ -184,55 +234,67 @@ export class WorkflowGenerator {
*/
private addPairToWorkflowAndNotifyClient = async (pair: WhereWhatPair, page: Page) => {
// Overview: (1) extend pair.where.selectors for scrapeSchema / scrapeList actions,
// (2) merge the pair's actions into an existing rule found via its first selector,
// (3) otherwise hand the pair to handleOverShadowing and, if that does not handle it,
// insert it as a new rule, (4) emit the whole workflow record over the socket.
// NOTE(review): a few statements below appear twice in slightly different form
// (`})` / `});` and the two lastIndex decrements) - this looks like old and new
// diff lines shown together; confirm against the actual file.
let matched = false;
// validate if a pair with the same where conditions is already present in the workflow
// Check for scrapeSchema actions and enhance the where condition
if (pair.what[0].action === 'scrapeSchema') {
const schema = pair.what[0]?.args?.[0];
if (schema) {
// append the selectors reported by getSelectorsForSchema to the pair's existing where selectors
const additionalSelectors = await this.getSelectorsForSchema(page, schema);
pair.where.selectors = [...(pair.where.selectors || []), ...additionalSelectors];
}
}
// Same enhancement for scrapeList actions, based on the list selector of the config
if (pair.what[0].action === 'scrapeList') {
const config = pair.what[0]?.args?.[0];
if (config) {
const actionableSelectors = await this.getSelectorsForScrapeList(page, config);
pair.where.selectors = [...(pair.where.selectors || []), ...actionableSelectors];
}
}
// Validate if the pair is already in the workflow
// (only the first where selector is passed to selectorAlreadyInWorkflow)
if (pair.where.selectors && pair.where.selectors[0]) {
const match = selectorAlreadyInWorkflow(pair.where.selectors[0], this.workflowRecord.workflow);
if (match) {
// if a match of where conditions is found, the new action is added into the matched rule
const matchedIndex = this.workflowRecord.workflow.indexOf(match);
// every action except waitForLoadState and press gets a trailing wait for network idle
if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') {
pair.what.push({
action: 'waitForLoadState',
args: ['networkidle'],
})
});
}
this.workflowRecord.workflow[matchedIndex].what = this.workflowRecord.workflow[matchedIndex].what.concat(pair.what);
logger.log('info', `Pushed ${JSON.stringify(this.workflowRecord.workflow[matchedIndex])} to workflow pair`);
matched = true;
}
}
// if the where conditions of the pair are not already in the workflow, we need to validate the where conditions
// for possible overshadowing of different rules and handle cases according to the recording logic
// Handle cases where the where condition isn't already present
if (!matched) {
// a missing (null/undefined) lastIndex is passed on as 0
const handled = await this.handleOverShadowing(pair, page, this.generatedData.lastIndex || 0);
if (!handled) {
// adding waitForLoadState with networkidle, for better success rate of automatically recorded workflows
if (pair.what[0].action !== 'waitForLoadState' && pair.what[0].action !== 'press') {
pair.what.push({
action: 'waitForLoadState',
args: ['networkidle'],
})
});
}
if (this.generatedData.lastIndex === 0) {
this.generatedData.lastIndex = null;
// we want to have the most specific selectors at the beginning of the workflow
this.workflowRecord.workflow.unshift(pair);
} else {
// insert at lastIndex (0 when it is unset), then decrement a non-zero lastIndex
this.workflowRecord.workflow.splice(this.generatedData.lastIndex || 0, 0, pair);
if (this.generatedData.lastIndex) {
this.generatedData.lastIndex = this.generatedData.lastIndex - 1;
this.generatedData.lastIndex -= 1;
}
}
logger.log('info',
`${JSON.stringify(pair)}: Added to workflow file on index: ${this.generatedData.lastIndex || 0}`);
} else {
// handleOverShadowing reported the pair as handled; only log the rule at lastIndex
logger.log('debug',
` ${JSON.stringify(this.workflowRecord.workflow[this.generatedData.lastIndex || 0])} added action to workflow pair`);
}
}
// Emit the updated workflow to the client
this.socket.emit('workflow', this.workflowRecord);
logger.log('info', `Workflow emitted`);
};
/**
* Generates a pair for the click event.
@@ -300,7 +362,7 @@ export class WorkflowGenerator {
where,
what: [{
action: 'press',
args: [selector, key],
args: [selector, encrypt(key)],
}],
}
if (selector) {
@@ -797,7 +859,7 @@ export class WorkflowGenerator {
// when more than one press action is present, add a type action
pair.what.splice(index - input.actionCounter, input.actionCounter, {
action: 'type',
args: [input.selector, input.value],
args: [input.selector, encrypt(input.value)],
}, {
action: 'waitForLoadState',
args: ['networkidle'],

View File

@@ -3,6 +3,38 @@ import logger from "../../logger";
import { Socket } from "socket.io";
import { Page } from "playwright";
import { InterpreterSettings } from "../../types";
import { decrypt } from "../../utils/auth";
/**
 * Produces a copy of the workflow in which the second argument of every `type` and `press`
 * action has been passed through `decrypt`. The input workflow is not modified.
 * A value that is not a string, or that fails to decrypt, is replaced by an empty string
 * and the problem is logged.
 * @param workflow The workflow to decrypt.
 * @returns A deep copy of the workflow with the input values decrypted.
 */
function decryptWorkflow(workflow: WorkflowFile): WorkflowFile {
  // JSON round-trip gives a deep copy, so the caller's workflow stays untouched.
  const copy = JSON.parse(JSON.stringify(workflow)) as WorkflowFile;

  for (const pair of copy.workflow) {
    for (const step of pair.what) {
      const carriesInput = step.action === 'type' || step.action === 'press';
      if (!carriesInput || !Array.isArray(step.args) || step.args.length <= 1) {
        continue;
      }

      const stored = step.args[1];
      if (typeof stored !== 'string') {
        logger.log('error', 'Encrypted value is not a string');
        step.args[1] = '';
        continue;
      }

      try {
        step.args[1] = decrypt(stored);
      } catch (error: unknown) {
        const reason = error instanceof Error ? error.message : String(error);
        logger.log('error', `Failed to decrypt input value: ${reason}`);
        step.args[1] = '';
      }
    }
  }

  return copy;
}
/**
* This class implements the main interpretation functions.
@@ -123,6 +155,9 @@ export class WorkflowInterpreter {
) => {
const params = settings.params ? settings.params : null;
delete settings.params;
const decryptedWorkflow = decryptWorkflow(workflow);
const options = {
...settings,
debugChannel: {
@@ -143,7 +178,7 @@ export class WorkflowInterpreter {
}
}
const interpreter = new Interpreter(workflow, options);
const interpreter = new Interpreter(decryptedWorkflow, options);
this.interpreter = interpreter;
interpreter.on('flag', async (page, resume) => {
@@ -212,6 +247,9 @@ export class WorkflowInterpreter {
public InterpretRecording = async (workflow: WorkflowFile, page: Page, settings: InterpreterSettings) => {
const params = settings.params ? settings.params : null;
delete settings.params;
const decryptedWorkflow = decryptWorkflow(workflow);
const options = {
...settings,
debugChannel: {
@@ -234,15 +272,19 @@ export class WorkflowInterpreter {
}
}
const interpreter = new Interpreter(workflow, options);
const interpreter = new Interpreter(decryptedWorkflow, options);
this.interpreter = interpreter;
const status = await interpreter.run(page, params);
const lastArray = this.serializableData.length > 1
? [this.serializableData[this.serializableData.length - 1]]
: this.serializableData;
const result = {
log: this.debugMessages,
result: status,
serializableOutput: this.serializableData.reduce((reducedObject, item, index) => {
serializableOutput: lastArray.reduce((reducedObject, item, index) => {
return {
[`item-${index}`]: item,
...reducedObject,