Merge pull request #208 from getmaxun/store-inputs
feat: handle browser link navigation
This commit is contained in:
@@ -249,7 +249,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
||||
}
|
||||
},
|
||||
(key) => key // Use the original key in the output
|
||||
));
|
||||
)) || [];
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -16,6 +16,23 @@ import Concurrency from './utils/concurrency';
|
||||
import Preprocessor from './preprocessor';
|
||||
import log, { Level } from './utils/logger';
|
||||
|
||||
/**
|
||||
* Extending the Window interface for custom scraping functions.
|
||||
*/
|
||||
declare global {
|
||||
// Ambient typing only: these members add signatures to the global Window type;
// no implementation is defined in this block.
interface Window {
|
||||
// Takes a selector (or null) and returns an array of string-valued records.
scrape: (selector: string | null) => Record<string, string>[];
|
||||
// Takes a schema keyed by field name; each entry carries a selector, tag and attribute.
scrapeSchema: (
|
||||
schema: Record<string, { selector: string; tag: string; attribute: string }>
|
||||
) => Record<string, any>;
|
||||
// Takes a list selector, field definitions, an optional limit and a pagination setting;
// returns an array of records. `fields` and `pagination` are untyped here.
scrapeList: (config: { listSelector: string; fields: any; limit?: number; pagination: any }) => Record<string, any>[];
|
||||
// Takes a list selector; each returned item pairs a selector with an innerText string.
scrapeListAuto: (listSelector: string) => { selector: string; innerText: string }[];
|
||||
// `pages` is optional; presumably the number of pages to scroll by — TODO confirm against the injected script.
scrollDown: (pages?: number) => void;
|
||||
// Counterpart of scrollDown; same optional `pages` argument.
scrollUp: (pages?: number) => void;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Defines optional interpreter options (passed in constructor)
|
||||
*/
|
||||
@@ -31,7 +48,6 @@ interface InterpreterOptions {
|
||||
}>
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Class for running the Smart Workflows.
|
||||
*/
|
||||
@@ -50,6 +66,8 @@ export default class Interpreter extends EventEmitter {
|
||||
|
||||
private blocker: PlaywrightBlocker | null = null;
|
||||
|
||||
private cumulativeResults: Record<string, any>[] = [];
|
||||
|
||||
constructor(workflow: WorkflowFile, options?: Partial<InterpreterOptions>) {
|
||||
super();
|
||||
this.workflow = workflow.workflow;
|
||||
@@ -57,7 +75,9 @@ export default class Interpreter extends EventEmitter {
|
||||
this.options = {
|
||||
maxRepeats: 5,
|
||||
maxConcurrency: 5,
|
||||
serializableCallback: (data) => { log(JSON.stringify(data), Level.WARN); },
|
||||
serializableCallback: (data) => {
|
||||
log(JSON.stringify(data), Level.WARN);
|
||||
},
|
||||
binaryCallback: () => { log('Received binary data, thrashing them.', Level.WARN); },
|
||||
debug: false,
|
||||
debugChannel: {},
|
||||
@@ -214,11 +234,11 @@ export default class Interpreter extends EventEmitter {
|
||||
// every condition is treated as a single context
|
||||
|
||||
switch (key as keyof typeof operators) {
|
||||
case '$and':
|
||||
case '$and' as keyof typeof operators:
|
||||
return array?.every((x) => this.applicable(x, context));
|
||||
case '$or':
|
||||
case '$or' as keyof typeof operators:
|
||||
return array?.some((x) => this.applicable(x, context));
|
||||
case '$not':
|
||||
case '$not' as keyof typeof operators:
|
||||
return !this.applicable(<Where>value, context); // $not should be a unary operator
|
||||
default:
|
||||
throw new Error('Undefined logic operator.');
|
||||
@@ -233,9 +253,9 @@ export default class Interpreter extends EventEmitter {
|
||||
};
|
||||
|
||||
switch (key as keyof typeof meta) {
|
||||
case '$before':
|
||||
case '$before' as keyof typeof meta:
|
||||
return !usedActions.find(testRegexString);
|
||||
case '$after':
|
||||
case '$after' as keyof typeof meta:
|
||||
return !!usedActions.find(testRegexString);
|
||||
default:
|
||||
throw new Error('Undefined meta operator.');
|
||||
@@ -308,9 +328,43 @@ export default class Interpreter extends EventEmitter {
|
||||
|
||||
// Runs window.scrapeSchema in the page, folds the outcome into the interpreter-wide
// `cumulativeResults`, and reports a single merged record through `serializableCallback`.
scrapeSchema: async (schema: Record<string, { selector: string; tag: string, attribute: string; }>) => {
  await this.ensureScriptsLoaded(page);

  const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema);

  // Normalise so that a single object and an array of objects share one code path.
  const newResults = Array.isArray(scrapeResult) ? scrapeResult : [scrapeResult];

  // A key is appended only while no earlier entry already holds a defined value for it,
  // so values captured by previous scrapeSchema steps are never overwritten.
  for (const result of newResults) {
    for (const [key, value] of Object.entries(result)) {
      const keyExists = this.cumulativeResults.some(
        (item) => key in item && item[key] !== undefined
      );

      if (!keyExists) {
        this.cumulativeResults.push({ [key]: value });
      }
    }
  }

  // Flatten the single-key entries into one record, ignoring undefined values.
  const merged: Record<string, any> = {};
  for (const entry of this.cumulativeResults) {
    for (const [key, value] of Object.entries(entry)) {
      if (value !== undefined) {
        merged[key] = value;
      }
    }
  }
  const mergedResult: Record<string, string>[] = [merged];

  // Scraped data is only dumped when debugging is enabled, via the interpreter's own logger.
  if (this.options.debug) {
    this.log(`CUMULATIVE results: \n${JSON.stringify(this.cumulativeResults, null, 2)}`, Level.WARN);
    this.log(`MERGED results: \n${JSON.stringify(mergedResult, null, 2)}`, Level.WARN);
  }

  // Only the merged record is emitted; the raw per-call result is not reported separately.
  await this.options.serializableCallback(mergedResult);
},
|
||||
|
||||
scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => {
|
||||
@@ -357,7 +411,7 @@ export default class Interpreter extends EventEmitter {
|
||||
};
|
||||
|
||||
for (const step of steps) {
|
||||
this.log(`Launching ${step.action}`, Level.LOG);
|
||||
this.log(`Launching ${String(step.action)}`, Level.LOG);
|
||||
|
||||
if (step.action in wawActions) {
|
||||
// "Arrayifying" here should not be needed (TS + syntax checker - only arrays; but why not)
|
||||
@@ -365,7 +419,7 @@ export default class Interpreter extends EventEmitter {
|
||||
await wawActions[step.action as CustomFunctions](...(params ?? []));
|
||||
} else {
|
||||
// Implements the dot notation for the "method name" in the workflow
|
||||
const levels = step.action.split('.');
|
||||
const levels = String(step.action).split('.');
|
||||
const methodName = levels[levels.length - 1];
|
||||
|
||||
let invokee: any = page;
|
||||
@@ -534,9 +588,14 @@ export default class Interpreter extends EventEmitter {
|
||||
if (this.options.debug) {
|
||||
this.log(`Current state is: \n${JSON.stringify(pageState, null, 2)}`, Level.WARN);
|
||||
}
|
||||
const actionId = workflow.findIndex(
|
||||
(step) => this.applicable(step.where, pageState, usedActions),
|
||||
);
|
||||
|
||||
const actionId = workflow.findIndex((step) => {
|
||||
const isApplicable = this.applicable(step.where, pageState, usedActions);
|
||||
console.log(`Where:`, step.where);
|
||||
console.log(`Page state:`, pageState);
|
||||
console.log(`Match result: ${isApplicable}`);
|
||||
return isApplicable;
|
||||
});
|
||||
|
||||
const action = workflow[actionId];
|
||||
|
||||
|
||||
@@ -22,6 +22,7 @@ import { getBestSelectorForAction } from "../utils";
|
||||
import { browserPool } from "../../server";
|
||||
import { uuid } from "uuidv4";
|
||||
import { capture } from "../../utils/analytics"
|
||||
import { encrypt } from "../../utils/auth";
|
||||
|
||||
interface PersistedGeneratedData {
|
||||
lastUsedSelector: string;
|
||||
@@ -159,6 +160,55 @@ export class WorkflowGenerator {
|
||||
})
|
||||
};
|
||||
|
||||
/**
 * Checks whether the list selector of a scrapeList action is visible on the current page.
 * @param page The current Playwright Page object.
 * @param config The scrapeList configuration object; only `listSelector` is read.
 * @returns {Promise<string[]>} `[listSelector]` when it is visible, otherwise an empty array.
 */
private async getSelectorsForScrapeList(page: Page, config: {
  listSelector: string;
  fields: any;
  limit?: number;
  pagination: any;
}): Promise<string[]> {
  const target = config.listSelector;

  // Nothing to verify when no list selector was supplied.
  if (!target) {
    return [];
  }

  // A visibility probe that rejects is treated as "not visible".
  const visible = await page.isVisible(target).catch(() => false);

  if (!visible) {
    logger.log('warn', `List selector ${target} is not visible on the page.`);
    return [];
  }

  const found: string[] = [target];
  logger.log('debug', `List selector ${target} is actionable.`);
  return found;
}
|
||||
|
||||
/**
 * Collects the field selectors of a scrapeSchema action that are visible on the current page.
 * @param page The current Playwright Page object.
 * @param schema The scrapeSchema configuration object, keyed by field name.
 * @returns {Promise<string[]>} The visible selectors, in the order the schema lists them.
 */
private async getSelectorsForSchema(page: Page, schema: Record<string, { selector: string }>): Promise<string[]> {
  // Entries without a usable selector can never be visible, so they are dropped up front.
  const selectors = Object.values(schema)
    .map((field) => field?.selector)
    .filter((selector): selector is string => typeof selector === 'string' && selector.length > 0);

  // Probe every selector concurrently; Promise.all keeps the results aligned with `selectors`.
  // A probe that rejects counts as "not visible".
  const visibility = await Promise.all(
    selectors.map((selector) => page.isVisible(selector).catch(() => false)),
  );

  return selectors.filter((_, index) => visibility[index]);
}
|
||||
|
||||
/**
|
||||
* Adds a newly generated pair to the workflow and notifies the client about it by
|
||||
* sending the updated workflow through socket.
|
||||
@@ -184,55 +234,67 @@ export class WorkflowGenerator {
|
||||
*/
|
||||
private addPairToWorkflowAndNotifyClient = async (pair: WhereWhatPair, page: Page) => {
  // Appends a trailing `waitForLoadState('networkidle')` to the pair unless its leading
  // action is itself a wait or a key press; used for better success rate of recorded workflows.
  const appendLoadStateWait = () => {
    const leadingAction = pair.what[0].action;
    if (leadingAction !== 'waitForLoadState' && leadingAction !== 'press') {
      pair.what.push({
        action: 'waitForLoadState',
        args: ['networkidle'],
      });
    }
  };

  let matched = false;

  // Check for scrapeSchema actions and enhance the where condition with the visible field selectors
  if (pair.what[0].action === 'scrapeSchema') {
    const schema = pair.what[0]?.args?.[0];
    if (schema) {
      const additionalSelectors = await this.getSelectorsForSchema(page, schema);
      pair.where.selectors = [...(pair.where.selectors || []), ...additionalSelectors];
    }
  }

  // Same enhancement for scrapeList actions, based on the list selector
  if (pair.what[0].action === 'scrapeList') {
    const config = pair.what[0]?.args?.[0];
    if (config) {
      const actionableSelectors = await this.getSelectorsForScrapeList(page, config);
      pair.where.selectors = [...(pair.where.selectors || []), ...actionableSelectors];
    }
  }

  // Validate if the pair is already in the workflow
  if (pair.where.selectors && pair.where.selectors[0]) {
    const match = selectorAlreadyInWorkflow(pair.where.selectors[0], this.workflowRecord.workflow);
    if (match) {
      // if a match of where conditions is found, the new action is added into the matched rule
      const matchedIndex = this.workflowRecord.workflow.indexOf(match);
      appendLoadStateWait();
      this.workflowRecord.workflow[matchedIndex].what = this.workflowRecord.workflow[matchedIndex].what.concat(pair.what);
      logger.log('info', `Pushed ${JSON.stringify(this.workflowRecord.workflow[matchedIndex])} to workflow pair`);
      matched = true;
    }
  }

  // Handle cases where the where condition isn't already present: check for possible
  // overshadowing of different rules first, then insert the pair as a new rule.
  if (!matched) {
    const handled = await this.handleOverShadowing(pair, page, this.generatedData.lastIndex || 0);
    if (!handled) {
      appendLoadStateWait();
      if (this.generatedData.lastIndex === 0) {
        this.generatedData.lastIndex = null;
        // we want to have the most specific selectors at the beginning of the workflow
        this.workflowRecord.workflow.unshift(pair);
      } else {
        this.workflowRecord.workflow.splice(this.generatedData.lastIndex || 0, 0, pair);
        // Step the insertion index back exactly once per inserted pair.
        if (this.generatedData.lastIndex) {
          this.generatedData.lastIndex -= 1;
        }
      }
      logger.log('info',
        `${JSON.stringify(pair)}: Added to workflow file on index: ${this.generatedData.lastIndex || 0}`);
    } else {
      logger.log('debug',
        ` ${JSON.stringify(this.workflowRecord.workflow[this.generatedData.lastIndex || 0])} added action to workflow pair`);
    }
  }

  // Emit the updated workflow to the client
  this.socket.emit('workflow', this.workflowRecord);
  logger.log('info', `Workflow emitted`);
};
|
||||
|
||||
|
||||
/**
|
||||
* Generates a pair for the click event.
|
||||
@@ -300,7 +362,7 @@ export class WorkflowGenerator {
|
||||
where,
|
||||
what: [{
|
||||
action: 'press',
|
||||
args: [selector, key],
|
||||
args: [selector, encrypt(key)],
|
||||
}],
|
||||
}
|
||||
if (selector) {
|
||||
@@ -797,7 +859,7 @@ export class WorkflowGenerator {
|
||||
// when more than one press action is present, add a type action
|
||||
pair.what.splice(index - input.actionCounter, input.actionCounter, {
|
||||
action: 'type',
|
||||
args: [input.selector, input.value],
|
||||
args: [input.selector, encrypt(input.value)],
|
||||
}, {
|
||||
action: 'waitForLoadState',
|
||||
args: ['networkidle'],
|
||||
|
||||
@@ -3,6 +3,38 @@ import logger from "../../logger";
|
||||
import { Socket } from "socket.io";
|
||||
import { Page } from "playwright";
|
||||
import { InterpreterSettings } from "../../types";
|
||||
import { decrypt } from "../../utils/auth";
|
||||
|
||||
/**
 * Produces a deep copy of the workflow in which the second argument of every
 * `type` and `press` action has been passed through `decrypt`.
 * The workflow passed in is not modified. A value that is not a string, or that
 * fails to decrypt, is replaced with an empty string and an error is logged.
 * @param workflow The workflow to decrypt.
 */
function decryptWorkflow(workflow: WorkflowFile): WorkflowFile {
  // JSON round trip yields an independent copy that can be edited in place.
  const copy = JSON.parse(JSON.stringify(workflow)) as WorkflowFile;

  for (const pair of copy.workflow) {
    for (const step of pair.what) {
      const carriesInput = step.action === 'type' || step.action === 'press';
      if (!carriesInput || !Array.isArray(step.args) || step.args.length <= 1) {
        continue;
      }

      try {
        const stored = step.args[1];
        if (typeof stored !== 'string') {
          logger.log('error', 'Encrypted value is not a string');
          step.args[1] = '';
        } else {
          step.args[1] = decrypt(stored);
        }
      } catch (error: unknown) {
        const errorMessage = error instanceof Error ? error.message : String(error);
        logger.log('error', `Failed to decrypt input value: ${errorMessage}`);
        step.args[1] = '';
      }
    }
  }

  return copy;
}
|
||||
|
||||
/**
|
||||
* This class implements the main interpretation functions.
|
||||
@@ -123,6 +155,9 @@ export class WorkflowInterpreter {
|
||||
) => {
|
||||
const params = settings.params ? settings.params : null;
|
||||
delete settings.params;
|
||||
|
||||
const decryptedWorkflow = decryptWorkflow(workflow);
|
||||
|
||||
const options = {
|
||||
...settings,
|
||||
debugChannel: {
|
||||
@@ -143,7 +178,7 @@ export class WorkflowInterpreter {
|
||||
}
|
||||
}
|
||||
|
||||
const interpreter = new Interpreter(workflow, options);
|
||||
const interpreter = new Interpreter(decryptedWorkflow, options);
|
||||
this.interpreter = interpreter;
|
||||
|
||||
interpreter.on('flag', async (page, resume) => {
|
||||
@@ -212,6 +247,9 @@ export class WorkflowInterpreter {
|
||||
public InterpretRecording = async (workflow: WorkflowFile, page: Page, settings: InterpreterSettings) => {
|
||||
const params = settings.params ? settings.params : null;
|
||||
delete settings.params;
|
||||
|
||||
const decryptedWorkflow = decryptWorkflow(workflow);
|
||||
|
||||
const options = {
|
||||
...settings,
|
||||
debugChannel: {
|
||||
@@ -234,15 +272,19 @@ export class WorkflowInterpreter {
|
||||
}
|
||||
}
|
||||
|
||||
const interpreter = new Interpreter(workflow, options);
|
||||
const interpreter = new Interpreter(decryptedWorkflow, options);
|
||||
this.interpreter = interpreter;
|
||||
|
||||
const status = await interpreter.run(page, params);
|
||||
|
||||
const lastArray = this.serializableData.length > 1
|
||||
? [this.serializableData[this.serializableData.length - 1]]
|
||||
: this.serializableData;
|
||||
|
||||
const result = {
|
||||
log: this.debugMessages,
|
||||
result: status,
|
||||
serializableOutput: this.serializableData.reduce((reducedObject, item, index) => {
|
||||
serializableOutput: lastArray.reduce((reducedObject, item, index) => {
|
||||
return {
|
||||
[`item-${index}`]: item,
|
||||
...reducedObject,
|
||||
|
||||
@@ -54,6 +54,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
const [showCaptureScreenshot, setShowCaptureScreenshot] = useState(true);
|
||||
const [showCaptureText, setShowCaptureText] = useState(true);
|
||||
const [hoverStates, setHoverStates] = useState<{ [id: string]: boolean }>({});
|
||||
const [browserStepIdList, setBrowserStepIdList] = useState<number[]>([]);
|
||||
|
||||
const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState } = useGlobalInfoStore();
|
||||
const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot, getList, startGetList, stopGetList, startPaginationMode, stopPaginationMode, paginationType, updatePaginationType, limitType, customLimit, updateLimitType, updateCustomLimit, stopLimitMode, startLimitMode, captureStage, setCaptureStage } = useActionContext();
|
||||
@@ -195,12 +196,18 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
// Builds the scrapeSchema settings (label -> selector object) from the text steps that have
// not been handed out by an earlier call, and remembers every step visited this time.
const getTextSettingsObject = useCallback(() => {
  const settings: Record<string, { selector: string; tag?: string;[key: string]: any }> = {};
  const newlySeenIds: number[] = [];

  browserSteps.forEach(step => {
    // Steps recorded by a previous call are skipped so they are not emitted twice.
    if (browserStepIdList.includes(step.id)) {
      return;
    }

    if (step.type === 'text' && step.label && step.selectorObj?.selector) {
      settings[step.label] = step.selectorObj;
    }
    // Every visited step is remembered, not only the text ones.
    newlySeenIds.push(step.id);
  });

  // One state update for the whole batch instead of one per step.
  if (newlySeenIds.length > 0) {
    setBrowserStepIdList(prevList => [...prevList, ...newlySeenIds]);
  }

  return settings;
}, [browserSteps, browserStepIdList]);
|
||||
|
||||
|
||||
const stopCaptureAndEmitGetTextSettings = useCallback(() => {
|
||||
@@ -211,6 +218,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
}
|
||||
stopGetText();
|
||||
const settings = getTextSettingsObject();
|
||||
console.log("SETTINGS", settings);
|
||||
const hasTextSteps = browserSteps.some(step => step.type === 'text');
|
||||
if (hasTextSteps) {
|
||||
socket?.emit('action', { action: 'scrapeSchema', settings });
|
||||
|
||||
Reference in New Issue
Block a user