From 079863d015c1d3883d3d4c2154239fe419117046 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 6 Dec 2024 03:39:15 +0530 Subject: [PATCH 01/15] feat: add earliest selectors logic for page state --- maxun-core/src/interpret.ts | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index a7a5de47..06586038 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -121,6 +121,26 @@ export default class Interpreter extends EventEmitter { } } + private getPreviousSelectors(workflow: Workflow, actionId: number): string[] { + const selectors: string[] = []; + let index = actionId - 1; + + while (index >= 0) { + const previousSelectors = workflow[index]?.where?.selectors; + if (previousSelectors && previousSelectors.length > 0) { + previousSelectors.forEach((selector) => { + if (!selectors.includes(selector)) { + selectors.push(selector); // Avoid duplicates + } + }); + break; // Exit the loop once valid selectors are found + } + index--; // Move further back in the workflow + } + + return selectors; + } + /** * Returns the context object from given Page and the current workflow.\ * \ @@ -130,11 +150,11 @@ export default class Interpreter extends EventEmitter { * @param workflow Current **initialized** workflow (array of where-what pairs). * @returns {PageState} State of the current page. */ - private async getState(page: Page, workflow: Workflow): Promise { + private async getState(page: Page, workflow: Workflow, selectors: string[]): Promise { /** * All the selectors present in the current Workflow */ - const selectors = Preprocessor.extractSelectors(workflow); + // const selectors = Preprocessor.extractSelectors(workflow); /** * Determines whether the element targetted by the selector is [actionable](https://playwright.dev/docs/actionability). @@ -365,6 +385,7 @@ export default class Interpreter extends EventEmitter { console.log("MERGED results:", mergedResult); await this.options.serializableCallback(mergedResult); + // await this.options.serializableCallback(scrapeResult); }, scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { @@ -550,6 +571,7 @@ export default class Interpreter extends EventEmitter { // apply ad-blocker to the current page await this.applyAdBlocker(p); const usedActions: string[] = []; + const selectors: string[] = []; let lastAction = null; let repeatCount = 0; @@ -579,7 +601,7 @@ export default class Interpreter extends EventEmitter { let pageState = {}; try { - pageState = await this.getState(p, workflow); + pageState = await this.getState(p, workflow, selectors); } catch (e: any) { this.log('The browser has been closed.'); return; @@ -615,6 +637,9 @@ export default class Interpreter extends EventEmitter { try { await this.carryOutSteps(p, action.what); usedActions.push(action.id ?? 'undefined'); + + selectors.push(...this.getPreviousSelectors(workflow, actionId)); + console.log("SELECTORS", selectors); } catch (e) { this.log(e, Level.ERROR); } From 964913775e77b31061428432fcc2093e8f6f7206 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 6 Dec 2024 03:40:58 +0530 Subject: [PATCH 02/15] fix: add on load emit urlChanged --- .../browser-management/classes/RemoteBrowser.ts | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 769787da..e30632c3 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -402,14 +402,14 @@ export class RemoteBrowser { await this.currentPage?.close(); this.currentPage = newPage; if (this.currentPage) { - this.currentPage.on('framenavigated', (frame) => { - if (frame === this.currentPage?.mainFrame()) { - this.socket.emit('urlChanged', this.currentPage.url()); - } - }); - // this.currentPage.on('load', (page) => { - // this.socket.emit('urlChanged', page.url()); - // }) + // this.currentPage.on('framenavigated', (frame) => { + // if (frame === this.currentPage?.mainFrame()) { + // this.socket.emit('urlChanged', this.currentPage.url()); + // } + // }); + this.currentPage.on('load', (page) => { + this.socket.emit('urlChanged', page.url()); + }) this.client = await this.currentPage.context().newCDPSession(this.currentPage); await this.subscribeToScreencast(); } else { From 0d6633130596993aded87bf71420c9374e04a13c Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 6 Dec 2024 16:57:55 +0530 Subject: [PATCH 03/15] fix: add on load socket emit --- server/src/browser-management/classes/RemoteBrowser.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index e30632c3..cfcc96f8 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -370,11 +370,11 @@ export class RemoteBrowser { await this.stopScreencast(); this.currentPage = page; - this.currentPage.on('framenavigated', (frame) => { - if (frame === this.currentPage?.mainFrame()) { - this.socket.emit('urlChanged', this.currentPage.url()); - } - }); + // this.currentPage.on('framenavigated', (frame) => { + // if (frame === this.currentPage?.mainFrame()) { + // this.socket.emit('urlChanged', this.currentPage.url()); + // } + // }); //await this.currentPage.setViewportSize({ height: 400, width: 900 }) this.client = await this.currentPage.context().newCDPSession(this.currentPage); From bffe8389889d909c4913f6d68b4085100dc3bf8c Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 6 Dec 2024 17:06:20 +0530 Subject: [PATCH 04/15] feat: add earliest selectors from workflow --- maxun-core/src/interpret.ts | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 06586038..5c24317c 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -127,6 +127,7 @@ export default class Interpreter extends EventEmitter { while (index >= 0) { const previousSelectors = workflow[index]?.where?.selectors; + console.log("Previous Selectors:", previousSelectors); if (previousSelectors && previousSelectors.length > 0) { previousSelectors.forEach((selector) => { if (!selectors.includes(selector)) { @@ -156,6 +157,8 @@ export default class Interpreter extends EventEmitter { */ // const selectors = Preprocessor.extractSelectors(workflow); + console.log("All selectors:", selectors); + /** * Determines whether the element targetted by the selector is [actionable](https://playwright.dev/docs/actionability). * @param selector Selector to be queried @@ -164,8 +167,8 @@ export default class Interpreter extends EventEmitter { const actionable = async (selector: string): Promise => { try { const proms = [ - page.isEnabled(selector, { timeout: 500 }), - page.isVisible(selector, { timeout: 500 }), + page.isEnabled(selector, { timeout: 2000 }), + page.isVisible(selector, { timeout: 2000 }), ]; return await Promise.all(proms).then((bools) => bools.every((x) => x)); @@ -627,19 +630,26 @@ export default class Interpreter extends EventEmitter { if (this.options.debugChannel?.activeId) { this.options.debugChannel.activeId(actionId); } - + repeatCount = action === lastAction ? repeatCount + 1 : 0; - if (this.options.maxRepeats && repeatCount >= this.options.maxRepeats) { + + console.log("REPEAT COUNT", repeatCount); + if (this.options.maxRepeats && repeatCount > this.options.maxRepeats) { return; } lastAction = action; - + try { + console.log("Carrying out:", action.what); await this.carryOutSteps(p, action.what); usedActions.push(action.id ?? 'undefined'); - selectors.push(...this.getPreviousSelectors(workflow, actionId)); - console.log("SELECTORS", selectors); + const newSelectors = this.getPreviousSelectors(workflow, actionId); + newSelectors.forEach(selector => { + if (!selectors.includes(selector)) { + selectors.push(selector); + } + }); } catch (e) { this.log(e, Level.ERROR); } From 0ee50e1c26eaad4326135c76117ebe479e1606ed Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 6 Dec 2024 22:10:28 +0530 Subject: [PATCH 05/15] feat: add bottom up workflow traversal --- maxun-core/src/interpret.ts | 107 +++++++++++++++++++++++++++--------- 1 file changed, 82 insertions(+), 25 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 5c24317c..2457f79b 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -121,27 +121,55 @@ export default class Interpreter extends EventEmitter { } } - private getPreviousSelectors(workflow: Workflow, actionId: number): string[] { - const selectors: string[] = []; - let index = actionId - 1; + // private getPreviousSelectors(workflow: Workflow, actionId: number): string[] { + // const selectors: string[] = []; + // let index = actionId - 1; - while (index >= 0) { - const previousSelectors = workflow[index]?.where?.selectors; - console.log("Previous Selectors:", previousSelectors); - if (previousSelectors && previousSelectors.length > 0) { - previousSelectors.forEach((selector) => { + // while (index >= 0) { + // const previousSelectors = workflow[index]?.where?.selectors; + // console.log("Previous Selectors:", previousSelectors); + // if (previousSelectors && previousSelectors.length > 0) { + // previousSelectors.forEach((selector) => { + // if (!selectors.includes(selector)) { + // selectors.push(selector); // Avoid duplicates + // } + // }); + // break; // Exit the loop once valid selectors are found + // } + // index--; // Move further back in the workflow + // } + + // return selectors; + // } + + private getSelectors(workflow: Workflow, actionId: number): string[] { + const selectors: string[] = []; + + // Validate actionId + if (actionId <= 0) { + console.log("No previous selectors to collect."); + return selectors; // Empty array as there are no previous steps + } + + // Iterate from the start up to (but not including) actionId + for (let index = 0; index < actionId; index++) { + const currentSelectors = workflow[index]?.where?.selectors; + console.log(`Selectors at step ${index}:`, currentSelectors); + + if (currentSelectors && currentSelectors.length > 0) { + currentSelectors.forEach((selector) => { if (!selectors.includes(selector)) { selectors.push(selector); // Avoid duplicates } }); - break; // Exit the loop once valid selectors are found } - index--; // Move further back in the workflow } + console.log("Collected Selectors:", selectors); return selectors; } + /** * Returns the context object from given Page and the current workflow.\ * \ @@ -167,8 +195,8 @@ export default class Interpreter extends EventEmitter { const actionable = async (selector: string): Promise => { try { const proms = [ - page.isEnabled(selector, { timeout: 2000 }), - page.isVisible(selector, { timeout: 2000 }), + page.isEnabled(selector, { timeout: 500 }), + page.isVisible(selector, { timeout: 500 }), ]; return await Promise.all(proms).then((bools) => bools.every((x) => x)); @@ -198,7 +226,7 @@ export default class Interpreter extends EventEmitter { ...p, [cookie.name]: cookie.value, }), {}), - selectors: presentSelectors, + selectors: selectors, }; } @@ -570,11 +598,29 @@ export default class Interpreter extends EventEmitter { return allResults; } + private getMatchingActionId(workflow: Workflow, pageState: PageState, usedActions: string[]) { + for (let actionId = workflow.length - 1; actionId >= 0; actionId--) { + const step = workflow[actionId]; + const isApplicable = this.applicable(step.where, pageState, usedActions); + console.log("-------------------------------------------------------------"); + console.log(`Where:`, step.where); + console.log(`Page state:`, pageState); + console.log(`Match result: ${isApplicable}`); + console.log("-------------------------------------------------------------"); + + if (isApplicable) { + return actionId; + } + } + } + private async runLoop(p: Page, workflow: Workflow) { + const workflowCopy: Workflow = JSON.parse(JSON.stringify(workflow)); + // apply ad-blocker to the current page await this.applyAdBlocker(p); const usedActions: string[] = []; - const selectors: string[] = []; + let selectors: string[] = []; let lastAction = null; let repeatCount = 0; @@ -584,7 +630,7 @@ export default class Interpreter extends EventEmitter { * e.g. via `enqueueLinks`. */ p.on('popup', (popup) => { - this.concurrency.addJob(() => this.runLoop(popup, workflow)); + this.concurrency.addJob(() => this.runLoop(popup, workflowCopy)); }); /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */ @@ -604,7 +650,8 @@ export default class Interpreter extends EventEmitter { let pageState = {}; try { - pageState = await this.getState(p, workflow, selectors); + pageState = await this.getState(p, workflowCopy, selectors); + selectors = []; } catch (e: any) { this.log('The browser has been closed.'); return; @@ -614,16 +661,22 @@ export default class Interpreter extends EventEmitter { this.log(`Current state is: \n${JSON.stringify(pageState, null, 2)}`, Level.WARN); } - const actionId = workflow.findIndex((step) => { - const isApplicable = this.applicable(step.where, pageState, usedActions); - console.log(`Where:`, step.where); - console.log(`Page state:`, pageState); - console.log(`Match result: ${isApplicable}`); - return isApplicable; - }); + // const actionId = workflow.findIndex((step) => { + // const isApplicable = this.applicable(step.where, pageState, usedActions); + // console.log("-------------------------------------------------------------"); + // console.log(`Where:`, step.where); + // console.log(`Page state:`, pageState); + // console.log(`Match result: ${isApplicable}`); + // console.log("-------------------------------------------------------------"); + // return isApplicable; + // }); - const action = workflow[actionId]; + const actionId = this.getMatchingActionId(workflowCopy, pageState, usedActions); + const action = workflowCopy[actionId]; + + console.log("MATCHED ACTION:", action); + console.log("MATCHED ACTION ID:", actionId); this.log(`Matched ${JSON.stringify(action?.where)}`, Level.LOG); if (action) { // action is matched @@ -643,8 +696,12 @@ export default class Interpreter extends EventEmitter { console.log("Carrying out:", action.what); await this.carryOutSteps(p, action.what); usedActions.push(action.id ?? 'undefined'); + + workflowCopy.splice(actionId, 1); + console.log(`Action with ID ${action.id} removed from the workflow copy.`); - const newSelectors = this.getPreviousSelectors(workflow, actionId); + // const newSelectors = this.getPreviousSelectors(workflow, actionId); + const newSelectors = this.getSelectors(workflowCopy, actionId); newSelectors.forEach(selector => { if (!selectors.includes(selector)) { selectors.push(selector); From d6be2683fdfc8863db235b6c679cd93eca15af45 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 7 Dec 2024 21:16:58 +0530 Subject: [PATCH 06/15] feat: add check to match action url and return --- maxun-core/src/interpret.ts | 63 ++++++++++++++++++++++--------------- 1 file changed, 37 insertions(+), 26 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 2457f79b..fce67257 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -179,54 +179,62 @@ export default class Interpreter extends EventEmitter { * @param workflow Current **initialized** workflow (array of where-what pairs). * @returns {PageState} State of the current page. */ - private async getState(page: Page, workflow: Workflow, selectors: string[]): Promise { + private async getState(page: Page, workflowCopy: Workflow, selectors: string[]): Promise { /** * All the selectors present in the current Workflow */ // const selectors = Preprocessor.extractSelectors(workflow); - - console.log("All selectors:", selectors); + // console.log("Current selectors:", selectors); /** * Determines whether the element targetted by the selector is [actionable](https://playwright.dev/docs/actionability). * @param selector Selector to be queried * @returns True if the targetted element is actionable, false otherwise. */ - const actionable = async (selector: string): Promise => { - try { - const proms = [ - page.isEnabled(selector, { timeout: 500 }), - page.isVisible(selector, { timeout: 500 }), - ]; + // const actionable = async (selector: string): Promise => { + // try { + // const proms = [ + // page.isEnabled(selector, { timeout: 5000 }), + // page.isVisible(selector, { timeout: 5000 }), + // ]; - return await Promise.all(proms).then((bools) => bools.every((x) => x)); - } catch (e) { - // log(e, Level.ERROR); - return false; - } - }; + // return await Promise.all(proms).then((bools) => bools.every((x) => x)); + // } catch (e) { + // // log(e, Level.ERROR); + // return false; + // } + // }; /** * Object of selectors present in the current page. */ - const presentSelectors: SelectorArray = await Promise.all( - selectors.map(async (selector) => { - if (await actionable(selector)) { - return [selector]; - } - return []; - }), - ).then((x) => x.flat()); + // const presentSelectors: SelectorArray = await Promise.all( + // selectors.map(async (selector) => { + // if (await actionable(selector)) { + // return [selector]; + // } + // return []; + // }), + // ).then((x) => x.flat()); + const action = workflowCopy[workflowCopy.length - 1]; + + console.log("Next action:", action) + + let url: any = page.url(); + + if (action && action.where.url !== url && action.where.url !== "about:blank") { + url = action.where.url; + } return { - url: page.url(), + url, cookies: (await page.context().cookies([page.url()])) .reduce((p, cookie) => ( { ...p, [cookie.name]: cookie.value, }), {}), - selectors: selectors, + selectors, }; } @@ -622,6 +630,7 @@ export default class Interpreter extends EventEmitter { const usedActions: string[] = []; let selectors: string[] = []; let lastAction = null; + let actionId = -1 let repeatCount = 0; /** @@ -649,9 +658,11 @@ export default class Interpreter extends EventEmitter { } let pageState = {}; + let getStateTest = "Hello"; try { pageState = await this.getState(p, workflowCopy, selectors); selectors = []; + console.log("Empty selectors:", selectors) } catch (e: any) { this.log('The browser has been closed.'); return; @@ -671,7 +682,7 @@ export default class Interpreter extends EventEmitter { // return isApplicable; // }); - const actionId = this.getMatchingActionId(workflowCopy, pageState, usedActions); + actionId = this.getMatchingActionId(workflowCopy, pageState, usedActions); const action = workflowCopy[actionId]; From db37c72ce5b8a97f2fa9be6cd12c3b3f0bb07d62 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 7 Dec 2024 21:18:37 +0530 Subject: [PATCH 07/15] fix: add goto frame navigation --- server/src/browser-management/classes/RemoteBrowser.ts | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index cfcc96f8..f1d18f3f 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -402,11 +402,11 @@ export class RemoteBrowser { await this.currentPage?.close(); this.currentPage = newPage; if (this.currentPage) { - // this.currentPage.on('framenavigated', (frame) => { - // if (frame === this.currentPage?.mainFrame()) { - // this.socket.emit('urlChanged', this.currentPage.url()); - // } - // }); + this.currentPage.on('framenavigated', (frame) => { + if (frame === this.currentPage?.mainFrame()) { + this.socket.emit('urlChanged', this.currentPage.url()); + } + }); this.currentPage.on('load', (page) => { this.socket.emit('urlChanged', page.url()); }) From a8e8c1de82b0ff47add118b8ab74377d4076f42c Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 7 Dec 2024 21:53:55 +0530 Subject: [PATCH 08/15] fix: rm about:blank url check for action --- maxun-core/src/interpret.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index fce67257..c2a1186c 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -222,7 +222,7 @@ export default class Interpreter extends EventEmitter { let url: any = page.url(); - if (action && action.where.url !== url && action.where.url !== "about:blank") { + if (action && action.where.url !== url) { url = action.where.url; } From 342fd79588e14a5c9b66a11329b9a06d3c4980b6 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 7 Dec 2024 22:34:02 +0530 Subject: [PATCH 09/15] feat: add bottom up get selectors logic --- maxun-core/src/interpret.ts | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index c2a1186c..844b46c7 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -142,31 +142,23 @@ export default class Interpreter extends EventEmitter { // return selectors; // } - private getSelectors(workflow: Workflow, actionId: number): string[] { - const selectors: string[] = []; + private getSelectors(workflow: Workflow): string[] { + const selectorsSet = new Set(); - // Validate actionId - if (actionId <= 0) { - console.log("No previous selectors to collect."); - return selectors; // Empty array as there are no previous steps + if (workflow.length === 0) { + return []; } - // Iterate from the start up to (but not including) actionId - for (let index = 0; index < actionId; index++) { + for (let index = workflow.length - 1; index >= 0; index--) { const currentSelectors = workflow[index]?.where?.selectors; - console.log(`Selectors at step ${index}:`, currentSelectors); if (currentSelectors && currentSelectors.length > 0) { - currentSelectors.forEach((selector) => { - if (!selectors.includes(selector)) { - selectors.push(selector); // Avoid duplicates - } - }); + currentSelectors.forEach((selector) => selectorsSet.add(selector)); + return Array.from(selectorsSet); } } - console.log("Collected Selectors:", selectors); - return selectors; + return []; } @@ -216,9 +208,8 @@ export default class Interpreter extends EventEmitter { // return []; // }), // ).then((x) => x.flat()); - const action = workflowCopy[workflowCopy.length - 1]; - console.log("Next action:", action) + const action = workflowCopy[workflowCopy.length - 1]; let url: any = page.url(); @@ -709,10 +700,10 @@ export default class Interpreter extends EventEmitter { usedActions.push(action.id ?? 'undefined'); workflowCopy.splice(actionId, 1); - console.log(`Action with ID ${action.id} removed from the workflow copy.`); + console.log(`Action with ID ${actionId} removed from the workflow copy.`); // const newSelectors = this.getPreviousSelectors(workflow, actionId); - const newSelectors = this.getSelectors(workflowCopy, actionId); + const newSelectors = this.getSelectors(workflowCopy); newSelectors.forEach(selector => { if (!selectors.includes(selector)) { selectors.push(selector); From be6d8ab249d6486e7af9266dd43b3bab53d21012 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 8 Dec 2024 18:04:49 +0530 Subject: [PATCH 10/15] feat: add selectors in bottom up order --- maxun-core/src/interpret.ts | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 844b46c7..de9a9845 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -121,24 +121,30 @@ export default class Interpreter extends EventEmitter { } } - // private getPreviousSelectors(workflow: Workflow, actionId: number): string[] { + // private getSelectors(workflow: Workflow, actionId: number): string[] { // const selectors: string[] = []; - // let index = actionId - 1; - // while (index >= 0) { - // const previousSelectors = workflow[index]?.where?.selectors; - // console.log("Previous Selectors:", previousSelectors); - // if (previousSelectors && previousSelectors.length > 0) { - // previousSelectors.forEach((selector) => { + // // Validate actionId + // if (actionId <= 0) { + // console.log("No previous selectors to collect."); + // return selectors; // Empty array as there are no previous steps + // } + + // // Iterate from the start up to (but not including) actionId + // for (let index = 0; index < actionId; index++) { + // const currentSelectors = workflow[index]?.where?.selectors; + // console.log(`Selectors at step ${index}:`, currentSelectors); + + // if (currentSelectors && currentSelectors.length > 0) { + // currentSelectors.forEach((selector) => { // if (!selectors.includes(selector)) { // selectors.push(selector); // Avoid duplicates // } // }); - // break; // Exit the loop once valid selectors are found // } - // index--; // Move further back in the workflow // } + // console.log("Collected Selectors:", selectors); // return selectors; // } @@ -208,12 +214,14 @@ export default class Interpreter extends EventEmitter { // return []; // }), // ).then((x) => x.flat()); - + const action = workflowCopy[workflowCopy.length - 1]; + // console.log("Next action:", action) + let url: any = page.url(); - if (action && action.where.url !== url) { + if (action && action.where.url !== url && action.where.url !== "about:blank") { url = action.where.url; } @@ -700,7 +708,7 @@ export default class Interpreter extends EventEmitter { usedActions.push(action.id ?? 'undefined'); workflowCopy.splice(actionId, 1); - console.log(`Action with ID ${actionId} removed from the workflow copy.`); + console.log(`Action with ID ${action.id} removed from the workflow copy.`); // const newSelectors = this.getPreviousSelectors(workflow, actionId); const newSelectors = this.getSelectors(workflowCopy); From 5259e3e386787267662755dd02ea9fc7c71d56b4 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 8 Dec 2024 18:06:57 +0530 Subject: [PATCH 11/15] feat: add on flag logic for InterpretRecording --- .../classes/Interpreter.ts | 26 +++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/server/src/workflow-management/classes/Interpreter.ts b/server/src/workflow-management/classes/Interpreter.ts index d53259b7..b982b172 100644 --- a/server/src/workflow-management/classes/Interpreter.ts +++ b/server/src/workflow-management/classes/Interpreter.ts @@ -244,7 +244,12 @@ export class WorkflowInterpreter { * @param page The page instance used to interact with the browser. * @param settings The settings to use for the interpretation. */ - public InterpretRecording = async (workflow: WorkflowFile, page: Page, settings: InterpreterSettings) => { + public InterpretRecording = async ( + workflow: WorkflowFile, + page: Page, + updatePageOnPause: (page: Page) => void, + settings: InterpreterSettings + ) => { const params = settings.params ? settings.params : null; delete settings.params; @@ -262,7 +267,7 @@ export class WorkflowInterpreter { this.socket.emit('debugMessage', msg) }, }, - serializableCallback: (data: string) => { + serializableCallback: (data: any) => { this.serializableData.push(data); this.socket.emit('serializableCallback', data); }, @@ -275,6 +280,23 @@ export class WorkflowInterpreter { const interpreter = new Interpreter(decryptedWorkflow, options); this.interpreter = interpreter; + interpreter.on('flag', async (page, resume) => { + if (this.activeId !== null && this.breakpoints[this.activeId]) { + logger.log('debug', `breakpoint hit id: ${this.activeId}`); + this.socket.emit('breakpointHit'); + this.interpretationIsPaused = true; + } + + if (this.interpretationIsPaused) { + this.interpretationResume = resume; + logger.log('debug', `Paused inside of flag: ${page.url()}`); + updatePageOnPause(page); + this.socket.emit('log', '----- The interpretation has been paused -----', false); + } else { + resume(); + } + }); + const status = await interpreter.run(page, params); const lastArray = this.serializableData.length > 1 From aec65d1b2232fef9a2c70ac3b43e0001dbbafe42 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 8 Dec 2024 18:07:40 +0530 Subject: [PATCH 12/15] feat: add flag generation logic --- server/src/api/record.ts | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/server/src/api/record.ts b/server/src/api/record.ts index 5b33b12f..05560487 100644 --- a/server/src/api/record.ts +++ b/server/src/api/record.ts @@ -15,6 +15,8 @@ import { io, Socket } from "socket.io-client"; import { BinaryOutputService } from "../storage/mino"; import { AuthenticatedRequest } from "../routes/record" import {capture} from "../utils/analytics"; +import { Page } from "playwright"; +import { WorkflowFile } from "maxun-core"; chromium.use(stealthPlugin()); const formatRecording = (recordingData: any) => { @@ -533,6 +535,17 @@ function resetRecordingState(browserId: string, id: string) { id = ''; } +function AddGeneratedFlags(workflow: WorkflowFile) { + const copy = JSON.parse(JSON.stringify(workflow)); + for (let i = 0; i < workflow.workflow.length; i++) { + copy.workflow[i].what.unshift({ + action: 'flag', + args: ['generated'], + }); + } + return copy; +}; + async function executeRun(id: string) { try { const run = await Run.findOne({ where: { runId: id } }); @@ -560,13 +573,14 @@ async function executeRun(id: string) { throw new Error('Could not access browser'); } - const currentPage = await browser.getCurrentPage(); + let currentPage = await browser.getCurrentPage(); if (!currentPage) { throw new Error('Could not create a new page'); } + const workflow = AddGeneratedFlags(recording.recording); const interpretationInfo = await browser.interpreter.InterpretRecording( - recording.recording, currentPage, plainRun.interpreterSettings + workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings ); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); From 0a81292bea115446c9323bc6df5647b1b19f684d Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 8 Dec 2024 18:08:05 +0530 Subject: [PATCH 13/15] feat: add flag generation logic --- server/src/routes/storage.ts | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index d1f648f8..ddadf240 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -18,6 +18,8 @@ import { AuthenticatedRequest } from './record'; import { computeNextRun } from '../utils/schedule'; import { capture } from "../utils/analytics"; import { tryCatch } from 'bullmq'; +import { WorkflowFile } from 'maxun-core'; +import { Page } from 'playwright'; chromium.use(stealthPlugin()); export const router = Router(); @@ -422,6 +424,17 @@ router.get('/runs/run/:id', requireSignIn, async (req, res) => { } }); +function AddGeneratedFlags(workflow: WorkflowFile) { + const copy = JSON.parse(JSON.stringify(workflow)); + for (let i = 0; i < workflow.workflow.length; i++) { + copy.workflow[i].what.unshift({ + action: 'flag', + args: ['generated'], + }); + } + return copy; +}; + /** * PUT endpoint for finishing a run and saving it to the storage. */ @@ -443,10 +456,11 @@ router.post('/runs/run/:id', requireSignIn, async (req: AuthenticatedRequest, re // interpret the run in active browser const browser = browserPool.getRemoteBrowser(plainRun.browserId); - const currentPage = browser?.getCurrentPage(); + let currentPage = browser?.getCurrentPage(); if (browser && currentPage) { + const workflow = AddGeneratedFlags(recording.recording); const interpretationInfo = await browser.interpreter.InterpretRecording( - recording.recording, currentPage, plainRun.interpreterSettings); + workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput); await destroyRemoteBrowser(plainRun.browserId); From 45f0c819ea05831ccc0fe20781adfebc39b48f47 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 8 Dec 2024 18:10:53 +0530 Subject: [PATCH 14/15] feat: add flag generation logic --- .../workflow-management/scheduler/index.ts | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/server/src/workflow-management/scheduler/index.ts b/server/src/workflow-management/scheduler/index.ts index 02ca905f..169b0061 100644 --- a/server/src/workflow-management/scheduler/index.ts +++ b/server/src/workflow-management/scheduler/index.ts @@ -11,6 +11,8 @@ import Run from "../../models/Run"; import { getDecryptedProxyConfig } from "../../routes/proxy"; import { BinaryOutputService } from "../../storage/mino"; import { capture } from "../../utils/analytics"; +import { WorkflowFile } from "maxun-core"; +import { Page } from "playwright"; chromium.use(stealthPlugin()); async function createWorkflowAndStoreMetadata(id: string, userId: string) { @@ -79,6 +81,17 @@ async function createWorkflowAndStoreMetadata(id: string, userId: string) { } } +function AddGeneratedFlags(workflow: WorkflowFile) { + const copy = JSON.parse(JSON.stringify(workflow)); + for (let i = 0; i < workflow.workflow.length; i++) { + copy.workflow[i].what.unshift({ + action: 'flag', + args: ['generated'], + }); + } + return copy; +}; + async function executeRun(id: string) { try { const run = await Run.findOne({ where: { runId: id } }); @@ -106,13 +119,15 @@ async function executeRun(id: string) { throw new Error('Could not access browser'); } - const currentPage = await browser.getCurrentPage(); + let currentPage = await browser.getCurrentPage(); if (!currentPage) { throw new Error('Could not create a new page'); } + const workflow = AddGeneratedFlags(recording.recording); const interpretationInfo = await browser.interpreter.InterpretRecording( - recording.recording, currentPage, plainRun.interpreterSettings); + workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings + ); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput); From a30211de6d3ed309a5477a44091b19a4f72a9dd0 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 8 Dec 2024 18:55:19 +0530 Subject: [PATCH 15/15] fix: add frame navigation logic in place of load --- server/src/browser-management/classes/RemoteBrowser.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index f1d18f3f..3f5b677c 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -407,9 +407,9 @@ export class RemoteBrowser { this.socket.emit('urlChanged', this.currentPage.url()); } }); - this.currentPage.on('load', (page) => { - this.socket.emit('urlChanged', page.url()); - }) + // this.currentPage.on('load', (page) => { + // this.socket.emit('urlChanged', page.url()); + // }) this.client = await this.currentPage.context().newCDPSession(this.currentPage); await this.subscribeToScreencast(); } else {