diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index de9a9845..bd90e40f 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -534,6 +534,8 @@ export default class Interpreter extends EventEmitter { case 'clickNext': const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + // console.log("Page results:", pageResults); + // Filter out already scraped items const newResults = pageResults.filter(item => { const uniqueKey = JSON.stringify(item); @@ -541,9 +543,9 @@ export default class Interpreter extends EventEmitter { scrapedItems.add(uniqueKey); // Mark as scraped return true; }); - + allResults = allResults.concat(newResults); - + if (config.limit && allResults.length >= config.limit) { return allResults.slice(0, config.limit); } @@ -553,7 +555,7 @@ export default class Interpreter extends EventEmitter { return allResults; // No more pages to scrape } await Promise.all([ - nextButton.click(), + nextButton.dispatchEvent('click'), page.waitForNavigation({ waitUntil: 'networkidle' }) ]); diff --git a/package.json b/package.json index 977daada..a7d634f3 100644 --- a/package.json +++ b/package.json @@ -36,6 +36,7 @@ "fortawesome": "^0.0.1-security", "google-auth-library": "^9.14.1", "googleapis": "^144.0.0", + "idcac-playwright": "^0.1.3", "ioredis": "^5.4.1", "joi": "^17.6.0", "jsonwebtoken": "^9.0.2", diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 3f5b677c..0081cb5c 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -15,6 +15,7 @@ import { InterpreterSettings, RemoteBrowserOptions } from "../../types"; import { WorkflowGenerator } from "../../workflow-management/classes/Generator"; import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter"; import { getDecryptedProxyConfig } from '../../routes/proxy'; +import { getInjectableScript } from 'idcac-playwright'; chromium.use(stealthPlugin()); @@ -168,6 +169,7 @@ export class RemoteBrowser { this.currentPage.on('framenavigated', (frame) => { if (frame === this.currentPage?.mainFrame()) { + this.currentPage.evaluate(getInjectableScript()) this.socket.emit('urlChanged', this.currentPage.url()); } }); @@ -370,11 +372,12 @@ export class RemoteBrowser { await this.stopScreencast(); this.currentPage = page; - // this.currentPage.on('framenavigated', (frame) => { - // if (frame === this.currentPage?.mainFrame()) { - // this.socket.emit('urlChanged', this.currentPage.url()); - // } - // }); + this.currentPage.on('framenavigated', (frame) => { + if (frame === this.currentPage?.mainFrame()) { + this.currentPage.evaluate(getInjectableScript()); + this.socket.emit('urlChanged', this.currentPage.url()); + } + }); //await this.currentPage.setViewportSize({ height: 400, width: 900 }) this.client = await this.currentPage.context().newCDPSession(this.currentPage); @@ -404,6 +407,7 @@ export class RemoteBrowser { if (this.currentPage) { this.currentPage.on('framenavigated', (frame) => { if (frame === this.currentPage?.mainFrame()) { + this.currentPage.evaluate(getInjectableScript()); this.socket.emit('urlChanged', this.currentPage.url()); } }); diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 193de891..f3925ec9 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -20,49 +20,6 @@ type Workflow = WorkflowFile["workflow"]; * @category WorkflowManagement-Selectors * @returns {Promise} */ -export const getRect = async (page: Page, coordinates: Coordinates) => { - try { - const rect = await page.evaluate( - async ({ x, y }) => { - const el = document.elementFromPoint(x, y) as HTMLElement; - if (el) { - const { parentElement } = el; - // Match the logic in recorder.ts for link clicks - const element = parentElement?.tagName === 'A' ? parentElement : el; - const rectangle = element?.getBoundingClientRect(); - // @ts-ignore - if (rectangle) { - return { - x: rectangle.x, - y: rectangle.y, - width: rectangle.width, - height: rectangle.height, - top: rectangle.top, - right: rectangle.right, - bottom: rectangle.bottom, - left: rectangle.left, - }; - } - } - }, - { x: coordinates.x, y: coordinates.y }, - ); - return rect; - } catch (error) { - const { message, stack } = error as Error; - logger.log('error', `Error while retrieving selector: ${message}`); - logger.log('error', `Stack: ${stack}`); - } -} - -/** - * Checks the basic info about an element and returns a {@link BaseActionInfo} object. - * If the element is not found, returns undefined. - * @param page The page instance. - * @param coordinates Coordinates of an element. - * @category WorkflowManagement-Selectors - * @returns {Promise} - */ export const getElementInformation = async ( page: Page, coordinates: Coordinates @@ -70,10 +27,38 @@ export const getElementInformation = async ( try { const elementInfo = await page.evaluate( async ({ x, y }) => { - const el = document.elementFromPoint(x, y) as HTMLElement; - if (el) { - const { parentElement } = el; - const element = parentElement?.tagName === 'A' ? parentElement : el; + const originalEl = document.elementFromPoint(x, y) as HTMLElement; + if (originalEl) { + let element = originalEl; + + if (originalEl.tagName === 'A') { + element = originalEl; + } else if (originalEl.parentElement?.tagName === 'A') { + element = originalEl.parentElement; + } else { + // Generic parent finding logic based on visual containment + while (element.parentElement) { + const parentRect = element.parentElement.getBoundingClientRect(); + const childRect = element.getBoundingClientRect(); + + // Check if parent visually contains the child + const fullyContained = + parentRect.left <= childRect.left && + parentRect.right >= childRect.right && + parentRect.top <= childRect.top && + parentRect.bottom >= childRect.bottom; + + // Additional checks for more comprehensive containment + const significantOverlap = + (childRect.width * childRect.height) / + (parentRect.width * parentRect.height) > 0.5; + + if (fullyContained && significantOverlap) { + element = element.parentElement; + } else { + break; + } + } } let info: { tagName: string; @@ -98,7 +83,7 @@ export const getElementInformation = async ( ); } - // Gather specific information based on the tag + // Existing tag-specific logic if (element?.tagName === 'A') { info.url = (element as HTMLAnchorElement).href; info.innerText = element.innerText ?? ''; @@ -112,7 +97,6 @@ export const getElementInformation = async ( info.innerHTML = element.innerHTML; info.outerHTML = element.outerHTML; - return info; } return null; @@ -127,6 +111,67 @@ export const getElementInformation = async ( } }; +export const getRect = async (page: Page, coordinates: Coordinates) => { + try { + const rect = await page.evaluate( + async ({ x, y }) => { + const originalEl = document.elementFromPoint(x, y) as HTMLElement; + if (originalEl) { + let element = originalEl; + + if (originalEl.tagName === 'A') { + element = originalEl; + } else if (originalEl.parentElement?.tagName === 'A') { + element = originalEl.parentElement; + } else { + while (element.parentElement) { + const parentRect = element.parentElement.getBoundingClientRect(); + const childRect = element.getBoundingClientRect(); + + const fullyContained = + parentRect.left <= childRect.left && + parentRect.right >= childRect.right && + parentRect.top <= childRect.top && + parentRect.bottom >= childRect.bottom; + + const significantOverlap = + (childRect.width * childRect.height) / + (parentRect.width * parentRect.height) > 0.5; + + if (fullyContained && significantOverlap) { + element = element.parentElement; + } else { + break; + } + }} + + //element = element?.parentElement?.tagName === 'A' ? element?.parentElement : element; + const rectangle = element?.getBoundingClientRect(); + + if (rectangle) { + return { + x: rectangle.x, + y: rectangle.y, + width: rectangle.width, + height: rectangle.height, + top: rectangle.top, + right: rectangle.right, + bottom: rectangle.bottom, + left: rectangle.left, + }; + } + } + }, + { x: coordinates.x, y: coordinates.y }, + ); + return rect; + } catch (error) { + const { message, stack } = error as Error; + logger.log('error', `Error while retrieving selector: ${message}`); + logger.log('error', `Stack: ${stack}`); + } +} + /** * Returns the best and unique css {@link Selectors} for the element on the page. @@ -742,7 +787,6 @@ interface SelectorResult { export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates): Promise => { try { const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { - function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); @@ -774,8 +818,37 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates return path.join(' > '); } - const element = document.elementFromPoint(x, y) as HTMLElement | null; - if (!element) return null; + const originalEl = document.elementFromPoint(x, y) as HTMLElement; + if (!originalEl) return null; + + let element = originalEl; + + if (originalEl.tagName === 'A') { + element = originalEl; + } else if (originalEl.parentElement?.tagName === 'A') { + element = originalEl.parentElement; + } else { + while (element.parentElement) { + const parentRect = element.parentElement.getBoundingClientRect(); + const childRect = element.getBoundingClientRect(); + + const fullyContained = + parentRect.left <= childRect.left && + parentRect.right >= childRect.right && + parentRect.top <= childRect.top && + parentRect.bottom >= childRect.bottom; + + const significantOverlap = + (childRect.width * childRect.height) / + (parentRect.width * parentRect.height) > 0.5; + + if (fullyContained && significantOverlap) { + element = element.parentElement; + } else { + break; + } + } + } const generalSelector = getSelectorPath(element); return { @@ -790,7 +863,6 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates } }; - export const getChildSelectors = async (page: Page, parentSelector: string): Promise => { try { const childSelectors = await page.evaluate((parentSelector: string) => {