Merge pull request #235 from getmaxun/rect-improve
feat: rect dom depth information
This commit is contained in:
@@ -534,6 +534,8 @@ export default class Interpreter extends EventEmitter {
|
||||
case 'clickNext':
|
||||
const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
|
||||
|
||||
// console.log("Page results:", pageResults);
|
||||
|
||||
// Filter out already scraped items
|
||||
const newResults = pageResults.filter(item => {
|
||||
const uniqueKey = JSON.stringify(item);
|
||||
@@ -541,9 +543,9 @@ export default class Interpreter extends EventEmitter {
|
||||
scrapedItems.add(uniqueKey); // Mark as scraped
|
||||
return true;
|
||||
});
|
||||
|
||||
|
||||
allResults = allResults.concat(newResults);
|
||||
|
||||
|
||||
if (config.limit && allResults.length >= config.limit) {
|
||||
return allResults.slice(0, config.limit);
|
||||
}
|
||||
@@ -553,7 +555,7 @@ export default class Interpreter extends EventEmitter {
|
||||
return allResults; // No more pages to scrape
|
||||
}
|
||||
await Promise.all([
|
||||
nextButton.click(),
|
||||
nextButton.dispatchEvent('click'),
|
||||
page.waitForNavigation({ waitUntil: 'networkidle' })
|
||||
]);
|
||||
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
"fortawesome": "^0.0.1-security",
|
||||
"google-auth-library": "^9.14.1",
|
||||
"googleapis": "^144.0.0",
|
||||
"idcac-playwright": "^0.1.3",
|
||||
"ioredis": "^5.4.1",
|
||||
"joi": "^17.6.0",
|
||||
"jsonwebtoken": "^9.0.2",
|
||||
|
||||
@@ -15,6 +15,7 @@ import { InterpreterSettings, RemoteBrowserOptions } from "../../types";
|
||||
import { WorkflowGenerator } from "../../workflow-management/classes/Generator";
|
||||
import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter";
|
||||
import { getDecryptedProxyConfig } from '../../routes/proxy';
|
||||
import { getInjectableScript } from 'idcac-playwright';
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
|
||||
@@ -168,6 +169,7 @@ export class RemoteBrowser {
|
||||
|
||||
this.currentPage.on('framenavigated', (frame) => {
|
||||
if (frame === this.currentPage?.mainFrame()) {
|
||||
this.currentPage.evaluate(getInjectableScript())
|
||||
this.socket.emit('urlChanged', this.currentPage.url());
|
||||
}
|
||||
});
|
||||
@@ -370,11 +372,12 @@ export class RemoteBrowser {
|
||||
await this.stopScreencast();
|
||||
this.currentPage = page;
|
||||
|
||||
// this.currentPage.on('framenavigated', (frame) => {
|
||||
// if (frame === this.currentPage?.mainFrame()) {
|
||||
// this.socket.emit('urlChanged', this.currentPage.url());
|
||||
// }
|
||||
// });
|
||||
this.currentPage.on('framenavigated', (frame) => {
|
||||
if (frame === this.currentPage?.mainFrame()) {
|
||||
this.currentPage.evaluate(getInjectableScript());
|
||||
this.socket.emit('urlChanged', this.currentPage.url());
|
||||
}
|
||||
});
|
||||
|
||||
//await this.currentPage.setViewportSize({ height: 400, width: 900 })
|
||||
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
||||
@@ -404,6 +407,7 @@ export class RemoteBrowser {
|
||||
if (this.currentPage) {
|
||||
this.currentPage.on('framenavigated', (frame) => {
|
||||
if (frame === this.currentPage?.mainFrame()) {
|
||||
this.currentPage.evaluate(getInjectableScript());
|
||||
this.socket.emit('urlChanged', this.currentPage.url());
|
||||
}
|
||||
});
|
||||
|
||||
@@ -20,49 +20,6 @@ type Workflow = WorkflowFile["workflow"];
|
||||
* @category WorkflowManagement-Selectors
|
||||
* @returns {Promise<Rectangle|undefined|null>}
|
||||
*/
|
||||
export const getRect = async (page: Page, coordinates: Coordinates) => {
  // Resolves the bounding rectangle of the element under `coordinates`.
  // The lookup runs inside the page; the result is copied into a plain object
  // before being returned from the page.
  // Resolves to undefined when no element is found at the point or when the
  // page evaluation throws (the failure is logged, not rethrown).
  try {
    return await page.evaluate(
      ({ x, y }) => {
        const hit = document.elementFromPoint(x, y) as HTMLElement | null;
        if (!hit) {
          return undefined;
        }

        // Match the logic in recorder.ts for link clicks: when the direct
        // parent is an anchor, measure the anchor instead of its child.
        const parent = hit.parentElement;
        const target = parent?.tagName === 'A' ? parent : hit;

        const box = target.getBoundingClientRect();
        return {
          x: box.x,
          y: box.y,
          width: box.width,
          height: box.height,
          top: box.top,
          right: box.right,
          bottom: box.bottom,
          left: box.left,
        };
      },
      { x: coordinates.x, y: coordinates.y },
    );
  } catch (error) {
    const { message, stack } = error as Error;
    // This function retrieves a rectangle, not a selector; say so in the log.
    logger.log('error', `Error while retrieving rectangle: ${message}`);
    logger.log('error', `Stack: ${stack}`);
    return undefined;
  }
};
|
||||
|
||||
/**
|
||||
* Checks the basic info about an element and returns a {@link BaseActionInfo} object.
|
||||
* If the element is not found, returns undefined.
|
||||
* @param page The page instance.
|
||||
* @param coordinates Coordinates of an element.
|
||||
* @category WorkflowManagement-Selectors
|
||||
* @returns {Promise<BaseActionInfo|undefined>}
|
||||
*/
|
||||
export const getElementInformation = async (
|
||||
page: Page,
|
||||
coordinates: Coordinates
|
||||
@@ -70,10 +27,38 @@ export const getElementInformation = async (
|
||||
try {
|
||||
const elementInfo = await page.evaluate(
|
||||
async ({ x, y }) => {
|
||||
const el = document.elementFromPoint(x, y) as HTMLElement;
|
||||
if (el) {
|
||||
const { parentElement } = el;
|
||||
const element = parentElement?.tagName === 'A' ? parentElement : el;
|
||||
const originalEl = document.elementFromPoint(x, y) as HTMLElement;
|
||||
if (originalEl) {
|
||||
let element = originalEl;
|
||||
|
||||
if (originalEl.tagName === 'A') {
|
||||
element = originalEl;
|
||||
} else if (originalEl.parentElement?.tagName === 'A') {
|
||||
element = originalEl.parentElement;
|
||||
} else {
|
||||
// Generic parent finding logic based on visual containment
|
||||
while (element.parentElement) {
|
||||
const parentRect = element.parentElement.getBoundingClientRect();
|
||||
const childRect = element.getBoundingClientRect();
|
||||
|
||||
// Check if parent visually contains the child
|
||||
const fullyContained =
|
||||
parentRect.left <= childRect.left &&
|
||||
parentRect.right >= childRect.right &&
|
||||
parentRect.top <= childRect.top &&
|
||||
parentRect.bottom >= childRect.bottom;
|
||||
|
||||
// Additional checks for more comprehensive containment
|
||||
const significantOverlap =
|
||||
(childRect.width * childRect.height) /
|
||||
(parentRect.width * parentRect.height) > 0.5;
|
||||
|
||||
if (fullyContained && significantOverlap) {
|
||||
element = element.parentElement;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
} }
|
||||
|
||||
let info: {
|
||||
tagName: string;
|
||||
@@ -98,7 +83,7 @@ export const getElementInformation = async (
|
||||
);
|
||||
}
|
||||
|
||||
// Gather specific information based on the tag
|
||||
// Existing tag-specific logic
|
||||
if (element?.tagName === 'A') {
|
||||
info.url = (element as HTMLAnchorElement).href;
|
||||
info.innerText = element.innerText ?? '';
|
||||
@@ -112,7 +97,6 @@ export const getElementInformation = async (
|
||||
|
||||
info.innerHTML = element.innerHTML;
|
||||
info.outerHTML = element.outerHTML;
|
||||
|
||||
return info;
|
||||
}
|
||||
return null;
|
||||
@@ -127,6 +111,67 @@ export const getElementInformation = async (
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Returns the bounding rectangle of the element that represents the given
 * viewport point.
 *
 * The target is resolved inside the page:
 *  - if the element under the point is an anchor, or its direct parent is an
 *    anchor, the anchor is used;
 *  - otherwise the element is widened to its ancestors for as long as the
 *    parent fully encloses the current element and the current element covers
 *    more than half of the parent's area.
 *
 * @param page The page instance.
 * @param coordinates Viewport coordinates of the point to inspect.
 * @category WorkflowManagement-Selectors
 * @returns The rectangle as a plain object, or undefined when no element is
 * found at the point or the page evaluation throws (the failure is logged).
 */
export const getRect = async (page: Page, coordinates: Coordinates) => {
  try {
    return await page.evaluate(
      ({ x, y }) => {
        const hit = document.elementFromPoint(x, y) as HTMLElement | null;
        if (!hit) {
          return undefined;
        }

        let target: HTMLElement = hit;
        if (hit.tagName !== 'A') {
          if (hit.parentElement?.tagName === 'A') {
            target = hit.parentElement;
          } else {
            // Generic parent finding logic based on visual containment.
            while (target.parentElement) {
              const outer = target.parentElement.getBoundingClientRect();
              const inner = target.getBoundingClientRect();

              const fullyContained =
                outer.left <= inner.left &&
                outer.right >= inner.right &&
                outer.top <= inner.top &&
                outer.bottom >= inner.bottom;

              // A zero-area parent yields NaN or Infinity here; since a
              // zero-area parent can only enclose a zero-area child, the
              // combined condition below is false and the climb stops.
              const significantOverlap =
                (inner.width * inner.height) / (outer.width * outer.height) > 0.5;

              if (!(fullyContained && significantOverlap)) {
                break;
              }
              target = target.parentElement;
            }
          }
        }

        // Copy the fields into a plain object to return from the page.
        const box = target.getBoundingClientRect();
        return {
          x: box.x,
          y: box.y,
          width: box.width,
          height: box.height,
          top: box.top,
          right: box.right,
          bottom: box.bottom,
          left: box.left,
        };
      },
      { x: coordinates.x, y: coordinates.y },
    );
  } catch (error) {
    const { message, stack } = error as Error;
    // This function retrieves a rectangle, not a selector; say so in the log.
    logger.log('error', `Error while retrieving rectangle: ${message}`);
    logger.log('error', `Stack: ${stack}`);
    return undefined;
  }
};
|
||||
|
||||
|
||||
/**
|
||||
* Returns the best and unique css {@link Selectors} for the element on the page.
|
||||
@@ -742,7 +787,6 @@ interface SelectorResult {
|
||||
export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates): Promise<SelectorResult> => {
|
||||
try {
|
||||
const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => {
|
||||
|
||||
function getNonUniqueSelector(element: HTMLElement): string {
|
||||
let selector = element.tagName.toLowerCase();
|
||||
|
||||
@@ -774,8 +818,37 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates
|
||||
return path.join(' > ');
|
||||
}
|
||||
|
||||
const element = document.elementFromPoint(x, y) as HTMLElement | null;
|
||||
if (!element) return null;
|
||||
const originalEl = document.elementFromPoint(x, y) as HTMLElement;
|
||||
if (!originalEl) return null;
|
||||
|
||||
let element = originalEl;
|
||||
|
||||
if (originalEl.tagName === 'A') {
|
||||
element = originalEl;
|
||||
} else if (originalEl.parentElement?.tagName === 'A') {
|
||||
element = originalEl.parentElement;
|
||||
} else {
|
||||
while (element.parentElement) {
|
||||
const parentRect = element.parentElement.getBoundingClientRect();
|
||||
const childRect = element.getBoundingClientRect();
|
||||
|
||||
const fullyContained =
|
||||
parentRect.left <= childRect.left &&
|
||||
parentRect.right >= childRect.right &&
|
||||
parentRect.top <= childRect.top &&
|
||||
parentRect.bottom >= childRect.bottom;
|
||||
|
||||
const significantOverlap =
|
||||
(childRect.width * childRect.height) /
|
||||
(parentRect.width * parentRect.height) > 0.5;
|
||||
|
||||
if (fullyContained && significantOverlap) {
|
||||
element = element.parentElement;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const generalSelector = getSelectorPath(element);
|
||||
return {
|
||||
@@ -790,7 +863,6 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
export const getChildSelectors = async (page: Page, parentSelector: string): Promise<string[]> => {
|
||||
try {
|
||||
const childSelectors = await page.evaluate((parentSelector: string) => {
|
||||
|
||||
Reference in New Issue
Block a user