Merge branch 'develop' into browser-service
This commit is contained in:
@@ -460,8 +460,9 @@ export default class Interpreter extends EventEmitter {
|
||||
for (const link of links) {
|
||||
// eslint-disable-next-line
|
||||
this.concurrency.addJob(async () => {
|
||||
let newPage = null;
|
||||
try {
|
||||
const newPage = await context.newPage();
|
||||
newPage = await context.newPage();
|
||||
await newPage.goto(link);
|
||||
await newPage.waitForLoadState('networkidle');
|
||||
await this.runLoop(newPage, this.initializedWorkflow!);
|
||||
@@ -470,6 +471,14 @@ export default class Interpreter extends EventEmitter {
|
||||
// but newPage(), goto() and waitForLoadState() don't (and will kill
|
||||
// the interpreter by throwing).
|
||||
this.log(<Error>e, Level.ERROR);
|
||||
} finally {
|
||||
if (newPage && !newPage.isClosed()) {
|
||||
try {
|
||||
await newPage.close();
|
||||
} catch (closeError) {
|
||||
this.log('Failed to close enqueued page', Level.WARN);
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -1463,41 +1472,57 @@ export default class Interpreter extends EventEmitter {
|
||||
* User-requested concurrency should be entirely managed by the concurrency manager,
|
||||
* e.g. via `enqueueLinks`.
|
||||
*/
|
||||
p.on('popup', (popup) => {
|
||||
const popupHandler = (popup) => {
|
||||
this.concurrency.addJob(() => this.runLoop(popup, workflowCopy));
|
||||
});
|
||||
};
|
||||
p.on('popup', popupHandler);
|
||||
|
||||
/* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
|
||||
let loopIterations = 0;
|
||||
const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker
|
||||
|
||||
// Detaches `popupHandler` from the page's 'popup' event.
// Best-effort: it is called right before the loop's early returns, so a
// failure here is logged at DEBUG level instead of being thrown.
const cleanup = () => {
  try {
    // Only a page that is still open has its listener removed.
    if (!p.isClosed()) {
      p.removeListener('popup', popupHandler);
    }
  } catch (cleanupError) {
    // Previously swallowed silently; keep the non-throwing behaviour but
    // leave a trace so a failing detach can be diagnosed.
    const reason = cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
    this.log(`Failed to remove popup listener: ${reason}`, Level.DEBUG);
  }
};
|
||||
|
||||
while (true) {
|
||||
if (this.isAborted) {
|
||||
this.log('Workflow aborted during step execution', Level.WARN);
|
||||
cleanup();
|
||||
return;
|
||||
}
|
||||
|
||||
// Circuit breaker to prevent infinite loops
|
||||
if (++loopIterations > MAX_LOOP_ITERATIONS) {
|
||||
this.log('Maximum loop iterations reached, terminating to prevent infinite loop', Level.ERROR);
|
||||
cleanup();
|
||||
return;
|
||||
}
|
||||
|
||||
// Checks whether the page was closed from outside,
|
||||
// or the workflow execution has been stopped via `interpreter.stop()`
|
||||
if (p.isClosed() || !this.stopper) {
|
||||
cleanup();
|
||||
return;
|
||||
}
|
||||
|
||||
try {
|
||||
await p.waitForLoadState();
|
||||
} catch (e) {
|
||||
cleanup();
|
||||
await p.close();
|
||||
return;
|
||||
}
|
||||
|
||||
if (workflowCopy.length === 0) {
|
||||
this.log('All actions completed. Workflow finished.', Level.LOG);
|
||||
cleanup();
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1589,6 +1614,7 @@ export default class Interpreter extends EventEmitter {
|
||||
}
|
||||
} else {
|
||||
//await this.disableAdBlocker(p);
|
||||
cleanup();
|
||||
return;
|
||||
}
|
||||
}
|
||||
@@ -1681,4 +1707,44 @@ export default class Interpreter extends EventEmitter {
|
||||
throw new Error('Cannot stop, there is no running workflow!');
|
||||
}
|
||||
}
|
||||
/**
 * Cleanup method to release resources and prevent memory leaks.
 * Call this when the interpreter is no longer needed.
 *
 * Steps, in order:
 *  1. if `this.stopper` is set, awaits `this.stop()` (a failure is logged at
 *     WARN level and does not interrupt the remaining steps);
 *  2. drops the ad-blocker reference (`this.blocker = null`);
 *  3. replaces the accumulated result containers with empty ones;
 *  4. resets `isAborted` and `initializedWorkflow`.
 *
 * @returns A promise that resolves once all steps have run.
 * @throws Rethrows any error not handled by the inner guards, after logging
 *         it at ERROR level.
 */
public async cleanup(): Promise<void> {
  // Catch variables are treated as `unknown`: a thrown value is not
  // guaranteed to be an Error, so derive a printable message defensively.
  const describe = (error: unknown): string =>
    error instanceof Error ? error.message : String(error);

  try {
    // Stop any running workflows first
    if (this.stopper) {
      try {
        await this.stop();
      } catch (error: unknown) {
        this.log(`Error stopping workflow during cleanup: ${describe(error)}`, Level.WARN);
      }
    }

    // Clear ad-blocker resources (only the reference is dropped here)
    if (this.blocker) {
      try {
        this.blocker = null;
        this.log('Ad-blocker resources cleared', Level.DEBUG);
      } catch (error: unknown) {
        this.log(`Error cleaning up ad-blocker: ${describe(error)}`, Level.WARN);
      }
    }

    // Clear accumulated data to free memory
    this.cumulativeResults = [];
    this.namedResults = {};
    this.serializableDataByType = { scrapeList: {}, scrapeSchema: {} };

    // Reset state
    this.isAborted = false;
    this.initializedWorkflow = null;

    this.log('Interpreter cleanup completed', Level.DEBUG);
  } catch (error: unknown) {
    this.log(`Error during interpreter cleanup: ${describe(error)}`, Level.ERROR);
    throw error;
  }
}
|
||||
}
|
||||
10
package.json
10
package.json
@@ -12,8 +12,6 @@
|
||||
"@mui/material": "^5.6.2",
|
||||
"@react-oauth/google": "^0.12.1",
|
||||
"@tanstack/react-query": "^5.90.2",
|
||||
"@testing-library/react": "^13.1.1",
|
||||
"@testing-library/user-event": "^13.5.0",
|
||||
"@types/bcrypt": "^5.0.2",
|
||||
"@types/body-parser": "^1.19.5",
|
||||
"@types/csurf": "^1.11.5",
|
||||
@@ -38,14 +36,12 @@
|
||||
"dotenv": "^16.0.0",
|
||||
"express": "^4.17.2",
|
||||
"express-session": "^1.18.1",
|
||||
"fortawesome": "^0.0.1-security",
|
||||
"google-auth-library": "^9.14.1",
|
||||
"googleapis": "^144.0.0",
|
||||
"i18next": "^24.0.2",
|
||||
"i18next-browser-languagedetector": "^8.0.0",
|
||||
"i18next-http-backend": "^3.0.1",
|
||||
"idcac-playwright": "^0.1.3",
|
||||
"ioredis": "^5.4.1",
|
||||
"joi": "^17.6.0",
|
||||
"joplin-turndown-plugin-gfm": "^1.0.12",
|
||||
"jsonwebtoken": "^9.0.2",
|
||||
@@ -64,11 +60,8 @@
|
||||
"posthog-node": "^4.2.1",
|
||||
"react": "^18.0.0",
|
||||
"react-dom": "^18.0.0",
|
||||
"react-highlight": "0.15.0",
|
||||
"react-i18next": "^15.1.3",
|
||||
"react-router-dom": "^6.26.1",
|
||||
"react-simple-code-editor": "^0.11.2",
|
||||
"react-transition-group": "^4.4.2",
|
||||
"rrweb-snapshot": "^2.0.0-alpha.4",
|
||||
"sequelize": "^6.37.3",
|
||||
"sequelize-typescript": "^2.1.6",
|
||||
@@ -119,9 +112,6 @@
|
||||
"@types/node": "22.7.9",
|
||||
"@types/node-cron": "^3.0.11",
|
||||
"@types/node-fetch": "^2.6.12",
|
||||
"@types/prismjs": "^1.26.0",
|
||||
"@types/react-highlight": "^0.12.5",
|
||||
"@types/react-transition-group": "^4.4.4",
|
||||
"@types/styled-components": "^5.1.23",
|
||||
"@types/swagger-jsdoc": "^6.0.4",
|
||||
"@types/swagger-ui-express": "^4.1.6",
|
||||
|
||||
@@ -658,6 +658,16 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
|
||||
};
|
||||
}
|
||||
|
||||
browser = browserPool.getRemoteBrowser(plainRun.browserId);
|
||||
if (!browser) {
|
||||
throw new Error('Could not access browser');
|
||||
}
|
||||
|
||||
let currentPage = await browser.getCurrentPage();
|
||||
if (!currentPage) {
|
||||
throw new Error('Could not create a new page');
|
||||
}
|
||||
|
||||
if (recording.recording_meta.type === 'scrape') {
|
||||
logger.log('info', `Executing scrape robot for API run ${id}`);
|
||||
|
||||
@@ -686,13 +696,13 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
|
||||
|
||||
// Markdown conversion
|
||||
if (formats.includes('markdown')) {
|
||||
markdown = await convertPageToMarkdown(url);
|
||||
markdown = await convertPageToMarkdown(url, currentPage);
|
||||
serializableOutput.markdown = [{ content: markdown }];
|
||||
}
|
||||
|
||||
// HTML conversion
|
||||
if (formats.includes('html')) {
|
||||
html = await convertPageToHTML(url);
|
||||
html = await convertPageToHTML(url, currentPage);
|
||||
serializableOutput.html = [{ content: html }];
|
||||
}
|
||||
|
||||
@@ -820,16 +830,6 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
|
||||
|
||||
plainRun.status = 'running';
|
||||
|
||||
browser = browserPool.getRemoteBrowser(plainRun.browserId);
|
||||
if (!browser) {
|
||||
throw new Error('Could not access browser');
|
||||
}
|
||||
|
||||
let currentPage = await browser.getCurrentPage();
|
||||
if (!currentPage) {
|
||||
throw new Error('Could not create a new page');
|
||||
}
|
||||
|
||||
const workflow = AddGeneratedFlags(recording.recording);
|
||||
|
||||
browser.interpreter.setRunId(plainRun.runId);
|
||||
|
||||
@@ -1,9 +1,27 @@
|
||||
import { connectToRemoteBrowser } from "../browser-management/browserConnection";
|
||||
import { parseMarkdown } from "./markdown";
|
||||
import logger from "../logger";
|
||||
|
||||
/**
 * Navigates `page` to `url`, first waiting for the "networkidle" state; if
 * that attempt throws, retries once waiting only for "domcontentloaded".
 *
 * @param page - Object exposing a `goto(url, options)` method
 *               (presumably a Playwright page; confirm against callers).
 * @param url - Address to navigate to.
 * @param timeoutMs - Timeout in milliseconds applied to each attempt.
 *                    Defaults to 100000, the value previously hard-coded.
 * @returns Whatever the successful `goto` call resolves to.
 * @throws The error of the fallback attempt when both attempts fail; the
 *         error of the first attempt is discarded.
 */
async function gotoWithFallback(page: any, url: string, timeoutMs: number = 100000) {
  try {
    return await page.goto(url, {
      waitUntil: "networkidle",
      timeout: timeoutMs,
    });
  } catch (err) {
    // fallback: JS-heavy or unstable sites
    return await page.goto(url, {
      waitUntil: "domcontentloaded",
      timeout: timeoutMs,
    });
  }
}
|
||||
|
||||
/**
|
||||
* Fetches a webpage, strips scripts/styles/images/etc,
|
||||
* returns clean Markdown using parser.
|
||||
* @param url - The URL to convert
|
||||
* @param existingPage - Optional existing Playwright page instance to reuse
|
||||
*/
|
||||
export async function convertPageToMarkdown(url: string): Promise<string> {
|
||||
const browser = await connectToRemoteBrowser();
|
||||
@@ -11,7 +29,7 @@ export async function convertPageToMarkdown(url: string): Promise<string> {
|
||||
|
||||
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
||||
|
||||
await page.addInitScript(() => {
|
||||
const cleanedHtml = await page.evaluate(() => {
|
||||
const selectors = [
|
||||
"script",
|
||||
"style",
|
||||
@@ -42,14 +60,16 @@ export async function convertPageToMarkdown(url: string): Promise<string> {
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// Re-extract HTML after cleanup
|
||||
const cleanedHtml = await page.evaluate(() => {
|
||||
return document.documentElement.outerHTML;
|
||||
});
|
||||
|
||||
await browser.close();
|
||||
if (shouldCloseBrowser && browser) {
|
||||
logger.log('info', `[Scrape] Closing browser instance created for markdown conversion`);
|
||||
await browser.close();
|
||||
} else {
|
||||
logger.log('info', `[Scrape] Keeping existing browser instance open after markdown conversion`);
|
||||
}
|
||||
|
||||
// Convert cleaned HTML → Markdown
|
||||
const markdown = await parseMarkdown(cleanedHtml, url);
|
||||
@@ -59,6 +79,8 @@ export async function convertPageToMarkdown(url: string): Promise<string> {
|
||||
/**
|
||||
* Fetches a webpage, strips scripts/styles/images/etc,
|
||||
* returns clean HTML.
|
||||
* @param url - The URL to convert
|
||||
* @param existingPage - Optional existing Playwright page instance to reuse
|
||||
*/
|
||||
export async function convertPageToHTML(url: string): Promise<string> {
|
||||
const browser = await connectToRemoteBrowser();
|
||||
@@ -66,7 +88,7 @@ export async function convertPageToHTML(url: string): Promise<string> {
|
||||
|
||||
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
||||
|
||||
await page.addInitScript(() => {
|
||||
const cleanedHtml = await page.evaluate(() => {
|
||||
const selectors = [
|
||||
"script",
|
||||
"style",
|
||||
@@ -97,14 +119,16 @@ export async function convertPageToHTML(url: string): Promise<string> {
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// Re-extract HTML after cleanup
|
||||
const cleanedHtml = await page.evaluate(() => {
|
||||
return document.documentElement.outerHTML;
|
||||
});
|
||||
|
||||
await browser.close();
|
||||
if (shouldCloseBrowser && browser) {
|
||||
logger.log('info', `[Scrape] Closing browser instance created for HTML conversion`);
|
||||
await browser.close();
|
||||
} else {
|
||||
logger.log('info', `[Scrape] Keeping existing browser instance open after HTML conversion`);
|
||||
}
|
||||
|
||||
// Return cleaned HTML directly
|
||||
return cleanedHtml;
|
||||
|
||||
@@ -181,7 +181,7 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
||||
|
||||
logger.log('info', `Browser ${browserId} found and ready for execution`);
|
||||
|
||||
try {
|
||||
try {
|
||||
// Find the recording
|
||||
const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
|
||||
|
||||
@@ -189,6 +189,30 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
||||
throw new Error(`Recording for run ${data.runId} not found`);
|
||||
}
|
||||
|
||||
let currentPage = browser.getCurrentPage();
|
||||
|
||||
const pageWaitStart = Date.now();
|
||||
let lastPageLogTime = 0;
|
||||
let pageAttempts = 0;
|
||||
const MAX_PAGE_ATTEMPTS = 15;
|
||||
|
||||
while (!currentPage && (Date.now() - pageWaitStart) < BROWSER_PAGE_TIMEOUT && pageAttempts < MAX_PAGE_ATTEMPTS) {
|
||||
const currentTime = Date.now();
|
||||
pageAttempts++;
|
||||
|
||||
if (currentTime - lastPageLogTime > 5000) {
|
||||
logger.log('info', `Page not ready for browser ${browserId}, waiting... (${Math.round((currentTime - pageWaitStart) / 1000)}s elapsed)`);
|
||||
lastPageLogTime = currentTime;
|
||||
}
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
currentPage = browser.getCurrentPage();
|
||||
}
|
||||
|
||||
if (!currentPage) {
|
||||
throw new Error(`No current page available for browser ${browserId} after ${BROWSER_PAGE_TIMEOUT/1000}s timeout`);
|
||||
}
|
||||
|
||||
if (recording.recording_meta.type === 'scrape') {
|
||||
logger.log('info', `Executing scrape robot for run ${data.runId}`);
|
||||
|
||||
@@ -212,13 +236,13 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
||||
|
||||
// Markdown conversion
|
||||
if (formats.includes('markdown')) {
|
||||
markdown = await convertPageToMarkdown(url);
|
||||
markdown = await convertPageToMarkdown(url, currentPage);
|
||||
serializableOutput.markdown = [{ content: markdown }];
|
||||
}
|
||||
|
||||
// HTML conversion
|
||||
if (formats.includes('html')) {
|
||||
html = await convertPageToHTML(url);
|
||||
html = await convertPageToHTML(url, currentPage);
|
||||
serializableOutput.html = [{ content: html }];
|
||||
}
|
||||
|
||||
@@ -328,30 +352,6 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
||||
}
|
||||
};
|
||||
|
||||
let currentPage = browser.getCurrentPage();
|
||||
|
||||
const pageWaitStart = Date.now();
|
||||
let lastPageLogTime = 0;
|
||||
let pageAttempts = 0;
|
||||
const MAX_PAGE_ATTEMPTS = 15;
|
||||
|
||||
while (!currentPage && (Date.now() - pageWaitStart) < BROWSER_PAGE_TIMEOUT && pageAttempts < MAX_PAGE_ATTEMPTS) {
|
||||
const currentTime = Date.now();
|
||||
pageAttempts++;
|
||||
|
||||
if (currentTime - lastPageLogTime > 5000) {
|
||||
logger.log('info', `Page not ready for browser ${browserId}, waiting... (${Math.round((currentTime - pageWaitStart) / 1000)}s elapsed)`);
|
||||
lastPageLogTime = currentTime;
|
||||
}
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
currentPage = browser.getCurrentPage();
|
||||
}
|
||||
|
||||
if (!currentPage) {
|
||||
throw new Error(`No current page available for browser ${browserId} after ${BROWSER_PAGE_TIMEOUT/1000}s timeout`);
|
||||
}
|
||||
|
||||
logger.log('info', `Starting workflow execution for run ${data.runId}`);
|
||||
|
||||
await run.update({
|
||||
|
||||
@@ -205,6 +205,16 @@ async function executeRun(id: string, userId: string) {
|
||||
}
|
||||
}
|
||||
|
||||
browser = browserPool.getRemoteBrowser(plainRun.browserId);
|
||||
if (!browser) {
|
||||
throw new Error('Could not access browser');
|
||||
}
|
||||
|
||||
let currentPage = await browser.getCurrentPage();
|
||||
if (!currentPage) {
|
||||
throw new Error('Could not create a new page');
|
||||
}
|
||||
|
||||
if (recording.recording_meta.type === 'scrape') {
|
||||
logger.log('info', `Executing scrape robot for scheduled run ${id}`);
|
||||
|
||||
@@ -249,13 +259,13 @@ async function executeRun(id: string, userId: string) {
|
||||
|
||||
// Markdown conversion
|
||||
if (formats.includes('markdown')) {
|
||||
markdown = await convertPageToMarkdown(url);
|
||||
markdown = await convertPageToMarkdown(url, currentPage);
|
||||
serializableOutput.markdown = [{ content: markdown }];
|
||||
}
|
||||
|
||||
// HTML conversion
|
||||
if (formats.includes('html')) {
|
||||
html = await convertPageToHTML(url);
|
||||
html = await convertPageToHTML(url, currentPage);
|
||||
serializableOutput.html = [{ content: html }];
|
||||
}
|
||||
|
||||
@@ -388,16 +398,6 @@ async function executeRun(id: string, userId: string) {
|
||||
logger.log('warn', `Failed to send run-started notification for run ${plainRun.runId}: ${socketError.message}`);
|
||||
}
|
||||
|
||||
browser = browserPool.getRemoteBrowser(plainRun.browserId);
|
||||
if (!browser) {
|
||||
throw new Error('Could not access browser');
|
||||
}
|
||||
|
||||
let currentPage = await browser.getCurrentPage();
|
||||
if (!currentPage) {
|
||||
throw new Error('Could not create a new page');
|
||||
}
|
||||
|
||||
const workflow = AddGeneratedFlags(recording.recording);
|
||||
|
||||
// Set run ID for real-time data persistence
|
||||
|
||||
@@ -304,8 +304,6 @@ export const BrowserWindow = () => {
|
||||
|
||||
const createFieldsFromChildSelectors = useCallback(
|
||||
(childSelectors: string[], listSelector: string) => {
|
||||
if (!childSelectors.length || !currentSnapshot) return {};
|
||||
|
||||
const iframeElement = document.querySelector(
|
||||
"#dom-browser-iframe"
|
||||
) as HTMLIFrameElement;
|
||||
@@ -323,7 +321,6 @@ export const BrowserWindow = () => {
|
||||
|
||||
const uniqueChildSelectors = [...new Set(childSelectors)];
|
||||
|
||||
// Filter child selectors that occur in at least 2 out of first 10 list elements
|
||||
const validateChildSelectors = (selectors: string[]): string[] => {
|
||||
try {
|
||||
// Get first 10 list elements
|
||||
@@ -352,13 +349,10 @@ export const BrowserWindow = () => {
|
||||
|
||||
// If we can't access the element, it's likely in shadow DOM - include it
|
||||
if (!testElement) {
|
||||
console.log(`Including potentially shadow DOM selector: ${selector}`);
|
||||
validSelectors.push(selector);
|
||||
continue;
|
||||
}
|
||||
} catch (accessError) {
|
||||
// If there's an error accessing, assume shadow DOM and include it
|
||||
console.log(`Including selector due to access error: ${selector}`);
|
||||
validSelectors.push(selector);
|
||||
continue;
|
||||
}
|
||||
@@ -395,7 +389,6 @@ export const BrowserWindow = () => {
|
||||
}
|
||||
};
|
||||
|
||||
// Enhanced XPath evaluation for multiple elements
|
||||
const evaluateXPathAllWithShadowSupport = (
|
||||
document: Document,
|
||||
xpath: string,
|
||||
@@ -423,8 +416,6 @@ export const BrowserWindow = () => {
|
||||
return elements;
|
||||
}
|
||||
|
||||
// If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal
|
||||
// This is a simplified version - for multiple elements, we'll primarily rely on regular XPath
|
||||
return elements;
|
||||
} catch (err) {
|
||||
console.error("XPath evaluation failed:", xpath, err);
|
||||
@@ -432,7 +423,9 @@ export const BrowserWindow = () => {
|
||||
}
|
||||
};
|
||||
|
||||
const validatedChildSelectors = validateChildSelectors(uniqueChildSelectors);
|
||||
// True only for a string that still has characters left after trimming
// whitespace; null, undefined and blank strings are rejected.
const isValidData = (text: string | null | undefined): boolean => {
  if (typeof text !== "string") {
    return false;
  }
  return text.trim() !== "";
};
|
||||
|
||||
const isElementVisible = (element: HTMLElement): boolean => {
|
||||
try {
|
||||
@@ -443,443 +436,119 @@ export const BrowserWindow = () => {
|
||||
}
|
||||
};
|
||||
|
||||
const isValidData = (data: string): boolean => {
|
||||
if (!data || data.trim().length === 0) return false;
|
||||
const createFieldData = (element: HTMLElement, selector: string, forceAttribute?: string) => {
|
||||
const tagName = element.tagName.toLowerCase();
|
||||
let data = '';
|
||||
let attribute = forceAttribute || 'innerText';
|
||||
|
||||
const trimmed = data.trim();
|
||||
|
||||
// Filter out single letters
|
||||
if (trimmed.length === 1) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Filter out pure symbols/punctuation
|
||||
if (trimmed.length < 3 && /^[^\w\s]+$/.test(trimmed)) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Filter out whitespace and punctuation only
|
||||
if (/^[\s\p{P}\p{S}]*$/u.test(trimmed)) return false;
|
||||
|
||||
return trimmed.length > 0;
|
||||
};
|
||||
|
||||
// Enhanced shadow DOM-aware element evaluation
|
||||
const evaluateXPathWithShadowSupport = (
|
||||
document: Document,
|
||||
xpath: string,
|
||||
isShadow: boolean = false
|
||||
): Element | null => {
|
||||
try {
|
||||
// First try regular XPath evaluation
|
||||
const result = document.evaluate(
|
||||
xpath,
|
||||
document,
|
||||
null,
|
||||
XPathResult.FIRST_ORDERED_NODE_TYPE,
|
||||
null
|
||||
).singleNodeValue as Element | null;
|
||||
|
||||
if (!isShadow || result) {
|
||||
return result;
|
||||
if (forceAttribute) {
|
||||
if (forceAttribute === 'href') {
|
||||
data = element.getAttribute('href') || '';
|
||||
} else if (forceAttribute === 'innerText') {
|
||||
data = (element.textContent || '').trim();
|
||||
}
|
||||
|
||||
// If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal
|
||||
let cleanPath = xpath;
|
||||
let isIndexed = false;
|
||||
|
||||
const indexedMatch = xpath.match(/^\((.*?)\)\[(\d+)\](.*)$/);
|
||||
if (indexedMatch) {
|
||||
cleanPath = indexedMatch[1] + indexedMatch[3];
|
||||
isIndexed = true;
|
||||
} else if (tagName === 'img') {
|
||||
data = element.getAttribute('src') || '';
|
||||
attribute = 'src';
|
||||
} else if (tagName === 'a') {
|
||||
const href = element.getAttribute('href') || '';
|
||||
const text = (element.textContent || '').trim();
|
||||
if (href && href !== '#' && !href.startsWith('javascript:')) {
|
||||
data = href;
|
||||
attribute = 'href';
|
||||
} else if (text) {
|
||||
data = text;
|
||||
attribute = 'innerText';
|
||||
}
|
||||
|
||||
const pathParts = cleanPath
|
||||
.replace(/^\/\//, "")
|
||||
.split("/")
|
||||
.map((p) => p.trim())
|
||||
.filter((p) => p.length > 0);
|
||||
|
||||
let currentContexts: (Document | Element | ShadowRoot)[] = [document];
|
||||
|
||||
for (let i = 0; i < pathParts.length; i++) {
|
||||
const part = pathParts[i];
|
||||
const nextContexts: (Element | ShadowRoot)[] = [];
|
||||
|
||||
for (const ctx of currentContexts) {
|
||||
const positionalMatch = part.match(/^([^[]+)\[(\d+)\]$/);
|
||||
let partWithoutPosition = part;
|
||||
let requestedPosition: number | null = null;
|
||||
|
||||
if (positionalMatch) {
|
||||
partWithoutPosition = positionalMatch[1];
|
||||
requestedPosition = parseInt(positionalMatch[2]);
|
||||
}
|
||||
|
||||
const matched = queryInsideContext(ctx, partWithoutPosition);
|
||||
|
||||
let elementsToAdd = matched;
|
||||
if (requestedPosition !== null) {
|
||||
const index = requestedPosition - 1;
|
||||
if (index >= 0 && index < matched.length) {
|
||||
elementsToAdd = [matched[index]];
|
||||
} else {
|
||||
elementsToAdd = [];
|
||||
}
|
||||
}
|
||||
|
||||
elementsToAdd.forEach((el) => {
|
||||
nextContexts.push(el);
|
||||
if (el.shadowRoot) {
|
||||
nextContexts.push(el.shadowRoot);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if (nextContexts.length === 0) {
|
||||
return null;
|
||||
}
|
||||
|
||||
currentContexts = nextContexts;
|
||||
}
|
||||
|
||||
if (currentContexts.length > 0) {
|
||||
if (isIndexed && indexedMatch) {
|
||||
const requestedIndex = parseInt(indexedMatch[2]) - 1;
|
||||
if (requestedIndex >= 0 && requestedIndex < currentContexts.length) {
|
||||
return currentContexts[requestedIndex] as Element;
|
||||
} else {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
return currentContexts[0] as Element;
|
||||
}
|
||||
|
||||
return null;
|
||||
} catch (err) {
|
||||
console.error("XPath evaluation failed:", xpath, err);
|
||||
return null;
|
||||
}
|
||||
};
|
||||
|
||||
// Resolves one XPath step inside `context`: selects every element matching
// the step's tag name via querySelectorAll, then keeps those that satisfy
// all of the step's bracketed conditions. Any error is logged and yields [].
const queryInsideContext = (
  context: Document | Element | ShadowRoot,
  part: string
): Element[] => {
  try {
    const step = parseXPathPart(part);
    const byTag = Array.from(context.querySelectorAll(step.tagName));

    // Filtering an empty candidate list simply produces an empty result.
    return byTag.filter((candidate) =>
      elementMatchesConditions(candidate, step.conditions)
    );
  } catch (err) {
    console.error("Error in queryInsideContext:", err);
    return [];
  }
};
|
||||
|
||||
// Splits a single XPath step such as `div[@id='x'][2]` into its leading tag
// name ("*" when the step does not start with a name) and the raw text of
// each `[...]` predicate, brackets removed.
const parseXPathPart = (
  part: string
): { tagName: string; conditions: string[] } => {
  const leadingName = /^([a-zA-Z0-9-]+)/.exec(part);
  const bracketed = part.match(/\[([^\]]+)\]/g) ?? [];

  return {
    tagName: leadingName === null ? "*" : leadingName[1],
    // Each match includes its surrounding brackets; strip one char per side.
    conditions: bracketed.map((chunk) => chunk.substring(1, chunk.length - 1)),
  };
};
|
||||
|
||||
// True when `element` satisfies every condition in the list (and therefore
// also when the list is empty). Evaluation stops at the first failure.
const elementMatchesConditions = (
  element: Element,
  conditions: string[]
): boolean =>
  conditions.every((single) => elementMatchesCondition(element, single));
|
||||
|
||||
const elementMatchesCondition = (
|
||||
element: Element,
|
||||
condition: string
|
||||
): boolean => {
|
||||
condition = condition.trim();
|
||||
|
||||
if (/^\d+$/.test(condition)) {
|
||||
return true;
|
||||
} else {
|
||||
data = (element.textContent || '').trim();
|
||||
attribute = 'innerText';
|
||||
}
|
||||
|
||||
// Handle @attribute="value"
|
||||
const attrMatch = condition.match(/^@([^=]+)=["']([^"']+)["']$/);
|
||||
if (attrMatch) {
|
||||
const [, attr, value] = attrMatch;
|
||||
const elementValue = element.getAttribute(attr);
|
||||
return elementValue === value;
|
||||
}
|
||||
if (!data) return null;
|
||||
|
||||
// Handle contains(@class, 'value')
|
||||
const classContainsMatch = condition.match(
|
||||
/^contains\(@class,\s*["']([^"']+)["']\)$/
|
||||
);
|
||||
if (classContainsMatch) {
|
||||
const className = classContainsMatch[1];
|
||||
return element.classList.contains(className);
|
||||
}
|
||||
|
||||
// Handle contains(@attribute, 'value')
|
||||
const attrContainsMatch = condition.match(
|
||||
/^contains\(@([^,]+),\s*["']([^"']+)["']\)$/
|
||||
);
|
||||
if (attrContainsMatch) {
|
||||
const [, attr, value] = attrContainsMatch;
|
||||
const elementValue = element.getAttribute(attr) || "";
|
||||
return elementValue.includes(value);
|
||||
}
|
||||
|
||||
// Handle text()="value"
|
||||
const textMatch = condition.match(/^text\(\)=["']([^"']+)["']$/);
|
||||
if (textMatch) {
|
||||
const expectedText = textMatch[1];
|
||||
const elementText = element.textContent?.trim() || "";
|
||||
return elementText === expectedText;
|
||||
}
|
||||
|
||||
// Handle contains(text(), 'value')
|
||||
const textContainsMatch = condition.match(
|
||||
/^contains\(text\(\),\s*["']([^"']+)["']\)$/
|
||||
);
|
||||
if (textContainsMatch) {
|
||||
const expectedText = textContainsMatch[1];
|
||||
const elementText = element.textContent?.trim() || "";
|
||||
return elementText.includes(expectedText);
|
||||
}
|
||||
|
||||
// Handle count(*)=0 (element has no children)
|
||||
if (condition === "count(*)=0") {
|
||||
return element.children.length === 0;
|
||||
}
|
||||
|
||||
// Handle other count conditions
|
||||
const countMatch = condition.match(/^count\(\*\)=(\d+)$/);
|
||||
if (countMatch) {
|
||||
const expectedCount = parseInt(countMatch[1]);
|
||||
return element.children.length === expectedCount;
|
||||
}
|
||||
|
||||
return true;
|
||||
};
|
||||
|
||||
// Enhanced value extraction with shadow DOM support
|
||||
const extractValueWithShadowSupport = (
|
||||
element: Element,
|
||||
attribute: string
|
||||
): string | null => {
|
||||
if (!element) return null;
|
||||
|
||||
const baseURL =
|
||||
element.ownerDocument?.location?.href || window.location.origin;
|
||||
|
||||
// Check shadow DOM content first
|
||||
if (element.shadowRoot) {
|
||||
const shadowContent = element.shadowRoot.textContent;
|
||||
if (shadowContent?.trim()) {
|
||||
return shadowContent.trim();
|
||||
return {
|
||||
data,
|
||||
selectorObj: {
|
||||
selector,
|
||||
attribute,
|
||||
tag: tagName.toUpperCase(),
|
||||
isShadow: element.getRootNode() instanceof ShadowRoot
|
||||
}
|
||||
}
|
||||
|
||||
if (attribute === "innerText") {
|
||||
let textContent =
|
||||
(element as HTMLElement).innerText?.trim() ||
|
||||
(element as HTMLElement).textContent?.trim();
|
||||
|
||||
if (!textContent) {
|
||||
const dataAttributes = [
|
||||
"data-600",
|
||||
"data-text",
|
||||
"data-label",
|
||||
"data-value",
|
||||
"data-content",
|
||||
];
|
||||
for (const attr of dataAttributes) {
|
||||
const dataValue = element.getAttribute(attr);
|
||||
if (dataValue && dataValue.trim()) {
|
||||
textContent = dataValue.trim();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return textContent || null;
|
||||
} else if (attribute === "innerHTML") {
|
||||
return element.innerHTML?.trim() || null;
|
||||
} else if (attribute === "href") {
|
||||
let anchorElement = element;
|
||||
|
||||
if (element.tagName !== "A") {
|
||||
anchorElement =
|
||||
element.closest("a") ||
|
||||
element.parentElement?.closest("a") ||
|
||||
element;
|
||||
}
|
||||
|
||||
const hrefValue = anchorElement.getAttribute("href");
|
||||
if (!hrefValue || hrefValue.trim() === "") {
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
return new URL(hrefValue, baseURL).href;
|
||||
} catch (e) {
|
||||
console.warn("Error creating URL from", hrefValue, e);
|
||||
return hrefValue;
|
||||
}
|
||||
} else if (attribute === "src") {
|
||||
const attrValue = element.getAttribute(attribute);
|
||||
const dataAttr = attrValue || element.getAttribute("data-" + attribute);
|
||||
|
||||
if (!dataAttr || dataAttr.trim() === "") {
|
||||
const style = window.getComputedStyle(element as HTMLElement);
|
||||
const bgImage = style.backgroundImage;
|
||||
if (bgImage && bgImage !== "none") {
|
||||
const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
|
||||
return matches ? new URL(matches[1], baseURL).href : null;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
try {
|
||||
return new URL(dataAttr, baseURL).href;
|
||||
} catch (e) {
|
||||
console.warn("Error creating URL from", dataAttr, e);
|
||||
return dataAttr;
|
||||
}
|
||||
}
|
||||
return element.getAttribute(attribute);
|
||||
};
|
||||
|
||||
// Finds the deepest descendant that carries valid text, with a bounded search
// to prevent hanging: at most depth 3 below `element`, and only the first
// three children of each node are visited. Nodes are visited parent-first,
// left to right; the first node found at a new maximum depth wins. Falls back
// to `element` itself when nothing deeper qualifies.
// NOTE(review): `instanceof HTMLElement` uses this window's constructor;
// nodes owned by an iframe document may not pass it. Confirm against callers.
const findDeepestChild = (element: HTMLElement): HTMLElement => {
  let best = element;
  let bestDepth = 0;

  // Explicit stack; children are pushed in reverse so they pop left to right.
  const pending: Array<{ node: HTMLElement; level: number }> = [
    { node: element, level: 0 },
  ];

  while (pending.length > 0) {
    const { node, level } = pending.pop()!;
    if (level > 3) {
      continue;
    }

    const text = node.textContent?.trim() || "";
    if (isValidData(text) && level > bestDepth) {
      bestDepth = level;
      best = node;
    }

    const firstThree = Array.from(node.children).slice(0, 3);
    for (let i = firstThree.length - 1; i >= 0; i--) {
      const child = firstThree[i];
      if (child instanceof HTMLElement) {
        pending.push({ node: child, level: level + 1 });
      }
    }
  }

  return best;
};
|
||||
|
||||
validatedChildSelectors.forEach((childSelector, index) => {
|
||||
const validatedChildSelectors = validateChildSelectors(uniqueChildSelectors);
|
||||
|
||||
validatedChildSelectors.forEach((selector, index) => {
|
||||
try {
|
||||
// Detect if this selector should use shadow DOM traversal
|
||||
const isShadowSelector = childSelector.includes('>>') ||
|
||||
childSelector.startsWith('//') &&
|
||||
(listSelector.includes('>>') || currentSnapshot?.snapshot);
|
||||
|
||||
const element = evaluateXPathWithShadowSupport(
|
||||
const elements = evaluateXPathAllWithShadowSupport(
|
||||
iframeElement.contentDocument!,
|
||||
childSelector,
|
||||
isShadowSelector
|
||||
) as HTMLElement;
|
||||
selector,
|
||||
selector.includes(">>") || selector.startsWith("//")
|
||||
);
|
||||
|
||||
if (element && isElementVisible(element)) {
|
||||
if (elements.length === 0) return;
|
||||
|
||||
const element = elements[0] as HTMLElement;
|
||||
const tagName = element.tagName.toLowerCase();
|
||||
const isShadow = element.getRootNode() instanceof ShadowRoot;
|
||||
|
||||
if (isElementVisible(element)) {
|
||||
const rect = element.getBoundingClientRect();
|
||||
const position = { x: rect.left, y: rect.top };
|
||||
|
||||
const tagName = element.tagName.toLowerCase();
|
||||
const isShadow = element.getRootNode() instanceof ShadowRoot;
|
||||
|
||||
if (tagName === "a") {
|
||||
const anchor = element as HTMLAnchorElement;
|
||||
const href = extractValueWithShadowSupport(anchor, "href");
|
||||
const text = extractValueWithShadowSupport(anchor, "innerText");
|
||||
|
||||
if (
|
||||
href &&
|
||||
href.trim() !== "" &&
|
||||
href !== window.location.href &&
|
||||
!href.startsWith("javascript:") &&
|
||||
!href.startsWith("#")
|
||||
) {
|
||||
const fieldIdHref = Date.now() + index * 1000;
|
||||
|
||||
candidateFields.push({
|
||||
id: fieldIdHref,
|
||||
element: element,
|
||||
isLeaf: true,
|
||||
depth: 0,
|
||||
position: position,
|
||||
field: {
|
||||
id: fieldIdHref,
|
||||
type: "text",
|
||||
label: `Label ${index * 2 + 1}`,
|
||||
data: href,
|
||||
selectorObj: {
|
||||
selector: childSelector,
|
||||
tag: element.tagName,
|
||||
isShadow: isShadow,
|
||||
attribute: "href",
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
const fieldIdText = Date.now() + index * 1000 + 1;
|
||||
if (tagName === 'a') {
|
||||
const href = element.getAttribute('href');
|
||||
const text = (element.textContent || '').trim();
|
||||
|
||||
if (text && isValidData(text)) {
|
||||
candidateFields.push({
|
||||
id: fieldIdText,
|
||||
element: element,
|
||||
isLeaf: true,
|
||||
depth: 0,
|
||||
position: position,
|
||||
field: {
|
||||
id: fieldIdText,
|
||||
type: "text",
|
||||
label: `Label ${index * 2 + 2}`,
|
||||
data: text,
|
||||
selectorObj: {
|
||||
selector: childSelector,
|
||||
tag: element.tagName,
|
||||
isShadow: isShadow,
|
||||
attribute: "innerText",
|
||||
},
|
||||
},
|
||||
});
|
||||
const textField = createFieldData(element, selector, 'innerText');
|
||||
if (textField && textField.data) {
|
||||
const fieldId = Date.now() + index * 1000;
|
||||
|
||||
candidateFields.push({
|
||||
id: fieldId,
|
||||
element: element,
|
||||
isLeaf: true,
|
||||
depth: 0,
|
||||
position: position,
|
||||
field: {
|
||||
id: fieldId,
|
||||
type: "text",
|
||||
label: `Label ${index * 2 + 1}`,
|
||||
data: textField.data,
|
||||
selectorObj: textField.selectorObj
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
if (href && href !== '#' && !href.startsWith('javascript:')) {
|
||||
const hrefField = createFieldData(element, selector, 'href');
|
||||
if (hrefField && hrefField.data) {
|
||||
const fieldId = Date.now() + index * 1000 + 1;
|
||||
|
||||
candidateFields.push({
|
||||
id: fieldId,
|
||||
element: element,
|
||||
isLeaf: true,
|
||||
depth: 0,
|
||||
position: position,
|
||||
field: {
|
||||
id: fieldId,
|
||||
type: "text",
|
||||
label: `Label ${index * 2 + 2}`,
|
||||
data: hrefField.data,
|
||||
selectorObj: hrefField.selectorObj
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
} else if (tagName === "img") {
|
||||
const img = element as HTMLImageElement;
|
||||
const src = extractValueWithShadowSupport(img, "src");
|
||||
const alt = extractValueWithShadowSupport(img, "alt");
|
||||
const src = element.getAttribute("src");
|
||||
|
||||
if (src && !src.startsWith("data:") && src.length > 10) {
|
||||
if (src && isValidData(src)) {
|
||||
const fieldId = Date.now() + index * 1000;
|
||||
|
||||
candidateFields.push({
|
||||
@@ -894,7 +563,7 @@ export const BrowserWindow = () => {
|
||||
label: `Label ${index + 1}`,
|
||||
data: src,
|
||||
selectorObj: {
|
||||
selector: childSelector,
|
||||
selector: selector,
|
||||
tag: element.tagName,
|
||||
isShadow: isShadow,
|
||||
attribute: "src",
|
||||
@@ -902,9 +571,11 @@ export const BrowserWindow = () => {
|
||||
},
|
||||
});
|
||||
}
|
||||
} else {
|
||||
const fieldData = createFieldData(element, selector);
|
||||
|
||||
if (alt && isValidData(alt)) {
|
||||
const fieldId = Date.now() + index * 1000 + 1;
|
||||
if (fieldData && fieldData.data && isValidData(fieldData.data)) {
|
||||
const fieldId = Date.now() + index * 1000;
|
||||
|
||||
candidateFields.push({
|
||||
id: fieldId,
|
||||
@@ -912,127 +583,39 @@ export const BrowserWindow = () => {
|
||||
isLeaf: true,
|
||||
depth: 0,
|
||||
position: position,
|
||||
field: {
|
||||
id: fieldId,
|
||||
type: "text",
|
||||
label: `Label ${index + 2}`,
|
||||
data: alt,
|
||||
selectorObj: {
|
||||
selector: childSelector,
|
||||
tag: element.tagName,
|
||||
isShadow: isShadow,
|
||||
attribute: "alt",
|
||||
},
|
||||
},
|
||||
});
|
||||
}
|
||||
} else {
|
||||
const deepestElement = findDeepestChild(element);
|
||||
const data = extractValueWithShadowSupport(deepestElement, "innerText");
|
||||
|
||||
if (data && isValidData(data)) {
|
||||
const isLeaf = isLeafElement(deepestElement);
|
||||
const depth = getElementDepthFromList(
|
||||
deepestElement,
|
||||
listSelector,
|
||||
iframeElement.contentDocument!
|
||||
);
|
||||
|
||||
const fieldId = Date.now() + index;
|
||||
|
||||
candidateFields.push({
|
||||
id: fieldId,
|
||||
element: deepestElement,
|
||||
isLeaf: isLeaf,
|
||||
depth: depth,
|
||||
position: position,
|
||||
field: {
|
||||
id: fieldId,
|
||||
type: "text",
|
||||
label: `Label ${index + 1}`,
|
||||
data: data,
|
||||
selectorObj: {
|
||||
selector: childSelector,
|
||||
tag: deepestElement.tagName,
|
||||
isShadow: deepestElement.getRootNode() instanceof ShadowRoot,
|
||||
attribute: "innerText",
|
||||
},
|
||||
},
|
||||
data: fieldData.data,
|
||||
selectorObj: fieldData.selectorObj
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
} catch (error) {
|
||||
console.warn(
|
||||
`Failed to process child selector ${childSelector}:`,
|
||||
error
|
||||
);
|
||||
console.warn(`Failed to process child selector ${selector}:`, error);
|
||||
}
|
||||
});
|
||||
|
||||
candidateFields.sort((a, b) => {
|
||||
const yDiff = a.position.y - b.position.y;
|
||||
|
||||
|
||||
if (Math.abs(yDiff) <= 5) {
|
||||
return a.position.x - b.position.x;
|
||||
}
|
||||
|
||||
|
||||
return yDiff;
|
||||
});
|
||||
|
||||
const filteredCandidates = removeParentChildDuplicates(candidateFields);
|
||||
|
||||
const finalFields = removeDuplicateContent(filteredCandidates);
|
||||
return finalFields;
|
||||
},
|
||||
[currentSnapshot]
|
||||
);
|
||||
|
||||
/**
 * Reports whether `element` counts as a leaf for field extraction.
 *
 * An element is a leaf when it has no element children, or when none of its
 * children carries non-empty text that differs from the element's own
 * trimmed text (i.e. children are empty or merely wrap the same text).
 *
 * @param element Element to classify.
 * @returns `true` when the element has no child with distinct, non-empty text.
 */
const isLeafElement = (element: HTMLElement): boolean => {
  const children = Array.from(element.children) as HTMLElement[];
  if (children.length === 0) return true;

  // Read once: the parent's text is the same for every child comparison.
  const ownText = element.textContent?.trim();

  const hasContentfulChildren = children.some((child) => {
    const childText = child.textContent?.trim() || "";
    return childText.length > 0 && childText !== ownText;
  });

  return !hasContentfulChildren;
};
|
||||
|
||||
/**
 * Counts how many `parentElement` hops separate `element` from the first
 * node matched by the XPath `listSelector`.
 *
 * @param element      Element whose nesting depth is wanted.
 * @param listSelector XPath expression; only its first match is used.
 * @param document     Document the XPath is evaluated against.
 * @returns The hop count when the walk reaches the matched list node, and `0`
 *          when the XPath matches nothing, the list node is not reached
 *          (including when the walk is cut off by the depth cap), or
 *          evaluation throws.
 */
const getElementDepthFromList = (
  element: HTMLElement,
  listSelector: string,
  document: Document
): number => {
  // Upper bound on the ancestor walk; the loop stops once depth exceeds it.
  const MAX_DEPTH = 20;

  try {
    const listResult = document.evaluate(
      listSelector,
      document,
      null,
      XPathResult.FIRST_ORDERED_NODE_TYPE,
      null
    );

    const listElement = listResult.singleNodeValue as HTMLElement | null;
    if (!listElement) return 0;

    let depth = 0;
    let current: HTMLElement = element;

    while (current && current !== listElement && current.parentElement) {
      depth++;
      current = current.parentElement;
      if (depth > MAX_DEPTH) break;
    }

    // Running out of ancestors or hitting the cap leaves `current` elsewhere.
    return current === listElement ? depth : 0;
  } catch {
    // Best effort: an XPath that fails to evaluate is treated as "depth unknown".
    return 0;
  }
};
|
||||
|
||||
const removeParentChildDuplicates = (
|
||||
candidates: Array<{
|
||||
id: number;
|
||||
@@ -1242,6 +825,29 @@ export const BrowserWindow = () => {
|
||||
}
|
||||
}, [browserSteps, getList, listSelector, initialAutoFieldIds, currentListActionId, manuallyAddedFieldIds]);
|
||||
|
||||
// Mirror the list step that belongs to the current list action into local
// state (list id, list selector, fields, pagination selector). Each setter is
// only called when the local value actually differs from the step's value.
useEffect(() => {
  if (!currentListActionId || browserSteps.length === 0) return;

  const activeStep = browserSteps.find(
    (s) => s.type === 'list' && s.actionId === currentListActionId
  ) as ListStep | undefined;
  if (!activeStep) return;

  if (currentListId !== activeStep.id) {
    setCurrentListId(activeStep.id);
  }
  if (listSelector !== activeStep.listSelector) {
    setListSelector(activeStep.listSelector);
  }
  // Compared by serialized value rather than by reference.
  if (JSON.stringify(fields) !== JSON.stringify(activeStep.fields)) {
    setFields(activeStep.fields);
  }
  // The pagination selector is only copied when the step has one.
  if (activeStep.pagination?.selector && paginationSelector !== activeStep.pagination.selector) {
    setPaginationSelector(activeStep.pagination.selector);
  }
}, [currentListActionId, browserSteps, currentListId, listSelector, fields, paginationSelector]);
|
||||
|
||||
useEffect(() => {
|
||||
if (!isDOMMode) {
|
||||
capturedElementHighlighter.clearHighlights();
|
||||
@@ -1637,6 +1243,22 @@ export const BrowserWindow = () => {
|
||||
paginationType !== "scrollUp" &&
|
||||
paginationType !== "none"
|
||||
) {
|
||||
let targetListId = currentListId;
|
||||
let targetFields = fields;
|
||||
|
||||
if ((!targetListId || targetListId === 0) && currentListActionId) {
|
||||
const activeStep = browserSteps.find(
|
||||
s => s.type === 'list' && s.actionId === currentListActionId
|
||||
) as ListStep | undefined;
|
||||
|
||||
if (activeStep) {
|
||||
targetListId = activeStep.id;
|
||||
if (Object.keys(targetFields).length === 0 && Object.keys(activeStep.fields).length > 0) {
|
||||
targetFields = activeStep.fields;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setPaginationSelector(highlighterData.selector);
|
||||
notify(
|
||||
`info`,
|
||||
@@ -1646,8 +1268,8 @@ export const BrowserWindow = () => {
|
||||
);
|
||||
addListStep(
|
||||
listSelector!,
|
||||
fields,
|
||||
currentListId || 0,
|
||||
targetFields,
|
||||
targetListId || 0,
|
||||
currentListActionId || `list-${crypto.randomUUID()}`,
|
||||
{
|
||||
type: paginationType,
|
||||
@@ -1812,6 +1434,8 @@ export const BrowserWindow = () => {
|
||||
socket,
|
||||
t,
|
||||
paginationSelector,
|
||||
highlighterData,
|
||||
browserSteps
|
||||
]
|
||||
);
|
||||
|
||||
@@ -1864,6 +1488,22 @@ export const BrowserWindow = () => {
|
||||
paginationType !== "scrollUp" &&
|
||||
paginationType !== "none"
|
||||
) {
|
||||
let targetListId = currentListId;
|
||||
let targetFields = fields;
|
||||
|
||||
if ((!targetListId || targetListId === 0) && currentListActionId) {
|
||||
const activeStep = browserSteps.find(
|
||||
s => s.type === 'list' && s.actionId === currentListActionId
|
||||
) as ListStep | undefined;
|
||||
|
||||
if (activeStep) {
|
||||
targetListId = activeStep.id;
|
||||
if (Object.keys(targetFields).length === 0 && Object.keys(activeStep.fields).length > 0) {
|
||||
targetFields = activeStep.fields;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
setPaginationSelector(highlighterData.selector);
|
||||
notify(
|
||||
`info`,
|
||||
@@ -1873,8 +1513,8 @@ export const BrowserWindow = () => {
|
||||
);
|
||||
addListStep(
|
||||
listSelector!,
|
||||
fields,
|
||||
currentListId || 0,
|
||||
targetFields,
|
||||
targetListId || 0,
|
||||
currentListActionId || `list-${crypto.randomUUID()}`,
|
||||
{ type: paginationType, selector: highlighterData.selector, isShadow: highlighterData.isShadow },
|
||||
undefined,
|
||||
@@ -2046,6 +1686,31 @@ export const BrowserWindow = () => {
|
||||
}
|
||||
}, [paginationMode, resetPaginationSelector]);
|
||||
|
||||
// While pagination mode is active, keep the local pagination selector
// consistent with the pagination config stored on the current list step.
useEffect(() => {
  if (!paginationMode || !currentListActionId) return;

  const currentListStep = browserSteps.find(
    (step) => step.type === 'list' && step.actionId === currentListActionId
  ) as (ListStep & { type: 'list' }) | undefined;

  const stepSelector = currentListStep?.pagination?.selector;
  const stepType = currentListStep?.pagination?.type;

  // For click-based pagination, clear the local selector when the step has no
  // selector yet or was configured for a different pagination type.
  if (['clickNext', 'clickLoadMore'].includes(paginationType)) {
    if (!stepSelector || (stepType && stepType !== paginationType)) {
      setPaginationSelector('');
    }
  }

  // Adopt the step's selector when none is held locally; drop the local one
  // when the step no longer has a selector.
  if (stepSelector && !paginationSelector) {
    setPaginationSelector(stepSelector);
  } else if (!stepSelector && paginationSelector) {
    setPaginationSelector('');
  }
  // `paginationType` is read above, so it must be a dependency; without it the
  // effect keeps comparing against a stale pagination type.
}, [browserSteps, paginationMode, currentListActionId, paginationSelector, paginationType]);
|
||||
|
||||
return (
|
||||
<div
|
||||
onClick={handleClick}
|
||||
@@ -2310,6 +1975,7 @@ export const BrowserWindow = () => {
|
||||
listSelector={listSelector}
|
||||
cachedChildSelectors={cachedChildSelectors}
|
||||
paginationMode={paginationMode}
|
||||
paginationSelector={paginationSelector}
|
||||
paginationType={paginationType}
|
||||
limitMode={limitMode}
|
||||
isCachingChildSelectors={isCachingChildSelectors}
|
||||
|
||||
@@ -100,6 +100,7 @@ interface RRWebDOMBrowserRendererProps {
|
||||
listSelector?: string | null;
|
||||
cachedChildSelectors?: string[];
|
||||
paginationMode?: boolean;
|
||||
paginationSelector?: string;
|
||||
paginationType?: string;
|
||||
limitMode?: boolean;
|
||||
isCachingChildSelectors?: boolean;
|
||||
@@ -153,6 +154,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
||||
listSelector = null,
|
||||
cachedChildSelectors = [],
|
||||
paginationMode = false,
|
||||
paginationSelector = "",
|
||||
paginationType = "",
|
||||
limitMode = false,
|
||||
isCachingChildSelectors = false,
|
||||
@@ -257,6 +259,13 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
||||
else if (listSelector) {
|
||||
if (limitMode) {
|
||||
shouldHighlight = false;
|
||||
} else if (
|
||||
paginationMode &&
|
||||
paginationSelector &&
|
||||
paginationType !== "" &&
|
||||
!["none", "scrollDown", "scrollUp"].includes(paginationType)
|
||||
) {
|
||||
shouldHighlight = false;
|
||||
} else if (
|
||||
paginationMode &&
|
||||
paginationType !== "" &&
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import React, { useState, useCallback, useEffect, useMemo } from 'react';
|
||||
import React, { useState, useCallback, useEffect, useRef, useMemo } from 'react';
|
||||
import { Button, Paper, Box, TextField, IconButton, Tooltip } from "@mui/material";
|
||||
import { WorkflowFile } from "maxun-core";
|
||||
import Typography from "@mui/material/Typography";
|
||||
@@ -15,9 +15,9 @@ import ActionDescriptionBox from '../action/ActionDescriptionBox';
|
||||
import { useThemeMode } from '../../context/theme-provider';
|
||||
import { useTranslation } from 'react-i18next';
|
||||
import { useBrowserDimensionsStore } from '../../context/browserDimensions';
|
||||
import { emptyWorkflow } from '../../shared/constants';
|
||||
import { clientListExtractor } from '../../helpers/clientListExtractor';
|
||||
import { clientSelectorGenerator } from '../../helpers/clientSelectorGenerator';
|
||||
import { clientPaginationDetector } from '../../helpers/clientPaginationDetector';
|
||||
|
||||
const fetchWorkflow = (id: string, callback: (response: WorkflowFile) => void) => {
|
||||
getActiveWorkflow(id).then(
|
||||
@@ -45,6 +45,13 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
const [showCaptureText, setShowCaptureText] = useState(true);
|
||||
const { panelHeight } = useBrowserDimensionsStore();
|
||||
|
||||
const [autoDetectedPagination, setAutoDetectedPagination] = useState<{
|
||||
type: PaginationType;
|
||||
selector: string | null;
|
||||
confidence: 'high' | 'medium' | 'low';
|
||||
} | null>(null);
|
||||
const autoDetectionRunRef = useRef<string | null>(null);
|
||||
|
||||
const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog, currentListActionId, setCurrentListActionId, currentTextActionId, setCurrentTextActionId, currentScreenshotActionId, setCurrentScreenshotActionId, isDOMMode, setIsDOMMode, currentSnapshot, setCurrentSnapshot, updateDOMMode, initialUrl, setRecordingUrl, currentTextGroupName } = useGlobalInfoStore();
|
||||
const {
|
||||
getText, startGetText, stopGetText,
|
||||
@@ -62,7 +69,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
startAction, finishAction
|
||||
} = useActionContext();
|
||||
|
||||
const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep, updateListTextFieldLabel, removeListTextField, updateListStepLimit, deleteStepsByActionId, updateListStepData, updateScreenshotStepData, emitActionForStep } = useBrowserSteps();
|
||||
const { browserSteps, addScreenshotStep, updateListStepLimit, updateListStepPagination, deleteStepsByActionId, updateListStepData, updateScreenshotStepData, emitActionForStep } = useBrowserSteps();
|
||||
const { id, socket } = useSocketStore();
|
||||
const { t } = useTranslation();
|
||||
|
||||
@@ -72,6 +79,73 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
setWorkflow(data);
|
||||
}, [setWorkflow]);
|
||||
|
||||
// Reacts to the selected pagination type for the current list action.
// For click-based types ('clickNext' / 'clickLoadMore'):
//  - if the list step was configured with a different type, the outline styles
//    on the previously selected element(s) are cleared, the step's pagination
//    is reset to the new type with no selector, and pagination mode is started;
//  - if the step has neither selector nor type yet, pagination mode is started.
useEffect(() => {
  if (!paginationType || !currentListActionId) return;

  const currentListStep = browserSteps.find(
    (step) => step.type === 'list' && step.actionId === currentListActionId
  ) as (BrowserStep & { type: 'list' }) | undefined;

  const currentSelector = currentListStep?.pagination?.selector;
  const currentType = currentListStep?.pagination?.type;

  if (!['clickNext', 'clickLoadMore'].includes(paginationType)) return;

  const needsSelector = !currentSelector && !currentType;
  const typeChanged = currentType && currentType !== paginationType;

  // Resolves a selector to elements: selectors starting with '//' or '(//'
  // are evaluated as XPath, everything else as CSS. Any evaluation error
  // yields an empty list.
  const evaluateSelector = (selector: string, doc: Document): Element[] => {
    try {
      if (selector.startsWith('//') || selector.startsWith('(//')) {
        const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
        const elements: Element[] = [];
        for (let i = 0; i < result.snapshotLength; i++) {
          const node = result.snapshotItem(i);
          if (node && node.nodeType === Node.ELEMENT_NODE) {
            elements.push(node as Element);
          }
        }
        return elements;
      }
      return Array.from(doc.querySelectorAll(selector));
    } catch (err) {
      return [];
    }
  };

  if (typeChanged) {
    const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
    if (iframeElement?.contentDocument && currentSelector) {
      try {
        // Clear the inline outline styles from the old pagination element(s).
        const elements = evaluateSelector(currentSelector, iframeElement.contentDocument);
        elements.forEach((el: Element) => {
          (el as HTMLElement).style.outline = '';
          (el as HTMLElement).style.outlineOffset = '';
          (el as HTMLElement).style.zIndex = '';
        });
      } catch (error) {
        console.error('Error removing pagination highlight:', error);
      }
    }

    if (currentListStep) {
      updateListStepPagination(currentListStep.id, {
        type: paginationType,
        selector: null,
      });
    }

    startPaginationMode();
  } else if (needsSelector) {
    startPaginationMode();
  }
}, [paginationType, currentListActionId, browserSteps, updateListStepPagination, startPaginationMode]);
|
||||
|
||||
useEffect(() => {
|
||||
if (socket) {
|
||||
const domModeHandler = (data: any) => {
|
||||
@@ -391,7 +465,182 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
return;
|
||||
}
|
||||
|
||||
startPaginationMode();
|
||||
const currentListStepForAutoDetect = browserSteps.find(
|
||||
step => step.type === 'list' && step.actionId === currentListActionId
|
||||
) as (BrowserStep & { type: 'list'; listSelector?: string }) | undefined;
|
||||
|
||||
if (currentListStepForAutoDetect?.listSelector) {
|
||||
if (autoDetectionRunRef.current !== currentListActionId) {
|
||||
autoDetectionRunRef.current = currentListActionId;
|
||||
|
||||
notify('info', 'Detecting pagination...');
|
||||
|
||||
try {
|
||||
socket?.emit('testPaginationScroll', {
|
||||
listSelector: currentListStepForAutoDetect.listSelector
|
||||
});
|
||||
|
||||
const handleScrollTestResult = (result: any) => {
|
||||
if (result.success && result.contentLoaded) {
|
||||
setAutoDetectedPagination({
|
||||
type: 'scrollDown',
|
||||
selector: null,
|
||||
confidence: 'high'
|
||||
});
|
||||
updatePaginationType('scrollDown');
|
||||
|
||||
const latestListStep = browserSteps.find(
|
||||
step => step.type === 'list' && step.actionId === currentListActionId
|
||||
);
|
||||
if (latestListStep) {
|
||||
updateListStepPagination(latestListStep.id, {
|
||||
type: 'scrollDown',
|
||||
selector: null,
|
||||
isShadow: false
|
||||
});
|
||||
}
|
||||
} else if (result.success && !result.contentLoaded) {
|
||||
const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
|
||||
const iframeDoc = iframeElement?.contentDocument;
|
||||
|
||||
if (iframeDoc) {
|
||||
const detectionResult = clientPaginationDetector.autoDetectPagination(
|
||||
iframeDoc,
|
||||
currentListStepForAutoDetect.listSelector!,
|
||||
clientSelectorGenerator,
|
||||
{ disableScrollDetection: true }
|
||||
);
|
||||
|
||||
if (detectionResult.type) {
|
||||
setAutoDetectedPagination({
|
||||
type: detectionResult.type,
|
||||
selector: detectionResult.selector,
|
||||
confidence: detectionResult.confidence
|
||||
});
|
||||
|
||||
const latestListStep = browserSteps.find(
|
||||
step => step.type === 'list' && step.actionId === currentListActionId
|
||||
);
|
||||
if (latestListStep) {
|
||||
updateListStepPagination(latestListStep.id, {
|
||||
type: detectionResult.type,
|
||||
selector: detectionResult.selector,
|
||||
isShadow: false
|
||||
});
|
||||
}
|
||||
|
||||
updatePaginationType(detectionResult.type);
|
||||
|
||||
if (detectionResult.selector && (detectionResult.type === 'clickNext' || detectionResult.type === 'clickLoadMore')) {
|
||||
try {
|
||||
function evaluateSelector(selector: string, doc: Document): Element[] {
|
||||
try {
|
||||
const isXPath = selector.startsWith('//') || selector.startsWith('(//');
|
||||
if (isXPath) {
|
||||
const result = doc.evaluate(
|
||||
selector,
|
||||
doc,
|
||||
null,
|
||||
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
|
||||
null
|
||||
);
|
||||
const elements: Element[] = [];
|
||||
for (let i = 0; i < result.snapshotLength; i++) {
|
||||
const node = result.snapshotItem(i);
|
||||
if (node && node.nodeType === Node.ELEMENT_NODE) {
|
||||
elements.push(node as Element);
|
||||
}
|
||||
}
|
||||
return elements;
|
||||
} else {
|
||||
try {
|
||||
const allElements = Array.from(doc.querySelectorAll(selector));
|
||||
if (allElements.length > 0) {
|
||||
return allElements;
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn('[RightSidePanel] Full chained selector failed, trying individual selectors:', err);
|
||||
}
|
||||
|
||||
const selectorParts = selector.split(',');
|
||||
for (const part of selectorParts) {
|
||||
try {
|
||||
const elements = Array.from(doc.querySelectorAll(part.trim()));
|
||||
if (elements.length > 0) {
|
||||
return elements;
|
||||
}
|
||||
} catch (err) {
|
||||
console.warn('[RightSidePanel] Selector part failed:', part.trim(), err);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return [];
|
||||
}
|
||||
} catch (err) {
|
||||
console.error('[RightSidePanel] Selector evaluation failed:', selector, err);
|
||||
return [];
|
||||
}
|
||||
}
|
||||
|
||||
const elements = evaluateSelector(detectionResult.selector, iframeDoc);
|
||||
if (elements.length > 0) {
|
||||
elements.forEach((el: Element) => {
|
||||
(el as HTMLElement).style.outline = '3px dashed #ff00c3';
|
||||
(el as HTMLElement).style.outlineOffset = '2px';
|
||||
(el as HTMLElement).style.zIndex = '9999';
|
||||
});
|
||||
|
||||
const firstElement = elements[0] as HTMLElement;
|
||||
const elementRect = firstElement.getBoundingClientRect();
|
||||
const iframeWindow = iframeElement.contentWindow;
|
||||
if (iframeWindow) {
|
||||
const targetY = elementRect.top + iframeWindow.scrollY - (iframeWindow.innerHeight / 2) + (elementRect.height / 2);
|
||||
iframeWindow.scrollTo({ top: targetY, behavior: 'smooth' });
|
||||
}
|
||||
|
||||
const paginationTypeLabel = detectionResult.type === 'clickNext' ? 'Next Button' : 'Load More Button';
|
||||
notify('info', `${paginationTypeLabel} has been auto-detected and highlighted on the page`);
|
||||
} else {
|
||||
console.warn(' No elements found for selector:', detectionResult.selector);
|
||||
}
|
||||
} catch (error) {
|
||||
console.error('Error highlighting pagination button:', error);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
setAutoDetectedPagination(null);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
console.error('Scroll test failed:', result.error);
|
||||
setAutoDetectedPagination(null);
|
||||
}
|
||||
|
||||
socket?.off('paginationScrollTestResult', handleScrollTestResult);
|
||||
};
|
||||
|
||||
socket?.on('paginationScrollTestResult', handleScrollTestResult);
|
||||
|
||||
setTimeout(() => {
|
||||
socket?.off('paginationScrollTestResult', handleScrollTestResult);
|
||||
}, 5000);
|
||||
|
||||
} catch (error) {
|
||||
console.error('Scroll test failed:', error);
|
||||
setAutoDetectedPagination(null);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const shouldSkipPaginationMode = autoDetectedPagination && (
|
||||
['scrollDown', 'scrollUp'].includes(autoDetectedPagination.type) ||
|
||||
(['clickNext', 'clickLoadMore'].includes(autoDetectedPagination.type) && autoDetectedPagination.selector)
|
||||
);
|
||||
|
||||
if (!shouldSkipPaginationMode) {
|
||||
startPaginationMode();
|
||||
}
|
||||
|
||||
setShowPaginationOptions(true);
|
||||
setCaptureStage('pagination');
|
||||
break;
|
||||
@@ -460,6 +709,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
case 'pagination':
|
||||
stopPaginationMode();
|
||||
setShowPaginationOptions(false);
|
||||
setAutoDetectedPagination(null);
|
||||
setCaptureStage('initial');
|
||||
break;
|
||||
}
|
||||
@@ -495,17 +745,58 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
socket.emit('removeAction', { actionId: currentListActionId });
|
||||
}
|
||||
}
|
||||
|
||||
if (autoDetectedPagination?.selector) {
|
||||
const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
|
||||
if (iframeElement?.contentDocument) {
|
||||
try {
|
||||
function evaluateSelector(selector: string, doc: Document): Element[] {
|
||||
if (selector.startsWith('//') || selector.startsWith('(//')) {
|
||||
try {
|
||||
const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
const elements: Element[] = [];
|
||||
for (let i = 0; i < result.snapshotLength; i++) {
|
||||
const node = result.snapshotItem(i);
|
||||
if (node && node.nodeType === Node.ELEMENT_NODE) {
|
||||
elements.push(node as Element);
|
||||
}
|
||||
}
|
||||
return elements;
|
||||
} catch (err) {
|
||||
return [];
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
return Array.from(doc.querySelectorAll(selector));
|
||||
} catch (err) {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const elements = evaluateSelector(autoDetectedPagination.selector, iframeElement.contentDocument);
|
||||
elements.forEach((el: Element) => {
|
||||
(el as HTMLElement).style.outline = '';
|
||||
(el as HTMLElement).style.outlineOffset = '';
|
||||
(el as HTMLElement).style.zIndex = '';
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error removing pagination highlight on discard:', error);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
resetListState();
|
||||
stopPaginationMode();
|
||||
stopLimitMode();
|
||||
setShowPaginationOptions(false);
|
||||
setShowLimitOptions(false);
|
||||
setAutoDetectedPagination(null);
|
||||
setCaptureStage('initial');
|
||||
setCurrentListActionId('');
|
||||
clientSelectorGenerator.cleanup();
|
||||
notify('error', t('right_panel.errors.capture_list_discarded'));
|
||||
}, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t, stopPaginationMode, stopLimitMode, socket]);
|
||||
}, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t, stopPaginationMode, stopLimitMode, socket, autoDetectedPagination]);
|
||||
|
||||
const captureScreenshot = (fullPage: boolean) => {
|
||||
const screenshotCount = browserSteps.filter(s => s.type === 'screenshot').length + 1;
|
||||
@@ -615,6 +906,114 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
||||
{showPaginationOptions && (
|
||||
<Box display="flex" flexDirection="column" gap={2} style={{ margin: '13px' }}>
|
||||
<Typography>{t('right_panel.pagination.title')}</Typography>
|
||||
|
||||
{autoDetectedPagination && autoDetectedPagination.type !== '' && (
|
||||
<Box
|
||||
sx={{
|
||||
p: 2,
|
||||
mb: 1,
|
||||
borderRadius: '8px',
|
||||
backgroundColor: isDarkMode ? '#1a3a1a' : '#e8f5e9',
|
||||
border: `1px solid ${isDarkMode ? '#2e7d32' : '#4caf50'}`,
|
||||
}}
|
||||
>
|
||||
<Typography
|
||||
variant="body2"
|
||||
sx={{
|
||||
color: isDarkMode ? '#81c784' : '#2e7d32',
|
||||
fontWeight: 'bold',
|
||||
mb: 0.5
|
||||
}}
|
||||
>
|
||||
✓ Auto-detected: {
|
||||
autoDetectedPagination.type === 'clickNext' ? 'Click Next' :
|
||||
autoDetectedPagination.type === 'clickLoadMore' ? 'Click Load More' :
|
||||
autoDetectedPagination.type === 'scrollDown' ? 'Scroll Down' :
|
||||
autoDetectedPagination.type === 'scrollUp' ? 'Scroll Up' :
|
||||
autoDetectedPagination.type
|
||||
}
|
||||
</Typography>
|
||||
<Typography
|
||||
variant="caption"
|
||||
sx={{
|
||||
color: isDarkMode ? '#a5d6a7' : '#388e3c',
|
||||
display: 'block',
|
||||
mb: 1
|
||||
}}
|
||||
>
|
||||
You can continue with this or manually select a different pagination type below.
|
||||
</Typography>
|
||||
{autoDetectedPagination.selector && ['clickNext', 'clickLoadMore'].includes(autoDetectedPagination.type) && (
|
||||
<Button
|
||||
size="small"
|
||||
variant="outlined"
|
||||
onClick={() => {
|
||||
const currentListStep = browserSteps.find(
|
||||
step => step.type === 'list' && step.actionId === currentListActionId
|
||||
) as (BrowserStep & { type: 'list' }) | undefined;
|
||||
|
||||
if (currentListStep) {
|
||||
const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
|
||||
if (iframeElement?.contentDocument && autoDetectedPagination.selector) {
|
||||
try {
|
||||
function evaluateSelector(selector: string, doc: Document): Element[] {
|
||||
if (selector.startsWith('//') || selector.startsWith('(//')) {
|
||||
try {
|
||||
const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||
const elements: Element[] = [];
|
||||
for (let i = 0; i < result.snapshotLength; i++) {
|
||||
const node = result.snapshotItem(i);
|
||||
if (node && node.nodeType === Node.ELEMENT_NODE) {
|
||||
elements.push(node as Element);
|
||||
}
|
||||
}
|
||||
return elements;
|
||||
} catch (err) {
|
||||
return [];
|
||||
}
|
||||
} else {
|
||||
try {
|
||||
return Array.from(doc.querySelectorAll(selector));
|
||||
} catch (err) {
|
||||
return [];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const elements = evaluateSelector(autoDetectedPagination.selector, iframeElement.contentDocument);
|
||||
elements.forEach((el: Element) => {
|
||||
(el as HTMLElement).style.outline = '';
|
||||
(el as HTMLElement).style.outlineOffset = '';
|
||||
(el as HTMLElement).style.zIndex = '';
|
||||
});
|
||||
} catch (error) {
|
||||
console.error('Error removing pagination highlight:', error);
|
||||
}
|
||||
}
|
||||
|
||||
updateListStepPagination(currentListStep.id, {
|
||||
type: autoDetectedPagination.type,
|
||||
selector: null,
|
||||
});
|
||||
|
||||
startPaginationMode();
|
||||
notify('info', 'Please select a different pagination element');
|
||||
}
|
||||
}}
|
||||
sx={{
|
||||
color: isDarkMode ? '#81c784' : '#2e7d32',
|
||||
borderColor: isDarkMode ? '#81c784' : '#2e7d32',
|
||||
'&:hover': {
|
||||
borderColor: isDarkMode ? '#a5d6a7' : '#4caf50',
|
||||
backgroundColor: isDarkMode ? '#1a3a1a' : '#f1f8f4',
|
||||
}
|
||||
}}
|
||||
>
|
||||
Choose Different Element
|
||||
</Button>
|
||||
)}
|
||||
</Box>
|
||||
)}
|
||||
<Button
|
||||
variant={paginationType === 'clickNext' ? "contained" : "outlined"}
|
||||
onClick={() => handlePaginationSettingSelect('clickNext')}
|
||||
|
||||
@@ -500,7 +500,7 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
{scrapeListLimits.map((limitInfo, index) => {
|
||||
// Get the corresponding scrapeList action to extract its name
|
||||
const scrapeListAction = robot?.recording?.workflow?.[limitInfo.pairIndex]?.what?.[limitInfo.actionIndex];
|
||||
const actionName =
|
||||
const actionName =
|
||||
scrapeListAction?.name ||
|
||||
`List Limit ${index + 1}`;
|
||||
|
||||
@@ -821,10 +821,19 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
|
||||
onChange={(e) => handleTargetUrlChange(e.target.value)}
|
||||
style={{ marginBottom: "20px" }}
|
||||
/>
|
||||
<Divider />
|
||||
{renderScrapeListLimitFields()}
|
||||
<Divider />
|
||||
{renderActionNameFields()}
|
||||
{renderScrapeListLimitFields() && (
|
||||
<>
|
||||
<Divider />
|
||||
{renderScrapeListLimitFields()}
|
||||
</>
|
||||
)}
|
||||
|
||||
{renderActionNameFields() && (
|
||||
<>
|
||||
<Divider />
|
||||
{renderActionNameFields()}
|
||||
</>
|
||||
)}
|
||||
</>
|
||||
)}
|
||||
</Box>
|
||||
|
||||
@@ -10,7 +10,6 @@ import {
|
||||
AccordionSummary,
|
||||
AccordionDetails
|
||||
} from "@mui/material";
|
||||
import Highlight from "react-highlight";
|
||||
import * as React from "react";
|
||||
import { Data } from "./RunsTable";
|
||||
import { TabPanel, TabContext } from "@mui/lab";
|
||||
@@ -22,7 +21,6 @@ import TableCell from '@mui/material/TableCell';
|
||||
import TableContainer from '@mui/material/TableContainer';
|
||||
import TableHead from '@mui/material/TableHead';
|
||||
import TableRow from '@mui/material/TableRow';
|
||||
import 'highlight.js/styles/github.css';
|
||||
import { useTranslation } from "react-i18next";
|
||||
import { useThemeMode } from "../../context/theme-provider";
|
||||
|
||||
|
||||
@@ -80,6 +80,7 @@ interface BrowserStepsContextType {
|
||||
newLabel: string
|
||||
) => void;
|
||||
updateListStepLimit: (listId: number, limit: number) => void;
|
||||
updateListStepPagination: (listId: number, pagination: { type: string; selector: string | null; isShadow?: boolean }) => void;
|
||||
updateListStepData: (listId: number, extractedData: any[]) => void;
|
||||
updateListStepName: (listId: number, name: string) => void;
|
||||
updateScreenshotStepName: (id: number, name: string) => void;
|
||||
@@ -479,6 +480,26 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({
|
||||
);
|
||||
};
|
||||
|
||||
/**
 * Overwrite the pagination settings of the list step whose `id` equals `listId`.
 *
 * Every other step is passed through untouched. A `null` (or otherwise falsy)
 * selector is stored as the empty string.
 */
const updateListStepPagination = (
  listId: number,
  pagination: { type: string; selector: string | null; isShadow?: boolean }
) => {
  setBrowserSteps((prevSteps) =>
    prevSteps.map((step) =>
      step.type === "list" && step.id === listId
        ? {
            ...step,
            pagination: { ...pagination, selector: pagination.selector || "" },
          }
        : step
    )
  );
};
|
||||
|
||||
const updateListStepName = (listId: number, name: string) => {
|
||||
setBrowserSteps((prevSteps) =>
|
||||
prevSteps.map((step) => {
|
||||
@@ -533,6 +554,7 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({
|
||||
updateBrowserTextStepLabel,
|
||||
updateListTextFieldLabel,
|
||||
updateListStepLimit,
|
||||
updateListStepPagination,
|
||||
updateListStepData,
|
||||
updateListStepName,
|
||||
updateScreenshotStepName,
|
||||
|
||||
586
src/helpers/clientPaginationDetector.ts
Normal file
586
src/helpers/clientPaginationDetector.ts
Normal file
@@ -0,0 +1,586 @@
|
||||
/**
|
||||
* Client-Side Pagination Auto-Detection
|
||||
* Detects pagination type and selector for list extraction
|
||||
* Operates on passed document object (works in DOM mode / iframe)
|
||||
*/
|
||||
|
||||
import type { ClientSelectorGenerator } from './clientSelectorGenerator';
|
||||
|
||||
/**
 * Outcome of one pagination auto-detection pass.
 */
export type PaginationDetectionResult = {
  /** Detected pagination strategy; the empty string means no strategy was detected. */
  type: '' | 'clickNext' | 'clickLoadMore' | 'scrollDown' | 'scrollUp';
  /** Selector of the control to click, or `null` when there is none. */
  selector: string | null;
  /** How strongly the page signals the reported strategy. */
  confidence: 'low' | 'medium' | 'high';
  /** Optional free-form diagnostics explaining the outcome. */
  debug?: any;
};
|
||||
|
||||
class ClientPaginationDetector {
  /** Text fragments (visible text, aria-label, title) that suggest a "next page" control. */
  private static readonly NEXT_BUTTON_PATTERNS: readonly RegExp[] = [
    /next/i,
    /\bnext\s+page\b/i,
    /page\s+suivante/i,
    /siguiente/i,
    /weiter/i,
    />>|›|→|»|⟩/,
    /\bforward\b/i,
    /\bnewer\b/i,
    /\bolder\b/i
  ];

  /** Text fragments that suggest a "load more" control. */
  private static readonly LOAD_MORE_PATTERNS: readonly RegExp[] = [
    /load\s+more/i,
    /show\s+more/i,
    /view\s+more/i,
    /see\s+more/i,
    /more\s+results/i,
    /plus\s+de\s+résultats/i,
    /más\s+resultados/i,
    /weitere\s+ergebnisse/i
  ];

  /** Text fragments that suggest a "previous page" control. */
  private static readonly PREV_BUTTON_PATTERNS: readonly RegExp[] = [
    /prev/i,
    /previous/i,
    /<<|‹|←|«/,
    /\bback\b/i
  ];

  /**
   * Auto-detect pagination on a page.
   *
   * Every visible clickable element is scored as a "next", "load more" and
   * "previous" candidate (text match +10, near the list +5, `<button>` tag +2
   * for next/load-more, pagination-like class +3 for next). Unless disabled,
   * an infinite-scroll score is computed as well. The first rule that applies wins:
   *
   * 1. infinite-scroll score >= 8                    -> `scrollDown`
   * 2. load-more score >= 15                         -> `clickLoadMore` (high)
   * 3. next score >= 15 and scroll score not in 5..7 -> `clickNext` (high)
   * 4. infinite-scroll score in 5..7                 -> `scrollDown`
   * 5. load-more score >= 8                          -> `clickLoadMore`
   * 6. next score >= 8                               -> `clickNext`
   * 7. previous score >= 8                           -> `scrollUp`
   * 8. otherwise                                     -> empty type plus debug details
   *
   * @param doc - The document object to analyze (can be an iframe document)
   * @param listSelector - CSS or XPath selector matching the list elements
   * @param selectorGenerator - Generator used to build the selector of a detected button
   * @param options - Optional detection options; `disableScrollDetection` forces the
   *   infinite-scroll score to 0
   * @returns Pagination detection result; never throws (errors are logged and
   *   reported through `debug`)
   */
  autoDetectPagination(
    doc: Document,
    listSelector: string,
    selectorGenerator: ClientSelectorGenerator,
    options?: { disableScrollDetection?: boolean }
  ): PaginationDetectionResult {
    try {
      const listElements = this.evaluateSelector(listSelector, doc);

      if (listElements.length === 0) {
        return { type: '', selector: null, confidence: 'low', debug: 'No list elements found' };
      }

      // Proximity is measured against the first matched list element.
      const listContainer = listElements[0];
      const clickableElements = this.getClickableElements(doc);

      type NextCandidate = {
        element: HTMLElement;
        score: number;
        text: string;
        ariaLabel: string;
        tag: string;
        className: string;
        reasons: string[];
      };

      let nextButton: HTMLElement | null = null;
      let nextButtonScore = 0;
      const nextButtonCandidates: NextCandidate[] = [];

      let loadMoreButton: HTMLElement | null = null;
      let loadMoreScore = 0;

      let prevButton: HTMLElement | null = null;
      let prevButtonScore = 0;

      // Single pass: visibility, text and proximity are computed once per element
      // and shared by the three independent scores.
      for (const element of clickableElements) {
        if (!this.isVisible(element)) continue;

        const text = (element.textContent || '').trim();
        const ariaLabel = element.getAttribute('aria-label') || '';
        const title = element.getAttribute('title') || '';
        const combinedText = `${text} ${ariaLabel} ${title}`;
        const nearList = this.isNearList(element, listContainer);
        const isButtonTag = element.tagName === 'BUTTON';

        // `className` is not a string on SVG elements; fall back to the attribute.
        const rawClassName: unknown = element.className;
        const className = typeof rawClassName === 'string'
          ? rawClassName
          : element.getAttribute('class') || '';

        // --- "next" candidate ---
        let nextScore = 0;
        const reasons: string[] = [];

        if (this.matchesAnyPattern(combinedText, ClientPaginationDetector.NEXT_BUTTON_PATTERNS)) {
          nextScore += 10;
          reasons.push('text match (+10)');
        }
        if (nearList) {
          nextScore += 5;
          reasons.push('near list (+5)');
        }
        if (isButtonTag) {
          nextScore += 2;
          reasons.push('button tag (+2)');
        }
        if (/pagination|next|forward/i.test(className)) {
          nextScore += 3;
          reasons.push('pagination class (+3)');
        }

        if (nextScore > 0) {
          nextButtonCandidates.push({
            element: element,
            score: nextScore,
            text: text.substring(0, 50),
            ariaLabel: ariaLabel,
            tag: element.tagName,
            className: className,
            reasons: reasons
          });
        }

        // Strict comparison: on a tie the earliest element wins.
        if (nextScore > nextButtonScore) {
          nextButtonScore = nextScore;
          nextButton = element;
        }

        // --- "load more" candidate ---
        let moreScore = 0;
        if (this.matchesAnyPattern(combinedText, ClientPaginationDetector.LOAD_MORE_PATTERNS)) {
          moreScore += 10;
        }
        if (nearList) {
          moreScore += 5;
        }
        if (isButtonTag) {
          moreScore += 2;
        }
        if (moreScore > loadMoreScore) {
          loadMoreScore = moreScore;
          loadMoreButton = element;
        }

        // --- "previous" candidate ---
        let prevScore = 0;
        if (this.matchesAnyPattern(combinedText, ClientPaginationDetector.PREV_BUTTON_PATTERNS)) {
          prevScore += 10;
        }
        if (nearList) {
          prevScore += 5;
        }
        if (prevScore > prevButtonScore) {
          prevButtonScore = prevScore;
          prevButton = element;
        }
      }

      const infiniteScrollScore = options?.disableScrollDetection
        ? 0
        : this.detectInfiniteScrollIndicators(doc, listElements, listContainer);

      const hasStrongInfiniteScrollSignals = infiniteScrollScore >= 8;
      const hasMediumInfiniteScrollSignals = infiniteScrollScore >= 5 && infiniteScrollScore < 8;

      if (hasStrongInfiniteScrollSignals) {
        const confidence = infiniteScrollScore >= 12 ? 'high' : infiniteScrollScore >= 10 ? 'medium' : 'low';
        return {
          type: 'scrollDown',
          selector: null,
          confidence: confidence
        };
      }

      if (loadMoreButton && loadMoreScore >= 15) {
        const selector = this.generateSelectorsForElement(loadMoreButton, doc, selectorGenerator);
        return {
          type: 'clickLoadMore',
          selector: selector,
          confidence: 'high'
        };
      }

      if (nextButton && nextButtonScore >= 15 && !hasMediumInfiniteScrollSignals) {
        const selector = this.generateSelectorsForElement(nextButton, doc, selectorGenerator);
        return {
          type: 'clickNext',
          selector: selector,
          confidence: 'high'
        };
      }

      if (hasMediumInfiniteScrollSignals) {
        const confidence = infiniteScrollScore >= 7 ? 'medium' : 'low';
        return {
          type: 'scrollDown',
          selector: null,
          confidence: confidence
        };
      }

      if (loadMoreButton && loadMoreScore >= 8) {
        const selector = this.generateSelectorsForElement(loadMoreButton, doc, selectorGenerator);
        const confidence = loadMoreScore >= 10 ? 'medium' : 'low';
        return {
          type: 'clickLoadMore',
          selector: selector,
          confidence: confidence
        };
      }

      if (nextButton && nextButtonScore >= 8) {
        const selector = this.generateSelectorsForElement(nextButton, doc, selectorGenerator);
        const confidence = nextButtonScore >= 10 ? 'medium' : 'low';
        return {
          type: 'clickNext',
          selector: selector,
          confidence: confidence
        };
      }

      if (prevButton && prevButtonScore >= 8) {
        const confidence = prevButtonScore >= 15 ? 'high' : prevButtonScore >= 10 ? 'medium' : 'low';
        return {
          type: 'scrollUp',
          selector: null,
          confidence: confidence
        };
      }

      // Nothing conclusive: report the best-scoring "next" candidates for diagnosis.
      const topNextCandidates = [...nextButtonCandidates]
        .sort((a, b) => b.score - a.score)
        .slice(0, 3)
        .map(c => ({
          score: c.score,
          text: c.text,
          tag: c.tag,
          reasons: c.reasons
        }));

      return {
        type: '',
        selector: null,
        confidence: 'low',
        debug: {
          clickableElementsCount: clickableElements.length,
          nextCandidatesCount: nextButtonCandidates.length,
          topNextCandidates: topNextCandidates,
          finalScores: {
            loadMore: loadMoreScore,
            next: nextButtonScore,
            prev: prevButtonScore,
            infiniteScroll: infiniteScrollScore
          }
        }
      };
    } catch (error: unknown) {
      console.error('Error:', error);
      const message = error instanceof Error ? error.message : String(error);
      return {
        type: '',
        selector: null,
        confidence: 'low',
        debug: 'Exception: ' + message
      };
    }
  }

  /**
   * Evaluate selector (supports both CSS and XPath).
   *
   * Selectors starting with `//` or `(//` are treated as XPath. Evaluation
   * errors are logged and produce an empty array.
   */
  private evaluateSelector(selector: string, doc: Document): HTMLElement[] {
    try {
      const isXPath = selector.startsWith('//') || selector.startsWith('(//');

      if (!isXPath) {
        return Array.from(doc.querySelectorAll<HTMLElement>(selector));
      }

      const result = doc.evaluate(
        selector,
        doc,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null
      );

      const elements: HTMLElement[] = [];
      for (let i = 0; i < result.snapshotLength; i++) {
        const node = result.snapshotItem(i);
        if (node && node.nodeType === Node.ELEMENT_NODE) {
          elements.push(node as HTMLElement);
        }
      }
      return elements;
    } catch (err) {
      console.error('Selector evaluation failed:', selector, err);
      return [];
    }
  }

  /**
   * Get all clickable elements in document.
   *
   * Elements are collected selector by selector (so the result is grouped by
   * selector, not in document order) and de-duplicated.
   */
  private getClickableElements(doc: Document): HTMLElement[] {
    const selectors = ['button', 'a', '[role="button"]', '[onclick]', '.btn', '.button'];
    const clickables = new Set<HTMLElement>();

    for (const selector of selectors) {
      for (const element of Array.from(doc.querySelectorAll<HTMLElement>(selector))) {
        clickables.add(element);
      }
    }

    return Array.from(clickables);
  }

  /**
   * Check if element is visible: rendered with a non-zero box and not hidden
   * through `display`, `visibility` or `opacity`.
   *
   * Styles are read through the window that owns the element so that elements
   * living in an iframe are resolved against their own document. Any error
   * counts as "not visible".
   */
  private isVisible(element: HTMLElement): boolean {
    try {
      const view = element.ownerDocument?.defaultView ?? window;
      const style = view.getComputedStyle(element);
      return style.display !== 'none' &&
        style.visibility !== 'hidden' &&
        style.opacity !== '0' &&
        element.offsetWidth > 0 &&
        element.offsetHeight > 0;
    } catch {
      return false;
    }
  }

  /**
   * Check if text matches any pattern
   */
  private matchesAnyPattern(text: string, patterns: readonly RegExp[]): boolean {
    return patterns.some(pattern => pattern.test(text));
  }

  /**
   * Check if element is near the list container: within 500px below or above
   * it, or vertically overlapping it and less than 200px away horizontally.
   */
  private isNearList(element: HTMLElement, listContainer: HTMLElement): boolean {
    try {
      const listRect = listContainer.getBoundingClientRect();
      const elementRect = element.getBoundingClientRect();

      // Directly below the list.
      if (elementRect.top >= listRect.bottom && elementRect.top <= listRect.bottom + 500) {
        return true;
      }

      // Directly above the list.
      if (elementRect.bottom <= listRect.top && elementRect.bottom >= listRect.top - 500) {
        return true;
      }

      // Beside the list.
      const verticalOverlap = !(elementRect.bottom < listRect.top || elementRect.top > listRect.bottom);
      if (verticalOverlap) {
        const horizontalDistance = Math.min(
          Math.abs(elementRect.left - listRect.right),
          Math.abs(elementRect.right - listRect.left)
        );
        if (horizontalDistance < 200) {
          return true;
        }
      }

      return false;
    } catch (error) {
      return false;
    }
  }

  /**
   * Detect infinite scroll indicators and return an additive score
   * (0 when the page fits in the viewport or when detection fails).
   *
   * Viewport height and scroll offset are taken from the window that owns
   * `doc`, because element rectangles are relative to that window's viewport.
   *
   * @param doc - Document being analyzed
   * @param listElements - Elements matched by the list selector
   * @param listContainer - Currently unused; kept for signature compatibility
   */
  private detectInfiniteScrollIndicators(doc: Document, listElements: HTMLElement[], listContainer: HTMLElement): number {
    try {
      const view = doc.defaultView ?? window;

      const initialItemCount = listElements.length;
      const initialHeight = doc.documentElement.scrollHeight;
      const viewportHeight = view.innerHeight;

      // A page that does not scroll cannot be an infinite-scroll page.
      if (initialHeight <= viewportHeight) {
        return 0;
      }

      const matchesAny = (selectors: readonly string[]): boolean =>
        selectors.some(selector => Boolean(doc.querySelector(selector)));

      let score = 0;

      // Loading indicator present (+3)
      const loadingIndicators = [
        '[class*="loading"]',
        '[class*="spinner"]',
        '[class*="skeleton"]',
        '[aria-busy="true"]',
        '[data-loading="true"]',
        '.loader',
        '.load-more-spinner',
        '[class*="load"]',
        '[id*="loading"]',
        '[id*="spinner"]'
      ];
      if (matchesAny(loadingIndicators)) {
        score += 3;
      }

      // Sentinel / trigger element present (+4)
      const sentinelPatterns = [
        '[class*="sentinel"]',
        '[class*="trigger"]',
        '[data-infinite]',
        '[data-scroll-trigger]',
        '#infinite-scroll-trigger',
        '[class*="infinite"]',
        '[id*="infinite"]'
      ];
      if (matchesAny(sentinelPatterns)) {
        score += 4;
      }

      // Visible scroll-to-top control (+2); only the first match of each selector is checked.
      const scrollToTopPatterns = [
        '[class*="scroll"][class*="top"]',
        '[aria-label*="scroll to top"]',
        '[title*="back to top"]',
        '.back-to-top',
        '#back-to-top',
        '[class*="scrolltop"]',
        '[class*="backtotop"]',
        'button[class*="top"]',
        'a[href="#top"]',
        'a[href="#"]'
      ];
      const hasVisibleScrollToTop = scrollToTopPatterns.some(selector => {
        const element = doc.querySelector(selector);
        return Boolean(element) && this.isVisible(element as HTMLElement);
      });
      if (hasVisibleScrollToTop) {
        score += 2;
      }

      // Page height relative to the viewport (+3 / +2)
      if (initialHeight > viewportHeight * 3) {
        score += 3;
      } else if (initialHeight > viewportHeight * 2) {
        score += 2;
      }

      // Number of list items (+2 / +1)
      if (initialItemCount >= 20) {
        score += 2;
      } else if (initialItemCount >= 10) {
        score += 1;
      }

      // Markup of known infinite-scroll / lazy-load libraries (+4)
      const infiniteScrollLibraries = [
        '.infinite-scroll',
        '[data-infinite-scroll]',
        '[data-flickity]',
        '[data-slick]',
        '.masonry',
        '[data-masonry]',
        '[class*="infinite-scroll"]',
        '[class*="lazy-load"]',
        '[data-lazy]'
      ];
      if (matchesAny(infiniteScrollLibraries)) {
        score += 4;
      }

      // How far the last list item extends below the current viewport (+3 / +2)
      const lastListItem = listElements[listElements.length - 1];
      if (lastListItem) {
        const lastItemRect = lastListItem.getBoundingClientRect();
        const lastItemY = lastItemRect.bottom + view.scrollY;
        const viewportBottom = view.scrollY + viewportHeight;

        if (lastItemY > viewportBottom + viewportHeight) {
          score += 3;
        } else if (lastItemY > viewportBottom) {
          score += 2;
        }
      }

      // Hidden "load"/"more" element (+2)
      const hasHiddenLoadTrigger = Array.from(
        doc.querySelectorAll<HTMLElement>('[class*="load"], [class*="more"]')
      ).some(el => {
        const style = view.getComputedStyle(el);
        return style.opacity === '0' || style.visibility === 'hidden';
      });
      if (hasHiddenLoadTrigger) {
        score += 2;
      }

      // No classic pagination controls at all (+1)
      const paginationControls = doc.querySelectorAll('[class*="pagination"], [class*="pager"]');
      if (paginationControls.length === 0) {
        score += 1;
      }

      return score;
    } catch (error) {
      console.error('Infinite scroll detection error:', error);
      return 0;
    }
  }

  /**
   * Generate selectors for element using ClientSelectorGenerator approach.
   *
   * All non-empty selectors present on the generator's result are joined with
   * `,` in this order: iframe, shadow, test id, id, href, rel, accessibility,
   * attribute, general.
   *
   * @returns The comma-joined selector chain, or `null` when nothing could be
   *   generated or an error occurred (errors are logged).
   */
  private generateSelectorsForElement(
    element: HTMLElement,
    doc: Document,
    selectorGenerator: ClientSelectorGenerator
  ): string | null {
    try {
      const primary = selectorGenerator.generateSelectorsFromElement(element, doc);

      if (!primary) {
        console.warn('Could not generate selectors for element');
        return null;
      }

      // iframe/shadow selectors are objects whose `full` field holds the selector.
      const nestedFull = (key: string) =>
        key in primary && primary[key]?.full ? primary[key].full : null;
      const direct = (key: string) => (key in primary ? primary[key] : null);

      const selectorChain = [
        nestedFull('iframeSelector'),
        nestedFull('shadowSelector'),
        direct('testIdSelector'),
        direct('id'),
        direct('hrefSelector'),
        direct('relSelector'),
        direct('accessibilitySelector'),
        direct('attrSelector'),
        direct('generalSelector'),
      ]
        .filter(selector => selector !== null && selector !== undefined && selector !== '')
        .join(',');

      return selectorChain || null;
    } catch (error) {
      console.error('Error generating selectors:', error);
      return null;
    }
  }
}
|
||||
|
||||
export const clientPaginationDetector = new ClientPaginationDetector();
|
||||
@@ -2476,6 +2476,46 @@ class ClientSelectorGenerator {
|
||||
return null;
|
||||
};
|
||||
|
||||
/**
 * Build selectors for a known element instead of a pointer position.
 *
 * The element is first centred vertically inside the window that owns
 * `iframeDoc` using an instant (`behavior: 'auto'`) scroll; a failure to
 * scroll is only logged. The element is then measured again and the centre
 * of its bounding box is handed to `getSelectors`.
 *
 * @param element - Element to generate selectors for
 * @param iframeDoc - Document whose window is scrolled and which is passed to `getSelectors`
 * @returns The result of `getSelectors` for the element's centre point, or
 *   `null` when an error is thrown (the error is logged as a warning)
 */
public generateSelectorsFromElement = (
  element: HTMLElement,
  iframeDoc: Document
): any | null => {
  try {
    // Best-effort scroll: never let a scrolling problem abort selector generation.
    try {
      const initialBox = element.getBoundingClientRect();
      const frameWindow = iframeDoc.defaultView;

      if (frameWindow) {
        const centredTop =
          initialBox.top + frameWindow.scrollY - (frameWindow.innerHeight / 2) + (initialBox.height / 2);

        frameWindow.scrollTo({ top: centredTop, behavior: 'auto' });
      }
    } catch (scrollError) {
      console.warn('[ClientSelectorGenerator] Could not scroll element into view:', scrollError);
    }

    // Measure again: the scroll above may have moved the element in the viewport.
    const { left, top, width, height } = element.getBoundingClientRect();

    return this.getSelectors(iframeDoc, {
      x: left + width / 2,
      y: top + height / 2
    });
  } catch (e) {
    const failure = e as Error;
    console.warn(`Error generating selectors from element: ${failure.message}`);
    console.warn(`Stack: ${failure.stack}`);
    return null;
  }
};
|
||||
|
||||
public getChildSelectors = (
|
||||
iframeDoc: Document,
|
||||
parentSelector: string
|
||||
@@ -2499,34 +2539,24 @@ class ClientSelectorGenerator {
|
||||
return [];
|
||||
}
|
||||
|
||||
if (parentElements.length > 10) {
|
||||
parentElements = parentElements.slice(0, 10);
|
||||
}
|
||||
const maxItems = 10;
|
||||
const limitedParents = parentElements.slice(0, Math.min(maxItems, parentElements.length));
|
||||
|
||||
const allChildSelectors = new Set<string>();
|
||||
const processedParents = new Set<HTMLElement>();
|
||||
const allChildSelectors: string[] = [];
|
||||
|
||||
for (const parentElement of parentElements) {
|
||||
if (processedParents.has(parentElement)) continue;
|
||||
processedParents.add(parentElement);
|
||||
for (let i = 0; i < limitedParents.length; i++) {
|
||||
const parent = limitedParents[i];
|
||||
const otherListElements = limitedParents.filter((_, index) => index !== i);
|
||||
|
||||
const otherListElements = parentElements.filter(
|
||||
(el) => el !== parentElement
|
||||
);
|
||||
|
||||
const childSelectors = this.generateOptimizedChildXPaths(
|
||||
parentElement,
|
||||
const selectors = this.generateOptimizedChildXPaths(
|
||||
parent,
|
||||
parentSelector,
|
||||
iframeDoc,
|
||||
otherListElements
|
||||
);
|
||||
|
||||
for (const selector of childSelectors) {
|
||||
allChildSelectors.add(selector);
|
||||
}
|
||||
allChildSelectors.push(...selectors);
|
||||
}
|
||||
|
||||
const result = Array.from(allChildSelectors).sort();
|
||||
const result = Array.from(new Set(allChildSelectors)).sort();
|
||||
this.selectorCache.set(cacheKey, result);
|
||||
return result;
|
||||
} catch (error) {
|
||||
@@ -2609,7 +2639,6 @@ class ClientSelectorGenerator {
|
||||
private generateOptimizedChildXPaths(
|
||||
parentElement: HTMLElement,
|
||||
listSelector: string,
|
||||
document: Document,
|
||||
otherListElements: HTMLElement[] = []
|
||||
): string[] {
|
||||
const selectors: string[] = [];
|
||||
@@ -4297,4 +4326,5 @@ class ClientSelectorGenerator {
|
||||
}
|
||||
}
|
||||
|
||||
export { ClientSelectorGenerator };
|
||||
export const clientSelectorGenerator = new ClientSelectorGenerator();
|
||||
|
||||
Reference in New Issue
Block a user