Merge branch 'develop' into browser-service

This commit is contained in:
Rohit
2025-11-30 19:26:42 +05:30
committed by GitHub
14 changed files with 1433 additions and 634 deletions

View File

@@ -460,8 +460,9 @@ export default class Interpreter extends EventEmitter {
for (const link of links) {
// eslint-disable-next-line
this.concurrency.addJob(async () => {
let newPage = null;
try {
const newPage = await context.newPage();
newPage = await context.newPage();
await newPage.goto(link);
await newPage.waitForLoadState('networkidle');
await this.runLoop(newPage, this.initializedWorkflow!);
@@ -470,6 +471,14 @@ export default class Interpreter extends EventEmitter {
// but newPage(), goto() and waitForLoadState() don't (and will kill
// the interpreter by throwing).
this.log(<Error>e, Level.ERROR);
} finally {
if (newPage && !newPage.isClosed()) {
try {
await newPage.close();
} catch (closeError) {
this.log('Failed to close enqueued page', Level.WARN);
}
}
}
});
}
@@ -1463,41 +1472,57 @@ export default class Interpreter extends EventEmitter {
* User-requested concurrency should be entirely managed by the concurrency manager,
* e.g. via `enqueueLinks`.
*/
p.on('popup', (popup) => {
const popupHandler = (popup) => {
this.concurrency.addJob(() => this.runLoop(popup, workflowCopy));
});
};
p.on('popup', popupHandler);
/* eslint no-constant-condition: ["warn", { "checkLoops": false }] */
let loopIterations = 0;
const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker
// Detaches the popup listener registered above. The loop's return paths call
// this before leaving.
const cleanup = () => {
  try {
    if (p.isClosed()) {
      // Nothing is detached when the page reports itself as closed.
      return;
    }
    p.removeListener('popup', popupHandler);
  } catch (cleanupError) {
    // Best-effort: an error raised while detaching the listener is ignored.
  }
};
while (true) {
if (this.isAborted) {
this.log('Workflow aborted during step execution', Level.WARN);
cleanup();
return;
}
// Circuit breaker to prevent infinite loops
if (++loopIterations > MAX_LOOP_ITERATIONS) {
this.log('Maximum loop iterations reached, terminating to prevent infinite loop', Level.ERROR);
cleanup();
return;
}
// Checks whether the page was closed from outside,
// or the workflow execution has been stopped via `interpreter.stop()`
if (p.isClosed() || !this.stopper) {
cleanup();
return;
}
try {
await p.waitForLoadState();
} catch (e) {
cleanup();
await p.close();
return;
}
if (workflowCopy.length === 0) {
this.log('All actions completed. Workflow finished.', Level.LOG);
cleanup();
return;
}
@@ -1589,6 +1614,7 @@ export default class Interpreter extends EventEmitter {
}
} else {
//await this.disableAdBlocker(p);
cleanup();
return;
}
}
@@ -1681,4 +1707,44 @@ export default class Interpreter extends EventEmitter {
throw new Error('Cannot stop, there is no running workflow!');
}
}
/**
 * Cleanup method to release resources and prevent memory leaks.
 * Call this when the interpreter is no longer needed.
 *
 * Steps, in order:
 *  1. If `stopper` is set, stops the running workflow. A failure here is
 *     logged at WARN level and does not abort the rest of the cleanup.
 *  2. Drops the ad-blocker reference.
 *  3. Empties the accumulated result containers.
 *  4. Resets the abort flag and the initialized workflow.
 *
 * @returns A promise that resolves once every step above has run.
 * @throws Rethrows any error that escapes the steps above, after logging it
 *         at ERROR level.
 */
public async cleanup(): Promise<void> {
  // A `throw` can carry any value, so `.message` must not be assumed to exist.
  const describeError = (error: unknown): string => {
    if (error instanceof Error) {
      return error.message;
    }
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  };

  try {
    // Stop any running workflows first
    if (this.stopper) {
      try {
        await this.stop();
      } catch (error: unknown) {
        this.log(`Error stopping workflow during cleanup: ${describeError(error)}`, Level.WARN);
      }
    }

    // Clear ad-blocker resources
    if (this.blocker) {
      try {
        this.blocker = null;
        this.log('Ad-blocker resources cleared', Level.DEBUG);
      } catch (error: unknown) {
        this.log(`Error cleaning up ad-blocker: ${describeError(error)}`, Level.WARN);
      }
    }

    // Clear accumulated data to free memory
    this.cumulativeResults = [];
    this.namedResults = {};
    this.serializableDataByType = { scrapeList: {}, scrapeSchema: {} };

    // Reset state
    this.isAborted = false;
    this.initializedWorkflow = null;

    this.log('Interpreter cleanup completed', Level.DEBUG);
  } catch (error: unknown) {
    this.log(`Error during interpreter cleanup: ${describeError(error)}`, Level.ERROR);
    throw error;
  }
}
}

View File

@@ -12,8 +12,6 @@
"@mui/material": "^5.6.2",
"@react-oauth/google": "^0.12.1",
"@tanstack/react-query": "^5.90.2",
"@testing-library/react": "^13.1.1",
"@testing-library/user-event": "^13.5.0",
"@types/bcrypt": "^5.0.2",
"@types/body-parser": "^1.19.5",
"@types/csurf": "^1.11.5",
@@ -38,14 +36,12 @@
"dotenv": "^16.0.0",
"express": "^4.17.2",
"express-session": "^1.18.1",
"fortawesome": "^0.0.1-security",
"google-auth-library": "^9.14.1",
"googleapis": "^144.0.0",
"i18next": "^24.0.2",
"i18next-browser-languagedetector": "^8.0.0",
"i18next-http-backend": "^3.0.1",
"idcac-playwright": "^0.1.3",
"ioredis": "^5.4.1",
"joi": "^17.6.0",
"joplin-turndown-plugin-gfm": "^1.0.12",
"jsonwebtoken": "^9.0.2",
@@ -64,11 +60,8 @@
"posthog-node": "^4.2.1",
"react": "^18.0.0",
"react-dom": "^18.0.0",
"react-highlight": "0.15.0",
"react-i18next": "^15.1.3",
"react-router-dom": "^6.26.1",
"react-simple-code-editor": "^0.11.2",
"react-transition-group": "^4.4.2",
"rrweb-snapshot": "^2.0.0-alpha.4",
"sequelize": "^6.37.3",
"sequelize-typescript": "^2.1.6",
@@ -119,9 +112,6 @@
"@types/node": "22.7.9",
"@types/node-cron": "^3.0.11",
"@types/node-fetch": "^2.6.12",
"@types/prismjs": "^1.26.0",
"@types/react-highlight": "^0.12.5",
"@types/react-transition-group": "^4.4.4",
"@types/styled-components": "^5.1.23",
"@types/swagger-jsdoc": "^6.0.4",
"@types/swagger-ui-express": "^4.1.6",

View File

@@ -658,6 +658,16 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
};
}
browser = browserPool.getRemoteBrowser(plainRun.browserId);
if (!browser) {
throw new Error('Could not access browser');
}
let currentPage = await browser.getCurrentPage();
if (!currentPage) {
throw new Error('Could not create a new page');
}
if (recording.recording_meta.type === 'scrape') {
logger.log('info', `Executing scrape robot for API run ${id}`);
@@ -686,13 +696,13 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
// Markdown conversion
if (formats.includes('markdown')) {
markdown = await convertPageToMarkdown(url);
markdown = await convertPageToMarkdown(url, currentPage);
serializableOutput.markdown = [{ content: markdown }];
}
// HTML conversion
if (formats.includes('html')) {
html = await convertPageToHTML(url);
html = await convertPageToHTML(url, currentPage);
serializableOutput.html = [{ content: html }];
}
@@ -820,16 +830,6 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
plainRun.status = 'running';
browser = browserPool.getRemoteBrowser(plainRun.browserId);
if (!browser) {
throw new Error('Could not access browser');
}
let currentPage = await browser.getCurrentPage();
if (!currentPage) {
throw new Error('Could not create a new page');
}
const workflow = AddGeneratedFlags(recording.recording);
browser.interpreter.setRunId(plainRun.runId);

View File

@@ -1,9 +1,27 @@
import { connectToRemoteBrowser } from "../browser-management/browserConnection";
import { parseMarkdown } from "./markdown";
import logger from "../logger";
/**
 * Navigates `page` to `url`, first waiting for the network to go idle and,
 * if that attempt throws for any reason, retrying once while waiting only
 * for `domcontentloaded`. An error thrown by the retry propagates to the
 * caller.
 *
 * @param page - Object exposing a `goto(url, options)` method that accepts
 *               `waitUntil` and `timeout` options.
 * @param url - Address to navigate to.
 * @param timeoutMs - Timeout in milliseconds passed to each attempt
 *                    (default 100000).
 * @returns Whatever `page.goto` resolves with for the attempt that succeeded.
 */
async function gotoWithFallback(page: any, url: string, timeoutMs: number = 100000) {
  try {
    return await page.goto(url, {
      waitUntil: "networkidle",
      timeout: timeoutMs,
    });
  } catch {
    // fallback: JS-heavy or unstable sites
    return await page.goto(url, {
      waitUntil: "domcontentloaded",
      timeout: timeoutMs,
    });
  }
}
/**
* Fetches a webpage, strips scripts/styles/images/etc,
* returns clean Markdown using parser.
* @param url - The URL to convert
* @param existingPage - Optional existing Playwright page instance to reuse
*/
export async function convertPageToMarkdown(url: string): Promise<string> {
const browser = await connectToRemoteBrowser();
@@ -11,7 +29,7 @@ export async function convertPageToMarkdown(url: string): Promise<string> {
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
await page.addInitScript(() => {
const cleanedHtml = await page.evaluate(() => {
const selectors = [
"script",
"style",
@@ -42,14 +60,16 @@ export async function convertPageToMarkdown(url: string): Promise<string> {
}
});
});
});
// Re-extract HTML after cleanup
const cleanedHtml = await page.evaluate(() => {
return document.documentElement.outerHTML;
});
await browser.close();
if (shouldCloseBrowser && browser) {
logger.log('info', `[Scrape] Closing browser instance created for markdown conversion`);
await browser.close();
} else {
logger.log('info', `[Scrape] Keeping existing browser instance open after markdown conversion`);
}
// Convert cleaned HTML → Markdown
const markdown = await parseMarkdown(cleanedHtml, url);
@@ -59,6 +79,8 @@ export async function convertPageToMarkdown(url: string): Promise<string> {
/**
* Fetches a webpage, strips scripts/styles/images/etc,
* returns clean HTML.
* @param url - The URL to convert
* @param existingPage - Optional existing Playwright page instance to reuse
*/
export async function convertPageToHTML(url: string): Promise<string> {
const browser = await connectToRemoteBrowser();
@@ -66,7 +88,7 @@ export async function convertPageToHTML(url: string): Promise<string> {
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
await page.addInitScript(() => {
const cleanedHtml = await page.evaluate(() => {
const selectors = [
"script",
"style",
@@ -97,14 +119,16 @@ export async function convertPageToHTML(url: string): Promise<string> {
}
});
});
});
// Re-extract HTML after cleanup
const cleanedHtml = await page.evaluate(() => {
return document.documentElement.outerHTML;
});
await browser.close();
if (shouldCloseBrowser && browser) {
logger.log('info', `[Scrape] Closing browser instance created for HTML conversion`);
await browser.close();
} else {
logger.log('info', `[Scrape] Keeping existing browser instance open after HTML conversion`);
}
// Return cleaned HTML directly
return cleanedHtml;

View File

@@ -181,7 +181,7 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
logger.log('info', `Browser ${browserId} found and ready for execution`);
try {
try {
// Find the recording
const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
@@ -189,6 +189,30 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
throw new Error(`Recording for run ${data.runId} not found`);
}
// Poll once per second until the browser exposes a current page. The loop
// stops as soon as a page is available, BROWSER_PAGE_TIMEOUT ms have elapsed,
// or MAX_PAGE_ATTEMPTS polls have been made, whichever happens first.
// NOTE(review): getCurrentPage() is not awaited here, while the executeRun
// call sites elsewhere in this change do await it. Confirm it is synchronous;
// if it returns a promise, `!currentPage` is never true and the loop is skipped.
let currentPage = browser.getCurrentPage();
const pageWaitStart = Date.now();
let lastPageLogTime = 0;
let pageAttempts = 0;
const MAX_PAGE_ATTEMPTS = 15;
while (!currentPage && (Date.now() - pageWaitStart) < BROWSER_PAGE_TIMEOUT && pageAttempts < MAX_PAGE_ATTEMPTS) {
const currentTime = Date.now();
pageAttempts++;
// Throttle the progress message to at most one line every 5 seconds
// (lastPageLogTime starts at 0, so the first iteration always logs).
if (currentTime - lastPageLogTime > 5000) {
logger.log('info', `Page not ready for browser ${browserId}, waiting... (${Math.round((currentTime - pageWaitStart) / 1000)}s elapsed)`);
lastPageLogTime = currentTime;
}
await new Promise(resolve => setTimeout(resolve, 1000));
currentPage = browser.getCurrentPage();
}
// NOTE(review): the message below always reports BROWSER_PAGE_TIMEOUT, even
// when the loop ended because of the 15-attempt cap. Confirm the two limits
// agree, otherwise the reported duration can be wrong.
if (!currentPage) {
throw new Error(`No current page available for browser ${browserId} after ${BROWSER_PAGE_TIMEOUT/1000}s timeout`);
}
if (recording.recording_meta.type === 'scrape') {
logger.log('info', `Executing scrape robot for run ${data.runId}`);
@@ -212,13 +236,13 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
// Markdown conversion
if (formats.includes('markdown')) {
markdown = await convertPageToMarkdown(url);
markdown = await convertPageToMarkdown(url, currentPage);
serializableOutput.markdown = [{ content: markdown }];
}
// HTML conversion
if (formats.includes('html')) {
html = await convertPageToHTML(url);
html = await convertPageToHTML(url, currentPage);
serializableOutput.html = [{ content: html }];
}
@@ -328,30 +352,6 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
}
};
let currentPage = browser.getCurrentPage();
const pageWaitStart = Date.now();
let lastPageLogTime = 0;
let pageAttempts = 0;
const MAX_PAGE_ATTEMPTS = 15;
while (!currentPage && (Date.now() - pageWaitStart) < BROWSER_PAGE_TIMEOUT && pageAttempts < MAX_PAGE_ATTEMPTS) {
const currentTime = Date.now();
pageAttempts++;
if (currentTime - lastPageLogTime > 5000) {
logger.log('info', `Page not ready for browser ${browserId}, waiting... (${Math.round((currentTime - pageWaitStart) / 1000)}s elapsed)`);
lastPageLogTime = currentTime;
}
await new Promise(resolve => setTimeout(resolve, 1000));
currentPage = browser.getCurrentPage();
}
if (!currentPage) {
throw new Error(`No current page available for browser ${browserId} after ${BROWSER_PAGE_TIMEOUT/1000}s timeout`);
}
logger.log('info', `Starting workflow execution for run ${data.runId}`);
await run.update({

View File

@@ -205,6 +205,16 @@ async function executeRun(id: string, userId: string) {
}
}
browser = browserPool.getRemoteBrowser(plainRun.browserId);
if (!browser) {
throw new Error('Could not access browser');
}
let currentPage = await browser.getCurrentPage();
if (!currentPage) {
throw new Error('Could not create a new page');
}
if (recording.recording_meta.type === 'scrape') {
logger.log('info', `Executing scrape robot for scheduled run ${id}`);
@@ -249,13 +259,13 @@ async function executeRun(id: string, userId: string) {
// Markdown conversion
if (formats.includes('markdown')) {
markdown = await convertPageToMarkdown(url);
markdown = await convertPageToMarkdown(url, currentPage);
serializableOutput.markdown = [{ content: markdown }];
}
// HTML conversion
if (formats.includes('html')) {
html = await convertPageToHTML(url);
html = await convertPageToHTML(url, currentPage);
serializableOutput.html = [{ content: html }];
}
@@ -388,16 +398,6 @@ async function executeRun(id: string, userId: string) {
logger.log('warn', `Failed to send run-started notification for run ${plainRun.runId}: ${socketError.message}`);
}
browser = browserPool.getRemoteBrowser(plainRun.browserId);
if (!browser) {
throw new Error('Could not access browser');
}
let currentPage = await browser.getCurrentPage();
if (!currentPage) {
throw new Error('Could not create a new page');
}
const workflow = AddGeneratedFlags(recording.recording);
// Set run ID for real-time data persistence

View File

@@ -304,8 +304,6 @@ export const BrowserWindow = () => {
const createFieldsFromChildSelectors = useCallback(
(childSelectors: string[], listSelector: string) => {
if (!childSelectors.length || !currentSnapshot) return {};
const iframeElement = document.querySelector(
"#dom-browser-iframe"
) as HTMLIFrameElement;
@@ -323,7 +321,6 @@ export const BrowserWindow = () => {
const uniqueChildSelectors = [...new Set(childSelectors)];
// Filter child selectors that occur in at least 2 out of first 10 list elements
const validateChildSelectors = (selectors: string[]): string[] => {
try {
// Get first 10 list elements
@@ -352,13 +349,10 @@ export const BrowserWindow = () => {
// If we can't access the element, it's likely in shadow DOM - include it
if (!testElement) {
console.log(`Including potentially shadow DOM selector: ${selector}`);
validSelectors.push(selector);
continue;
}
} catch (accessError) {
// If there's an error accessing, assume shadow DOM and include it
console.log(`Including selector due to access error: ${selector}`);
validSelectors.push(selector);
continue;
}
@@ -395,7 +389,6 @@ export const BrowserWindow = () => {
}
};
// Enhanced XPath evaluation for multiple elements
const evaluateXPathAllWithShadowSupport = (
document: Document,
xpath: string,
@@ -423,8 +416,6 @@ export const BrowserWindow = () => {
return elements;
}
// If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal
// This is a simplified version - for multiple elements, we'll primarily rely on regular XPath
return elements;
} catch (err) {
console.error("XPath evaluation failed:", xpath, err);
@@ -432,7 +423,9 @@ export const BrowserWindow = () => {
}
};
const validatedChildSelectors = validateChildSelectors(uniqueChildSelectors);
// A value counts as usable data when it is present and still has at least one
// character left after trimming surrounding whitespace.
const isValidData = (text: string | null | undefined): boolean => {
  if (text == null) {
    return false;
  }
  return text.trim() !== "";
};
const isElementVisible = (element: HTMLElement): boolean => {
try {
@@ -443,443 +436,119 @@ export const BrowserWindow = () => {
}
};
const isValidData = (data: string): boolean => {
if (!data || data.trim().length === 0) return false;
const createFieldData = (element: HTMLElement, selector: string, forceAttribute?: string) => {
const tagName = element.tagName.toLowerCase();
let data = '';
let attribute = forceAttribute || 'innerText';
const trimmed = data.trim();
// Filter out single letters
if (trimmed.length === 1) {
return false;
}
// Filter out pure symbols/punctuation
if (trimmed.length < 3 && /^[^\w\s]+$/.test(trimmed)) {
return false;
}
// Filter out whitespace and punctuation only
if (/^[\s\p{P}\p{S}]*$/u.test(trimmed)) return false;
return trimmed.length > 0;
};
// Enhanced shadow DOM-aware element evaluation
const evaluateXPathWithShadowSupport = (
document: Document,
xpath: string,
isShadow: boolean = false
): Element | null => {
try {
// First try regular XPath evaluation
const result = document.evaluate(
xpath,
document,
null,
XPathResult.FIRST_ORDERED_NODE_TYPE,
null
).singleNodeValue as Element | null;
if (!isShadow || result) {
return result;
if (forceAttribute) {
if (forceAttribute === 'href') {
data = element.getAttribute('href') || '';
} else if (forceAttribute === 'innerText') {
data = (element.textContent || '').trim();
}
// If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal
let cleanPath = xpath;
let isIndexed = false;
const indexedMatch = xpath.match(/^\((.*?)\)\[(\d+)\](.*)$/);
if (indexedMatch) {
cleanPath = indexedMatch[1] + indexedMatch[3];
isIndexed = true;
} else if (tagName === 'img') {
data = element.getAttribute('src') || '';
attribute = 'src';
} else if (tagName === 'a') {
const href = element.getAttribute('href') || '';
const text = (element.textContent || '').trim();
if (href && href !== '#' && !href.startsWith('javascript:')) {
data = href;
attribute = 'href';
} else if (text) {
data = text;
attribute = 'innerText';
}
const pathParts = cleanPath
.replace(/^\/\//, "")
.split("/")
.map((p) => p.trim())
.filter((p) => p.length > 0);
let currentContexts: (Document | Element | ShadowRoot)[] = [document];
for (let i = 0; i < pathParts.length; i++) {
const part = pathParts[i];
const nextContexts: (Element | ShadowRoot)[] = [];
for (const ctx of currentContexts) {
const positionalMatch = part.match(/^([^[]+)\[(\d+)\]$/);
let partWithoutPosition = part;
let requestedPosition: number | null = null;
if (positionalMatch) {
partWithoutPosition = positionalMatch[1];
requestedPosition = parseInt(positionalMatch[2]);
}
const matched = queryInsideContext(ctx, partWithoutPosition);
let elementsToAdd = matched;
if (requestedPosition !== null) {
const index = requestedPosition - 1;
if (index >= 0 && index < matched.length) {
elementsToAdd = [matched[index]];
} else {
elementsToAdd = [];
}
}
elementsToAdd.forEach((el) => {
nextContexts.push(el);
if (el.shadowRoot) {
nextContexts.push(el.shadowRoot);
}
});
}
if (nextContexts.length === 0) {
return null;
}
currentContexts = nextContexts;
}
if (currentContexts.length > 0) {
if (isIndexed && indexedMatch) {
const requestedIndex = parseInt(indexedMatch[2]) - 1;
if (requestedIndex >= 0 && requestedIndex < currentContexts.length) {
return currentContexts[requestedIndex] as Element;
} else {
return null;
}
}
return currentContexts[0] as Element;
}
return null;
} catch (err) {
console.error("XPath evaluation failed:", xpath, err);
return null;
}
};
const queryInsideContext = (
context: Document | Element | ShadowRoot,
part: string
): Element[] => {
try {
const { tagName, conditions } = parseXPathPart(part);
const candidateElements = Array.from(context.querySelectorAll(tagName));
if (candidateElements.length === 0) {
return [];
}
const matchingElements = candidateElements.filter((el) => {
return elementMatchesConditions(el, conditions);
});
return matchingElements;
} catch (err) {
console.error("Error in queryInsideContext:", err);
return [];
}
};
const parseXPathPart = (
part: string
): { tagName: string; conditions: string[] } => {
const tagMatch = part.match(/^([a-zA-Z0-9-]+)/);
const tagName = tagMatch ? tagMatch[1] : "*";
const conditionMatches = part.match(/\[([^\]]+)\]/g);
const conditions = conditionMatches
? conditionMatches.map((c) => c.slice(1, -1))
: [];
return { tagName, conditions };
};
const elementMatchesConditions = (
element: Element,
conditions: string[]
): boolean => {
for (const condition of conditions) {
if (!elementMatchesCondition(element, condition)) {
return false;
}
}
return true;
};
const elementMatchesCondition = (
element: Element,
condition: string
): boolean => {
condition = condition.trim();
if (/^\d+$/.test(condition)) {
return true;
} else {
data = (element.textContent || '').trim();
attribute = 'innerText';
}
// Handle @attribute="value"
const attrMatch = condition.match(/^@([^=]+)=["']([^"']+)["']$/);
if (attrMatch) {
const [, attr, value] = attrMatch;
const elementValue = element.getAttribute(attr);
return elementValue === value;
}
if (!data) return null;
// Handle contains(@class, 'value')
const classContainsMatch = condition.match(
/^contains\(@class,\s*["']([^"']+)["']\)$/
);
if (classContainsMatch) {
const className = classContainsMatch[1];
return element.classList.contains(className);
}
// Handle contains(@attribute, 'value')
const attrContainsMatch = condition.match(
/^contains\(@([^,]+),\s*["']([^"']+)["']\)$/
);
if (attrContainsMatch) {
const [, attr, value] = attrContainsMatch;
const elementValue = element.getAttribute(attr) || "";
return elementValue.includes(value);
}
// Handle text()="value"
const textMatch = condition.match(/^text\(\)=["']([^"']+)["']$/);
if (textMatch) {
const expectedText = textMatch[1];
const elementText = element.textContent?.trim() || "";
return elementText === expectedText;
}
// Handle contains(text(), 'value')
const textContainsMatch = condition.match(
/^contains\(text\(\),\s*["']([^"']+)["']\)$/
);
if (textContainsMatch) {
const expectedText = textContainsMatch[1];
const elementText = element.textContent?.trim() || "";
return elementText.includes(expectedText);
}
// Handle count(*)=0 (element has no children)
if (condition === "count(*)=0") {
return element.children.length === 0;
}
// Handle other count conditions
const countMatch = condition.match(/^count\(\*\)=(\d+)$/);
if (countMatch) {
const expectedCount = parseInt(countMatch[1]);
return element.children.length === expectedCount;
}
return true;
};
// Enhanced value extraction with shadow DOM support
const extractValueWithShadowSupport = (
element: Element,
attribute: string
): string | null => {
if (!element) return null;
const baseURL =
element.ownerDocument?.location?.href || window.location.origin;
// Check shadow DOM content first
if (element.shadowRoot) {
const shadowContent = element.shadowRoot.textContent;
if (shadowContent?.trim()) {
return shadowContent.trim();
return {
data,
selectorObj: {
selector,
attribute,
tag: tagName.toUpperCase(),
isShadow: element.getRootNode() instanceof ShadowRoot
}
}
if (attribute === "innerText") {
let textContent =
(element as HTMLElement).innerText?.trim() ||
(element as HTMLElement).textContent?.trim();
if (!textContent) {
const dataAttributes = [
"data-600",
"data-text",
"data-label",
"data-value",
"data-content",
];
for (const attr of dataAttributes) {
const dataValue = element.getAttribute(attr);
if (dataValue && dataValue.trim()) {
textContent = dataValue.trim();
break;
}
}
}
return textContent || null;
} else if (attribute === "innerHTML") {
return element.innerHTML?.trim() || null;
} else if (attribute === "href") {
let anchorElement = element;
if (element.tagName !== "A") {
anchorElement =
element.closest("a") ||
element.parentElement?.closest("a") ||
element;
}
const hrefValue = anchorElement.getAttribute("href");
if (!hrefValue || hrefValue.trim() === "") {
return null;
}
try {
return new URL(hrefValue, baseURL).href;
} catch (e) {
console.warn("Error creating URL from", hrefValue, e);
return hrefValue;
}
} else if (attribute === "src") {
const attrValue = element.getAttribute(attribute);
const dataAttr = attrValue || element.getAttribute("data-" + attribute);
if (!dataAttr || dataAttr.trim() === "") {
const style = window.getComputedStyle(element as HTMLElement);
const bgImage = style.backgroundImage;
if (bgImage && bgImage !== "none") {
const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
return matches ? new URL(matches[1], baseURL).href : null;
}
return null;
}
try {
return new URL(dataAttr, baseURL).href;
} catch (e) {
console.warn("Error creating URL from", dataAttr, e);
return dataAttr;
}
}
return element.getAttribute(attribute);
};
// Simple deepest child finder - limit depth to prevent hanging
const findDeepestChild = (element: HTMLElement): HTMLElement => {
let deepest = element;
let maxDepth = 0;
const traverse = (el: HTMLElement, depth: number) => {
if (depth > 3) return;
const text = el.textContent?.trim() || "";
if (isValidData(text) && depth > maxDepth) {
maxDepth = depth;
deepest = el;
}
const children = Array.from(el.children).slice(0, 3);
children.forEach((child) => {
if (child instanceof HTMLElement) {
traverse(child, depth + 1);
}
});
};
traverse(element, 0);
return deepest;
};
validatedChildSelectors.forEach((childSelector, index) => {
const validatedChildSelectors = validateChildSelectors(uniqueChildSelectors);
validatedChildSelectors.forEach((selector, index) => {
try {
// Detect if this selector should use shadow DOM traversal
const isShadowSelector = childSelector.includes('>>') ||
childSelector.startsWith('//') &&
(listSelector.includes('>>') || currentSnapshot?.snapshot);
const element = evaluateXPathWithShadowSupport(
const elements = evaluateXPathAllWithShadowSupport(
iframeElement.contentDocument!,
childSelector,
isShadowSelector
) as HTMLElement;
selector,
selector.includes(">>") || selector.startsWith("//")
);
if (element && isElementVisible(element)) {
if (elements.length === 0) return;
const element = elements[0] as HTMLElement;
const tagName = element.tagName.toLowerCase();
const isShadow = element.getRootNode() instanceof ShadowRoot;
if (isElementVisible(element)) {
const rect = element.getBoundingClientRect();
const position = { x: rect.left, y: rect.top };
const tagName = element.tagName.toLowerCase();
const isShadow = element.getRootNode() instanceof ShadowRoot;
if (tagName === "a") {
const anchor = element as HTMLAnchorElement;
const href = extractValueWithShadowSupport(anchor, "href");
const text = extractValueWithShadowSupport(anchor, "innerText");
if (
href &&
href.trim() !== "" &&
href !== window.location.href &&
!href.startsWith("javascript:") &&
!href.startsWith("#")
) {
const fieldIdHref = Date.now() + index * 1000;
candidateFields.push({
id: fieldIdHref,
element: element,
isLeaf: true,
depth: 0,
position: position,
field: {
id: fieldIdHref,
type: "text",
label: `Label ${index * 2 + 1}`,
data: href,
selectorObj: {
selector: childSelector,
tag: element.tagName,
isShadow: isShadow,
attribute: "href",
},
},
});
}
const fieldIdText = Date.now() + index * 1000 + 1;
if (tagName === 'a') {
const href = element.getAttribute('href');
const text = (element.textContent || '').trim();
if (text && isValidData(text)) {
candidateFields.push({
id: fieldIdText,
element: element,
isLeaf: true,
depth: 0,
position: position,
field: {
id: fieldIdText,
type: "text",
label: `Label ${index * 2 + 2}`,
data: text,
selectorObj: {
selector: childSelector,
tag: element.tagName,
isShadow: isShadow,
attribute: "innerText",
},
},
});
const textField = createFieldData(element, selector, 'innerText');
if (textField && textField.data) {
const fieldId = Date.now() + index * 1000;
candidateFields.push({
id: fieldId,
element: element,
isLeaf: true,
depth: 0,
position: position,
field: {
id: fieldId,
type: "text",
label: `Label ${index * 2 + 1}`,
data: textField.data,
selectorObj: textField.selectorObj
}
});
}
}
if (href && href !== '#' && !href.startsWith('javascript:')) {
const hrefField = createFieldData(element, selector, 'href');
if (hrefField && hrefField.data) {
const fieldId = Date.now() + index * 1000 + 1;
candidateFields.push({
id: fieldId,
element: element,
isLeaf: true,
depth: 0,
position: position,
field: {
id: fieldId,
type: "text",
label: `Label ${index * 2 + 2}`,
data: hrefField.data,
selectorObj: hrefField.selectorObj
}
});
}
}
} else if (tagName === "img") {
const img = element as HTMLImageElement;
const src = extractValueWithShadowSupport(img, "src");
const alt = extractValueWithShadowSupport(img, "alt");
const src = element.getAttribute("src");
if (src && !src.startsWith("data:") && src.length > 10) {
if (src && isValidData(src)) {
const fieldId = Date.now() + index * 1000;
candidateFields.push({
@@ -894,7 +563,7 @@ export const BrowserWindow = () => {
label: `Label ${index + 1}`,
data: src,
selectorObj: {
selector: childSelector,
selector: selector,
tag: element.tagName,
isShadow: isShadow,
attribute: "src",
@@ -902,9 +571,11 @@ export const BrowserWindow = () => {
},
});
}
} else {
const fieldData = createFieldData(element, selector);
if (alt && isValidData(alt)) {
const fieldId = Date.now() + index * 1000 + 1;
if (fieldData && fieldData.data && isValidData(fieldData.data)) {
const fieldId = Date.now() + index * 1000;
candidateFields.push({
id: fieldId,
@@ -912,127 +583,39 @@ export const BrowserWindow = () => {
isLeaf: true,
depth: 0,
position: position,
field: {
id: fieldId,
type: "text",
label: `Label ${index + 2}`,
data: alt,
selectorObj: {
selector: childSelector,
tag: element.tagName,
isShadow: isShadow,
attribute: "alt",
},
},
});
}
} else {
const deepestElement = findDeepestChild(element);
const data = extractValueWithShadowSupport(deepestElement, "innerText");
if (data && isValidData(data)) {
const isLeaf = isLeafElement(deepestElement);
const depth = getElementDepthFromList(
deepestElement,
listSelector,
iframeElement.contentDocument!
);
const fieldId = Date.now() + index;
candidateFields.push({
id: fieldId,
element: deepestElement,
isLeaf: isLeaf,
depth: depth,
position: position,
field: {
id: fieldId,
type: "text",
label: `Label ${index + 1}`,
data: data,
selectorObj: {
selector: childSelector,
tag: deepestElement.tagName,
isShadow: deepestElement.getRootNode() instanceof ShadowRoot,
attribute: "innerText",
},
},
data: fieldData.data,
selectorObj: fieldData.selectorObj
}
});
}
}
}
} catch (error) {
console.warn(
`Failed to process child selector ${childSelector}:`,
error
);
console.warn(`Failed to process child selector ${selector}:`, error);
}
});
candidateFields.sort((a, b) => {
const yDiff = a.position.y - b.position.y;
if (Math.abs(yDiff) <= 5) {
return a.position.x - b.position.x;
}
return yDiff;
});
const filteredCandidates = removeParentChildDuplicates(candidateFields);
const finalFields = removeDuplicateContent(filteredCandidates);
return finalFields;
},
[currentSnapshot]
);
const isLeafElement = (element: HTMLElement): boolean => {
const children = Array.from(element.children) as HTMLElement[];
if (children.length === 0) return true;
const hasContentfulChildren = children.some((child) => {
const text = child.textContent?.trim() || "";
return text.length > 0 && text !== element.textContent?.trim();
});
return !hasContentfulChildren;
};
const getElementDepthFromList = (
element: HTMLElement,
listSelector: string,
document: Document
): number => {
try {
const listResult = document.evaluate(
listSelector,
document,
null,
XPathResult.FIRST_ORDERED_NODE_TYPE,
null
);
const listElement = listResult.singleNodeValue as HTMLElement;
if (!listElement) return 0;
let depth = 0;
let current = element;
while (current && current !== listElement && current.parentElement) {
depth++;
current = current.parentElement;
if (depth > 20) break;
}
return current === listElement ? depth : 0;
} catch (error) {
return 0;
}
};
const removeParentChildDuplicates = (
candidates: Array<{
id: number;
@@ -1242,6 +825,29 @@ export const BrowserWindow = () => {
}
}, [browserSteps, getList, listSelector, initialAutoFieldIds, currentListActionId, manuallyAddedFieldIds]);
// Syncs the locally tracked list state (list id, list selector, fields and
// pagination selector) with the list step whose actionId matches the current
// list action. Each setter is called only when the stored value differs from
// the step's value.
useEffect(() => {
  if (!currentListActionId || !(browserSteps.length > 0)) {
    return;
  }

  const activeStep = browserSteps.find(
    (step) => step.type === 'list' && step.actionId === currentListActionId
  ) as ListStep | undefined;
  if (!activeStep) {
    return;
  }

  if (activeStep.id !== currentListId) {
    setCurrentListId(activeStep.id);
  }

  if (activeStep.listSelector !== listSelector) {
    setListSelector(activeStep.listSelector);
  }

  // Fields are compared by their JSON serialization rather than by reference.
  const fieldsDiffer = JSON.stringify(fields) !== JSON.stringify(activeStep.fields);
  if (fieldsDiffer) {
    setFields(activeStep.fields);
  }

  const stepPaginationSelector = activeStep.pagination?.selector;
  if (stepPaginationSelector && stepPaginationSelector !== paginationSelector) {
    setPaginationSelector(stepPaginationSelector);
  }
}, [currentListActionId, browserSteps, currentListId, listSelector, fields, paginationSelector]);
useEffect(() => {
if (!isDOMMode) {
capturedElementHighlighter.clearHighlights();
@@ -1637,6 +1243,22 @@ export const BrowserWindow = () => {
paginationType !== "scrollUp" &&
paginationType !== "none"
) {
let targetListId = currentListId;
let targetFields = fields;
if ((!targetListId || targetListId === 0) && currentListActionId) {
const activeStep = browserSteps.find(
s => s.type === 'list' && s.actionId === currentListActionId
) as ListStep | undefined;
if (activeStep) {
targetListId = activeStep.id;
if (Object.keys(targetFields).length === 0 && Object.keys(activeStep.fields).length > 0) {
targetFields = activeStep.fields;
}
}
}
setPaginationSelector(highlighterData.selector);
notify(
`info`,
@@ -1646,8 +1268,8 @@ export const BrowserWindow = () => {
);
addListStep(
listSelector!,
fields,
currentListId || 0,
targetFields,
targetListId || 0,
currentListActionId || `list-${crypto.randomUUID()}`,
{
type: paginationType,
@@ -1812,6 +1434,8 @@ export const BrowserWindow = () => {
socket,
t,
paginationSelector,
highlighterData,
browserSteps
]
);
@@ -1864,6 +1488,22 @@ export const BrowserWindow = () => {
paginationType !== "scrollUp" &&
paginationType !== "none"
) {
let targetListId = currentListId;
let targetFields = fields;
if ((!targetListId || targetListId === 0) && currentListActionId) {
const activeStep = browserSteps.find(
s => s.type === 'list' && s.actionId === currentListActionId
) as ListStep | undefined;
if (activeStep) {
targetListId = activeStep.id;
if (Object.keys(targetFields).length === 0 && Object.keys(activeStep.fields).length > 0) {
targetFields = activeStep.fields;
}
}
}
setPaginationSelector(highlighterData.selector);
notify(
`info`,
@@ -1873,8 +1513,8 @@ export const BrowserWindow = () => {
);
addListStep(
listSelector!,
fields,
currentListId || 0,
targetFields,
targetListId || 0,
currentListActionId || `list-${crypto.randomUUID()}`,
{ type: paginationType, selector: highlighterData.selector, isShadow: highlighterData.isShadow },
undefined,
@@ -2046,6 +1686,31 @@ export const BrowserWindow = () => {
}
}, [paginationMode, resetPaginationSelector]);
// While pagination mode is active, keep the local pagination selector aligned
// with the pagination stored on the list step of the active list action:
//  - for click-based pagination types, clear the local selector when the step
//    has no selector or was stored with a different pagination type;
//  - adopt the step's selector when the local one is empty, and clear the
//    local one when the step has none.
// `paginationType` is read inside the effect, so it is listed as a dependency;
// otherwise a type change alone would be evaluated against a stale value.
useEffect(() => {
  if (paginationMode && currentListActionId) {
    const currentListStep = browserSteps.find(
      step => step.type === 'list' && step.actionId === currentListActionId
    ) as (ListStep & { type: 'list' }) | undefined;

    const stepSelector = currentListStep?.pagination?.selector;
    const stepType = currentListStep?.pagination?.type;

    if (['clickNext', 'clickLoadMore'].includes(paginationType)) {
      if (!stepSelector || (stepType && stepType !== paginationType)) {
        setPaginationSelector('');
      }
    }

    if (stepSelector && !paginationSelector) {
      setPaginationSelector(stepSelector);
    } else if (!stepSelector && paginationSelector) {
      setPaginationSelector('');
    }
  }
}, [browserSteps, paginationMode, currentListActionId, paginationSelector, paginationType]);
return (
<div
onClick={handleClick}
@@ -2310,6 +1975,7 @@ export const BrowserWindow = () => {
listSelector={listSelector}
cachedChildSelectors={cachedChildSelectors}
paginationMode={paginationMode}
paginationSelector={paginationSelector}
paginationType={paginationType}
limitMode={limitMode}
isCachingChildSelectors={isCachingChildSelectors}

View File

@@ -100,6 +100,7 @@ interface RRWebDOMBrowserRendererProps {
listSelector?: string | null;
cachedChildSelectors?: string[];
paginationMode?: boolean;
paginationSelector?: string;
paginationType?: string;
limitMode?: boolean;
isCachingChildSelectors?: boolean;
@@ -153,6 +154,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
listSelector = null,
cachedChildSelectors = [],
paginationMode = false,
paginationSelector = "",
paginationType = "",
limitMode = false,
isCachingChildSelectors = false,
@@ -257,6 +259,13 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
else if (listSelector) {
if (limitMode) {
shouldHighlight = false;
} else if (
paginationMode &&
paginationSelector &&
paginationType !== "" &&
!["none", "scrollDown", "scrollUp"].includes(paginationType)
) {
shouldHighlight = false;
} else if (
paginationMode &&
paginationType !== "" &&

View File

@@ -1,4 +1,4 @@
import React, { useState, useCallback, useEffect, useMemo } from 'react';
import React, { useState, useCallback, useEffect, useRef, useMemo } from 'react';
import { Button, Paper, Box, TextField, IconButton, Tooltip } from "@mui/material";
import { WorkflowFile } from "maxun-core";
import Typography from "@mui/material/Typography";
@@ -15,9 +15,9 @@ import ActionDescriptionBox from '../action/ActionDescriptionBox';
import { useThemeMode } from '../../context/theme-provider';
import { useTranslation } from 'react-i18next';
import { useBrowserDimensionsStore } from '../../context/browserDimensions';
import { emptyWorkflow } from '../../shared/constants';
import { clientListExtractor } from '../../helpers/clientListExtractor';
import { clientSelectorGenerator } from '../../helpers/clientSelectorGenerator';
import { clientPaginationDetector } from '../../helpers/clientPaginationDetector';
const fetchWorkflow = (id: string, callback: (response: WorkflowFile) => void) => {
getActiveWorkflow(id).then(
@@ -45,6 +45,13 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
const [showCaptureText, setShowCaptureText] = useState(true);
const { panelHeight } = useBrowserDimensionsStore();
const [autoDetectedPagination, setAutoDetectedPagination] = useState<{
type: PaginationType;
selector: string | null;
confidence: 'high' | 'medium' | 'low';
} | null>(null);
const autoDetectionRunRef = useRef<string | null>(null);
const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog, currentListActionId, setCurrentListActionId, currentTextActionId, setCurrentTextActionId, currentScreenshotActionId, setCurrentScreenshotActionId, isDOMMode, setIsDOMMode, currentSnapshot, setCurrentSnapshot, updateDOMMode, initialUrl, setRecordingUrl, currentTextGroupName } = useGlobalInfoStore();
const {
getText, startGetText, stopGetText,
@@ -62,7 +69,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
startAction, finishAction
} = useActionContext();
const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep, updateListTextFieldLabel, removeListTextField, updateListStepLimit, deleteStepsByActionId, updateListStepData, updateScreenshotStepData, emitActionForStep } = useBrowserSteps();
const { browserSteps, addScreenshotStep, updateListStepLimit, updateListStepPagination, deleteStepsByActionId, updateListStepData, updateScreenshotStepData, emitActionForStep } = useBrowserSteps();
const { id, socket } = useSocketStore();
const { t } = useTranslation();
@@ -72,6 +79,73 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
setWorkflow(data);
}, [setWorkflow]);
// React to the selected pagination type for the active list action.
// Only click-based types ('clickNext', 'clickLoadMore') need an element:
//  - if the list step was stored with a different type, remove the outline
//    from the previously stored element, reset the step's selector and enter
//    pagination mode so a new element can be picked;
//  - if the step has neither a selector nor a type yet, just enter pagination mode.
useEffect(() => {
  if (!paginationType || !currentListActionId) return;

  const activeListStep = browserSteps.find(
    (candidate) => candidate.type === 'list' && candidate.actionId === currentListActionId
  ) as (BrowserStep & { type: 'list' }) | undefined;
  const storedSelector = activeListStep?.pagination?.selector;
  const storedType = activeListStep?.pagination?.type;

  if (!['clickNext', 'clickLoadMore'].includes(paginationType)) return;

  const typeChanged = storedType && storedType !== paginationType;
  if (!typeChanged) {
    const needsSelector = !storedSelector && !storedType;
    if (needsSelector) {
      startPaginationMode();
    }
    return;
  }

  const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
  if (iframeElement?.contentDocument && storedSelector) {
    try {
      // Resolves an XPath ('//…' or '(//…') or CSS selector; any failure yields [].
      const resolveElements = (selector: string, doc: Document): Element[] => {
        try {
          const looksLikeXPath = selector.startsWith('//') || selector.startsWith('(//');
          if (!looksLikeXPath) {
            return Array.from(doc.querySelectorAll(selector));
          }
          const snapshot = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
          const found: Element[] = [];
          for (let index = 0; index < snapshot.snapshotLength; index += 1) {
            const node = snapshot.snapshotItem(index);
            if (node && node.nodeType === Node.ELEMENT_NODE) {
              found.push(node as Element);
            }
          }
          return found;
        } catch (err) {
          return [];
        }
      };

      for (const match of resolveElements(storedSelector, iframeElement.contentDocument)) {
        const highlighted = match as HTMLElement;
        highlighted.style.outline = '';
        highlighted.style.outlineOffset = '';
        highlighted.style.zIndex = '';
      }
    } catch (error) {
      console.error('Error removing pagination highlight:', error);
    }
  }

  if (activeListStep) {
    updateListStepPagination(activeListStep.id, {
      type: paginationType,
      selector: null,
    });
  }

  startPaginationMode();
}, [paginationType, currentListActionId, browserSteps, updateListStepPagination, startPaginationMode]);
useEffect(() => {
if (socket) {
const domModeHandler = (data: any) => {
@@ -391,7 +465,182 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
return;
}
startPaginationMode();
const currentListStepForAutoDetect = browserSteps.find(
step => step.type === 'list' && step.actionId === currentListActionId
) as (BrowserStep & { type: 'list'; listSelector?: string }) | undefined;
if (currentListStepForAutoDetect?.listSelector) {
if (autoDetectionRunRef.current !== currentListActionId) {
autoDetectionRunRef.current = currentListActionId;
notify('info', 'Detecting pagination...');
try {
socket?.emit('testPaginationScroll', {
listSelector: currentListStepForAutoDetect.listSelector
});
const handleScrollTestResult = (result: any) => {
if (result.success && result.contentLoaded) {
setAutoDetectedPagination({
type: 'scrollDown',
selector: null,
confidence: 'high'
});
updatePaginationType('scrollDown');
const latestListStep = browserSteps.find(
step => step.type === 'list' && step.actionId === currentListActionId
);
if (latestListStep) {
updateListStepPagination(latestListStep.id, {
type: 'scrollDown',
selector: null,
isShadow: false
});
}
} else if (result.success && !result.contentLoaded) {
const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
const iframeDoc = iframeElement?.contentDocument;
if (iframeDoc) {
const detectionResult = clientPaginationDetector.autoDetectPagination(
iframeDoc,
currentListStepForAutoDetect.listSelector!,
clientSelectorGenerator,
{ disableScrollDetection: true }
);
if (detectionResult.type) {
setAutoDetectedPagination({
type: detectionResult.type,
selector: detectionResult.selector,
confidence: detectionResult.confidence
});
const latestListStep = browserSteps.find(
step => step.type === 'list' && step.actionId === currentListActionId
);
if (latestListStep) {
updateListStepPagination(latestListStep.id, {
type: detectionResult.type,
selector: detectionResult.selector,
isShadow: false
});
}
updatePaginationType(detectionResult.type);
if (detectionResult.selector && (detectionResult.type === 'clickNext' || detectionResult.type === 'clickLoadMore')) {
try {
function evaluateSelector(selector: string, doc: Document): Element[] {
try {
const isXPath = selector.startsWith('//') || selector.startsWith('(//');
if (isXPath) {
const result = doc.evaluate(
selector,
doc,
null,
XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
null
);
const elements: Element[] = [];
for (let i = 0; i < result.snapshotLength; i++) {
const node = result.snapshotItem(i);
if (node && node.nodeType === Node.ELEMENT_NODE) {
elements.push(node as Element);
}
}
return elements;
} else {
try {
const allElements = Array.from(doc.querySelectorAll(selector));
if (allElements.length > 0) {
return allElements;
}
} catch (err) {
console.warn('[RightSidePanel] Full chained selector failed, trying individual selectors:', err);
}
const selectorParts = selector.split(',');
for (const part of selectorParts) {
try {
const elements = Array.from(doc.querySelectorAll(part.trim()));
if (elements.length > 0) {
return elements;
}
} catch (err) {
console.warn('[RightSidePanel] Selector part failed:', part.trim(), err);
continue;
}
}
return [];
}
} catch (err) {
console.error('[RightSidePanel] Selector evaluation failed:', selector, err);
return [];
}
}
const elements = evaluateSelector(detectionResult.selector, iframeDoc);
if (elements.length > 0) {
elements.forEach((el: Element) => {
(el as HTMLElement).style.outline = '3px dashed #ff00c3';
(el as HTMLElement).style.outlineOffset = '2px';
(el as HTMLElement).style.zIndex = '9999';
});
const firstElement = elements[0] as HTMLElement;
const elementRect = firstElement.getBoundingClientRect();
const iframeWindow = iframeElement.contentWindow;
if (iframeWindow) {
const targetY = elementRect.top + iframeWindow.scrollY - (iframeWindow.innerHeight / 2) + (elementRect.height / 2);
iframeWindow.scrollTo({ top: targetY, behavior: 'smooth' });
}
const paginationTypeLabel = detectionResult.type === 'clickNext' ? 'Next Button' : 'Load More Button';
notify('info', `${paginationTypeLabel} has been auto-detected and highlighted on the page`);
} else {
console.warn(' No elements found for selector:', detectionResult.selector);
}
} catch (error) {
console.error('Error highlighting pagination button:', error);
}
}
} else {
setAutoDetectedPagination(null);
}
}
} else {
console.error('Scroll test failed:', result.error);
setAutoDetectedPagination(null);
}
socket?.off('paginationScrollTestResult', handleScrollTestResult);
};
socket?.on('paginationScrollTestResult', handleScrollTestResult);
setTimeout(() => {
socket?.off('paginationScrollTestResult', handleScrollTestResult);
}, 5000);
} catch (error) {
console.error('Scroll test failed:', error);
setAutoDetectedPagination(null);
}
}
}
const shouldSkipPaginationMode = autoDetectedPagination && (
['scrollDown', 'scrollUp'].includes(autoDetectedPagination.type) ||
(['clickNext', 'clickLoadMore'].includes(autoDetectedPagination.type) && autoDetectedPagination.selector)
);
if (!shouldSkipPaginationMode) {
startPaginationMode();
}
setShowPaginationOptions(true);
setCaptureStage('pagination');
break;
@@ -460,6 +709,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
case 'pagination':
stopPaginationMode();
setShowPaginationOptions(false);
setAutoDetectedPagination(null);
setCaptureStage('initial');
break;
}
@@ -495,17 +745,58 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
socket.emit('removeAction', { actionId: currentListActionId });
}
}
if (autoDetectedPagination?.selector) {
const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
if (iframeElement?.contentDocument) {
try {
function evaluateSelector(selector: string, doc: Document): Element[] {
if (selector.startsWith('//') || selector.startsWith('(//')) {
try {
const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
const elements: Element[] = [];
for (let i = 0; i < result.snapshotLength; i++) {
const node = result.snapshotItem(i);
if (node && node.nodeType === Node.ELEMENT_NODE) {
elements.push(node as Element);
}
}
return elements;
} catch (err) {
return [];
}
} else {
try {
return Array.from(doc.querySelectorAll(selector));
} catch (err) {
return [];
}
}
}
const elements = evaluateSelector(autoDetectedPagination.selector, iframeElement.contentDocument);
elements.forEach((el: Element) => {
(el as HTMLElement).style.outline = '';
(el as HTMLElement).style.outlineOffset = '';
(el as HTMLElement).style.zIndex = '';
});
} catch (error) {
console.error('Error removing pagination highlight on discard:', error);
}
}
}
resetListState();
stopPaginationMode();
stopLimitMode();
setShowPaginationOptions(false);
setShowLimitOptions(false);
setAutoDetectedPagination(null);
setCaptureStage('initial');
setCurrentListActionId('');
clientSelectorGenerator.cleanup();
notify('error', t('right_panel.errors.capture_list_discarded'));
}, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t, stopPaginationMode, stopLimitMode, socket]);
}, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t, stopPaginationMode, stopLimitMode, socket, autoDetectedPagination]);
const captureScreenshot = (fullPage: boolean) => {
const screenshotCount = browserSteps.filter(s => s.type === 'screenshot').length + 1;
@@ -615,6 +906,114 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
{showPaginationOptions && (
<Box display="flex" flexDirection="column" gap={2} style={{ margin: '13px' }}>
<Typography>{t('right_panel.pagination.title')}</Typography>
{autoDetectedPagination && autoDetectedPagination.type !== '' && (
<Box
sx={{
p: 2,
mb: 1,
borderRadius: '8px',
backgroundColor: isDarkMode ? '#1a3a1a' : '#e8f5e9',
border: `1px solid ${isDarkMode ? '#2e7d32' : '#4caf50'}`,
}}
>
<Typography
variant="body2"
sx={{
color: isDarkMode ? '#81c784' : '#2e7d32',
fontWeight: 'bold',
mb: 0.5
}}
>
Auto-detected: {
autoDetectedPagination.type === 'clickNext' ? 'Click Next' :
autoDetectedPagination.type === 'clickLoadMore' ? 'Click Load More' :
autoDetectedPagination.type === 'scrollDown' ? 'Scroll Down' :
autoDetectedPagination.type === 'scrollUp' ? 'Scroll Up' :
autoDetectedPagination.type
}
</Typography>
<Typography
variant="caption"
sx={{
color: isDarkMode ? '#a5d6a7' : '#388e3c',
display: 'block',
mb: 1
}}
>
You can continue with this or manually select a different pagination type below.
</Typography>
{autoDetectedPagination.selector && ['clickNext', 'clickLoadMore'].includes(autoDetectedPagination.type) && (
<Button
size="small"
variant="outlined"
onClick={() => {
const currentListStep = browserSteps.find(
step => step.type === 'list' && step.actionId === currentListActionId
) as (BrowserStep & { type: 'list' }) | undefined;
if (currentListStep) {
const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
if (iframeElement?.contentDocument && autoDetectedPagination.selector) {
try {
function evaluateSelector(selector: string, doc: Document): Element[] {
if (selector.startsWith('//') || selector.startsWith('(//')) {
try {
const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
const elements: Element[] = [];
for (let i = 0; i < result.snapshotLength; i++) {
const node = result.snapshotItem(i);
if (node && node.nodeType === Node.ELEMENT_NODE) {
elements.push(node as Element);
}
}
return elements;
} catch (err) {
return [];
}
} else {
try {
return Array.from(doc.querySelectorAll(selector));
} catch (err) {
return [];
}
}
}
const elements = evaluateSelector(autoDetectedPagination.selector, iframeElement.contentDocument);
elements.forEach((el: Element) => {
(el as HTMLElement).style.outline = '';
(el as HTMLElement).style.outlineOffset = '';
(el as HTMLElement).style.zIndex = '';
});
} catch (error) {
console.error('Error removing pagination highlight:', error);
}
}
updateListStepPagination(currentListStep.id, {
type: autoDetectedPagination.type,
selector: null,
});
startPaginationMode();
notify('info', 'Please select a different pagination element');
}
}}
sx={{
color: isDarkMode ? '#81c784' : '#2e7d32',
borderColor: isDarkMode ? '#81c784' : '#2e7d32',
'&:hover': {
borderColor: isDarkMode ? '#a5d6a7' : '#4caf50',
backgroundColor: isDarkMode ? '#1a3a1a' : '#f1f8f4',
}
}}
>
Choose Different Element
</Button>
)}
</Box>
)}
<Button
variant={paginationType === 'clickNext' ? "contained" : "outlined"}
onClick={() => handlePaginationSettingSelect('clickNext')}

View File

@@ -500,7 +500,7 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
{scrapeListLimits.map((limitInfo, index) => {
// Get the corresponding scrapeList action to extract its name
const scrapeListAction = robot?.recording?.workflow?.[limitInfo.pairIndex]?.what?.[limitInfo.actionIndex];
const actionName =
const actionName =
scrapeListAction?.name ||
`List Limit ${index + 1}`;
@@ -821,10 +821,19 @@ export const RobotEditPage = ({ handleStart }: RobotSettingsProps) => {
onChange={(e) => handleTargetUrlChange(e.target.value)}
style={{ marginBottom: "20px" }}
/>
<Divider />
{renderScrapeListLimitFields()}
<Divider />
{renderActionNameFields()}
{renderScrapeListLimitFields() && (
<>
<Divider />
{renderScrapeListLimitFields()}
</>
)}
{renderActionNameFields() && (
<>
<Divider />
{renderActionNameFields()}
</>
)}
</>
)}
</Box>

View File

@@ -10,7 +10,6 @@ import {
AccordionSummary,
AccordionDetails
} from "@mui/material";
import Highlight from "react-highlight";
import * as React from "react";
import { Data } from "./RunsTable";
import { TabPanel, TabContext } from "@mui/lab";
@@ -22,7 +21,6 @@ import TableCell from '@mui/material/TableCell';
import TableContainer from '@mui/material/TableContainer';
import TableHead from '@mui/material/TableHead';
import TableRow from '@mui/material/TableRow';
import 'highlight.js/styles/github.css';
import { useTranslation } from "react-i18next";
import { useThemeMode } from "../../context/theme-provider";

View File

@@ -80,6 +80,7 @@ interface BrowserStepsContextType {
newLabel: string
) => void;
updateListStepLimit: (listId: number, limit: number) => void;
updateListStepPagination: (listId: number, pagination: { type: string; selector: string | null; isShadow?: boolean }) => void;
updateListStepData: (listId: number, extractedData: any[]) => void;
updateListStepName: (listId: number, name: string) => void;
updateScreenshotStepName: (id: number, name: string) => void;
@@ -479,6 +480,26 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({
);
};
/**
 * Replaces the pagination settings of the list step with the given id.
 * A `null` (or otherwise falsy) selector is stored as an empty string;
 * all other steps are returned untouched.
 */
const updateListStepPagination = (
  listId: number,
  pagination: { type: string; selector: string | null; isShadow?: boolean }
) => {
  setBrowserSteps((previous) =>
    previous.map((entry) =>
      entry.type === "list" && entry.id === listId
        ? {
            ...entry,
            pagination: {
              ...pagination,
              selector: pagination.selector || "",
            },
          }
        : entry
    )
  );
};
const updateListStepName = (listId: number, name: string) => {
setBrowserSteps((prevSteps) =>
prevSteps.map((step) => {
@@ -533,6 +554,7 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({
updateBrowserTextStepLabel,
updateListTextFieldLabel,
updateListStepLimit,
updateListStepPagination,
updateListStepData,
updateListStepName,
updateScreenshotStepName,

View File

@@ -0,0 +1,586 @@
/**
* Client-Side Pagination Auto-Detection
* Detects pagination type and selector for list extraction
* Operates on passed document object (works in DOM mode / iframe)
*/
import type { ClientSelectorGenerator } from './clientSelectorGenerator';
/**
 * Outcome of a pagination auto-detection run.
 */
export type PaginationDetectionResult = {
// Detected pagination mechanism; '' means nothing was detected (or detection failed).
type: 'scrollDown' | 'scrollUp' | 'clickNext' | 'clickLoadMore' | '';
// Selector of the pagination control for click-based types; null for scroll types and when nothing was detected.
selector: string | null;
// How strongly the collected signals support the reported type.
confidence: 'high' | 'medium' | 'low';
// Optional diagnostic payload (a message string or a structured object); not part of the result contract.
debug?: any;
};
class ClientPaginationDetector {
/**
 * Auto-detect the pagination mechanism used next to a list.
 *
 * Every visible clickable element is scored as a possible "next", "load more"
 * and "previous" control (text / aria-label / title match, proximity to the
 * list, tag and class hints). Those scores are combined with the
 * infinite-scroll score and resolved in priority order: strong infinite
 * scroll, high-scoring load-more, high-scoring next, medium infinite scroll,
 * weaker load-more, weaker next, and finally a previous control (reported as
 * `scrollUp`).
 *
 * @param doc - The document object to analyze (can be iframe document)
 * @param listSelector - CSS or XPath selector for the list; the first match is used as the list container
 * @param selectorGenerator - Selector generator forwarded to `generateSelectorsForElement`
 * @param options - Optional detection options; `disableScrollDetection` forces the infinite-scroll score to 0
 * @returns Pagination detection result; `type` is `''` when nothing was detected or an error occurred
 */
autoDetectPagination(
  doc: Document,
  listSelector: string,
  selectorGenerator: ClientSelectorGenerator,
  options?: { disableScrollDetection?: boolean }
): PaginationDetectionResult {
  try {
    const listElements = this.evaluateSelector(listSelector, doc);

    if (listElements.length === 0) {
      return { type: '', selector: null, confidence: 'low', debug: 'No list elements found' };
    }

    const listContainer = listElements[0];

    // NOTE: the arrow alternations must not contain an empty branch (`||`):
    // an empty alternative matches every string and would hand the text-match
    // bonus to every clickable element. The single angle quotation marks are
    // written as escapes (U+203A / U+2039) so they survive re-encoding.
    const nextButtonPatterns = [
      /next/i,
      /\bnext\s+page\b/i,
      /page\s+suivante/i,
      /siguiente/i,
      /weiter/i,
      />>|\u203A|→|»|⟩/,
      /\bforward\b/i,
      /\bnewer\b/i,
      /\bolder\b/i
    ];

    const loadMorePatterns = [
      /load\s+more/i,
      /show\s+more/i,
      /view\s+more/i,
      /see\s+more/i,
      /more\s+results/i,
      /plus\s+de\s+résultats/i,
      /más\s+resultados/i,
      /weitere\s+ergebnisse/i
    ];

    const prevButtonPatterns = [
      /prev/i,
      /previous/i,
      /<<|\u2039|←|«/,
      /\bback\b/i
    ];

    const paginationClassPattern = /pagination|next|forward/i;

    const clickableElements = this.getClickableElements(doc);

    let nextButton: HTMLElement | null = null;
    let nextButtonScore = 0;
    let loadMoreButton: HTMLElement | null = null;
    let loadMoreScore = 0;
    let prevButton: HTMLElement | null = null;
    let prevButtonScore = 0;
    const nextButtonCandidates: Array<{ score: number; text: string; tag: string; reasons: string[] }> = [];

    // Single pass: visibility, text and geometry are computed once per element
    // and reused for all three roles. Ties keep the earliest element (strict `>`).
    for (const element of clickableElements) {
      if (!this.isVisible(element)) continue;

      const text = (element.textContent || '').trim();
      const ariaLabel = element.getAttribute('aria-label') || '';
      const title = element.getAttribute('title') || '';
      const combinedText = `${text} ${ariaLabel} ${title}`;
      // Read the attribute rather than `className`, which is not a string on SVG elements.
      const className = element.getAttribute('class') || '';
      const nearList = this.isNearList(element, listContainer);
      const isButtonTag = element.tagName === 'BUTTON';

      // --- "next" role ---
      let nextScore = 0;
      const reasons: string[] = [];
      if (this.matchesAnyPattern(combinedText, nextButtonPatterns)) {
        nextScore += 10;
        reasons.push('text match (+10)');
      }
      if (nearList) {
        nextScore += 5;
        reasons.push('near list (+5)');
      }
      if (isButtonTag) {
        nextScore += 2;
        reasons.push('button tag (+2)');
      }
      if (paginationClassPattern.test(className)) {
        nextScore += 3;
        reasons.push('pagination class (+3)');
      }
      if (nextScore > 0) {
        nextButtonCandidates.push({
          score: nextScore,
          text: text.substring(0, 50),
          tag: element.tagName,
          reasons: reasons
        });
      }
      if (nextScore > nextButtonScore) {
        nextButtonScore = nextScore;
        nextButton = element;
      }

      // --- "load more" role ---
      let moreScore = 0;
      if (this.matchesAnyPattern(combinedText, loadMorePatterns)) moreScore += 10;
      if (nearList) moreScore += 5;
      if (isButtonTag) moreScore += 2;
      if (moreScore > loadMoreScore) {
        loadMoreScore = moreScore;
        loadMoreButton = element;
      }

      // --- "previous" role ---
      let prevScore = 0;
      if (this.matchesAnyPattern(combinedText, prevButtonPatterns)) prevScore += 10;
      if (nearList) prevScore += 5;
      if (prevScore > prevButtonScore) {
        prevButtonScore = prevScore;
        prevButton = element;
      }
    }

    const infiniteScrollScore = options?.disableScrollDetection
      ? 0
      : this.detectInfiniteScrollIndicators(doc, listElements, listContainer);

    const hasStrongInfiniteScrollSignals = infiniteScrollScore >= 8;
    const hasMediumInfiniteScrollSignals = infiniteScrollScore >= 5 && infiniteScrollScore < 8;

    if (hasStrongInfiniteScrollSignals) {
      const confidence = infiniteScrollScore >= 12 ? 'high' : infiniteScrollScore >= 10 ? 'medium' : 'low';
      return {
        type: 'scrollDown',
        selector: null,
        confidence: confidence
      };
    }

    if (loadMoreButton && loadMoreScore >= 15) {
      const selector = this.generateSelectorsForElement(loadMoreButton, doc, selectorGenerator);
      return {
        type: 'clickLoadMore',
        selector: selector,
        confidence: 'high'
      };
    }

    // A confident "next" button only wins when infinite scroll is not a plausible alternative.
    if (nextButton && nextButtonScore >= 15 && !hasMediumInfiniteScrollSignals) {
      const selector = this.generateSelectorsForElement(nextButton, doc, selectorGenerator);
      return {
        type: 'clickNext',
        selector: selector,
        confidence: 'high'
      };
    }

    if (hasMediumInfiniteScrollSignals) {
      const confidence = infiniteScrollScore >= 7 ? 'medium' : 'low';
      return {
        type: 'scrollDown',
        selector: null,
        confidence: confidence
      };
    }

    if (loadMoreButton && loadMoreScore >= 8) {
      const selector = this.generateSelectorsForElement(loadMoreButton, doc, selectorGenerator);
      const confidence = loadMoreScore >= 10 ? 'medium' : 'low';
      return {
        type: 'clickLoadMore',
        selector: selector,
        confidence: confidence
      };
    }

    if (nextButton && nextButtonScore >= 8) {
      const selector = this.generateSelectorsForElement(nextButton, doc, selectorGenerator);
      const confidence = nextButtonScore >= 10 ? 'medium' : 'low';
      return {
        type: 'clickNext',
        selector: selector,
        confidence: confidence
      };
    }

    if (prevButton && prevButtonScore >= 8) {
      const confidence = prevButtonScore >= 15 ? 'high' : prevButtonScore >= 10 ? 'medium' : 'low';
      return {
        type: 'scrollUp',
        selector: null,
        confidence: confidence
      };
    }

    // Nothing conclusive: report the collected scores for diagnostics.
    const topNextCandidates = [...nextButtonCandidates]
      .sort((a, b) => b.score - a.score)
      .slice(0, 3);

    return {
      type: '',
      selector: null,
      confidence: 'low',
      debug: {
        clickableElementsCount: clickableElements.length,
        nextCandidatesCount: nextButtonCandidates.length,
        topNextCandidates: topNextCandidates,
        finalScores: {
          loadMore: loadMoreScore,
          next: nextButtonScore,
          prev: prevButtonScore,
          infiniteScroll: infiniteScrollScore
        }
      }
    };
  } catch (error: unknown) {
    console.error('Error:', error);
    const message = error instanceof Error ? error.message : String(error);
    return {
      type: '',
      selector: null,
      confidence: 'low',
      debug: 'Exception: ' + message
    };
  }
}
/**
 * Resolve a selector against `doc`.
 * Selectors starting with `//` or `(//` are evaluated as XPath (element nodes
 * only); everything else goes through `querySelectorAll`. Any failure is
 * logged and yields an empty array.
 */
private evaluateSelector(selector: string, doc: Document): HTMLElement[] {
  try {
    const looksLikeXPath = selector.startsWith('//') || selector.startsWith('(//');
    if (!looksLikeXPath) {
      return Array.from(doc.querySelectorAll<HTMLElement>(selector));
    }

    const snapshot = doc.evaluate(
      selector,
      doc,
      null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
      null
    );

    const matches: HTMLElement[] = [];
    for (let index = 0; index < snapshot.snapshotLength; index += 1) {
      const candidate = snapshot.snapshotItem(index);
      if (candidate && candidate.nodeType === Node.ELEMENT_NODE) {
        matches.push(candidate as HTMLElement);
      }
    }
    return matches;
  } catch (err) {
    console.error('Selector evaluation failed:', selector, err);
    return [];
  }
}
/**
 * Collect every element in `doc` that looks clickable, without duplicates.
 * Elements are gathered query by query, so the result keeps the order of
 * first appearance across the queries below.
 */
private getClickableElements(doc: Document): HTMLElement[] {
  const clickableQueries = ['button', 'a', '[role="button"]', '[onclick]', '.btn', '.button'];
  const seen = new Set<HTMLElement>();

  for (const query of clickableQueries) {
    doc.querySelectorAll<HTMLElement>(query).forEach((node) => {
      seen.add(node);
    });
  }

  return Array.from(seen);
}
/**
 * Check if element is visible: not `display: none`, not `visibility: hidden`,
 * not fully transparent, and with a non-zero layout box.
 *
 * The computed style is read through the window that owns the element
 * (`ownerDocument.defaultView`), because this detector is run against iframe
 * documents; the global `window` is only a fallback for documents without a
 * view. Any error is treated as "not visible".
 */
private isVisible(element: HTMLElement): boolean {
  try {
    const view = element.ownerDocument?.defaultView ?? window;
    const style = view.getComputedStyle(element);
    return style.display !== 'none' &&
      style.visibility !== 'hidden' &&
      style.opacity !== '0' &&
      element.offsetWidth > 0 &&
      element.offsetHeight > 0;
  } catch {
    return false;
  }
}
/**
 * Returns true as soon as one of `patterns` matches `text`; false when none does.
 */
private matchesAnyPattern(text: string, patterns: RegExp[]): boolean {
  for (const candidate of patterns) {
    if (candidate.test(text)) {
      return true;
    }
  }
  return false;
}
/**
 * Decide whether `element` sits close to the list container, based on their
 * bounding rectangles: within 500px below or above the list, or vertically
 * overlapping it with a horizontal gap under 200px. Errors count as "not near".
 */
private isNearList(element: HTMLElement, listContainer: HTMLElement): boolean {
  try {
    const list = listContainer.getBoundingClientRect();
    const target = element.getBoundingClientRect();

    const startsBelowList = target.top >= list.bottom && target.top <= list.bottom + 500;
    const endsAboveList = target.bottom <= list.top && target.bottom >= list.top - 500;
    if (startsBelowList || endsAboveList) {
      return true;
    }

    // Side-by-side case: only relevant when the two boxes share vertical space.
    const verticallyDisjoint = target.bottom < list.top || target.top > list.bottom;
    if (verticallyDisjoint) {
      return false;
    }

    const gapToListRight = Math.abs(target.left - list.right);
    const gapToListLeft = Math.abs(target.right - list.left);
    return Math.min(gapToListRight, gapToListLeft) < 200;
  } catch {
    return false;
  }
}
/**
* Detect infinite scroll indicators
*/
private detectInfiniteScrollIndicators(doc: Document, listElements: HTMLElement[], listContainer: HTMLElement): number {
  /*
   * Heuristically scores how strongly the page looks like it uses infinite
   * scroll. Each independent signal adds a fixed weight; the sum is returned.
   *
   * @param doc           Document that is queried for the signal selectors.
   * @param listElements  Items of the detected list; their count and the
   *                      position of the last one feed into the score.
   * @param listContainer Not read by this method (kept for the existing
   *                      call signature).
   * @returns The accumulated score; 0 when the page is not taller than the
   *          viewport or when anything throws during detection.
   */
  try {
    const pageHeight = doc.documentElement.scrollHeight;
    const viewportHeight = window.innerHeight;

    // A page that fits inside the viewport has nothing to scroll.
    if (pageHeight <= viewportHeight) {
      return 0;
    }

    // True when at least one selector in the group matches something in `doc`.
    // Each group contributes its weight at most once.
    const anyPresent = (selectors: string[]): boolean =>
      selectors.some(selector => doc.querySelector(selector) !== null);

    let score = 0;

    // Loading placeholders / spinners (+3).
    const loadingIndicators = [
      '[class*="loading"]',
      '[class*="spinner"]',
      '[class*="skeleton"]',
      '[aria-busy="true"]',
      '[data-loading="true"]',
      '.loader',
      '.load-more-spinner',
      '[class*="load"]',
      '[id*="loading"]',
      '[id*="spinner"]'
    ];
    if (anyPresent(loadingIndicators)) {
      score += 3;
    }

    // Sentinel / trigger elements (+4).
    const sentinelPatterns = [
      '[class*="sentinel"]',
      '[class*="trigger"]',
      '[data-infinite]',
      '[data-scroll-trigger]',
      '#infinite-scroll-trigger',
      '[class*="infinite"]',
      '[id*="infinite"]'
    ];
    if (anyPresent(sentinelPatterns)) {
      score += 4;
    }

    // Visible "scroll to top" control (+2). Only the first match of each
    // selector is inspected, and it must pass isVisible to count.
    const scrollToTopPatterns = [
      '[class*="scroll"][class*="top"]',
      '[aria-label*="scroll to top"]',
      '[title*="back to top"]',
      '.back-to-top',
      '#back-to-top',
      '[class*="scrolltop"]',
      '[class*="backtotop"]',
      'button[class*="top"]',
      'a[href="#top"]',
      'a[href="#"]'
    ];
    const hasVisibleScrollToTop = scrollToTopPatterns.some(selector => {
      const candidate = doc.querySelector(selector);
      return candidate !== null && this.isVisible(candidate as HTMLElement);
    });
    if (hasVisibleScrollToTop) {
      score += 2;
    }

    // Page height relative to the viewport (+3 above 3x, +2 above 2x).
    if (pageHeight > viewportHeight * 3) {
      score += 3;
    } else if (pageHeight > viewportHeight * 2) {
      score += 2;
    }

    // Number of list items already present (+2 from 20, +1 from 10).
    const itemCount = listElements.length;
    if (itemCount >= 20) {
      score += 2;
    } else if (itemCount >= 10) {
      score += 1;
    }

    // Markers associated with scroll / lazy-load libraries (+4).
    const infiniteScrollLibraries = [
      '.infinite-scroll',
      '[data-infinite-scroll]',
      '[data-flickity]',
      '[data-slick]',
      '.masonry',
      '[data-masonry]',
      '[class*="infinite-scroll"]',
      '[class*="lazy-load"]',
      '[data-lazy]'
    ];
    if (anyPresent(infiniteScrollLibraries)) {
      score += 4;
    }

    // How far the last list item sits below the current viewport
    // (+3 when more than one extra viewport away, +2 when merely below).
    const lastListItem = listElements[listElements.length - 1];
    if (lastListItem) {
      const lastItemBottom = lastListItem.getBoundingClientRect().bottom + window.scrollY;
      const viewportBottom = window.scrollY + viewportHeight;
      if (lastItemBottom > viewportBottom + viewportHeight) {
        score += 3;
      } else if (lastItemBottom > viewportBottom) {
        score += 2;
      }
    }

    // A load/more element that is present but hidden via opacity or
    // visibility (+2).
    const loadMoreCandidates = Array.from(
      doc.querySelectorAll('[class*="load"], [class*="more"]')
    ) as HTMLElement[];
    const hasHiddenLoadTrigger = loadMoreCandidates.some(candidate => {
      const style = window.getComputedStyle(candidate);
      return style.opacity === '0' || style.visibility === 'hidden';
    });
    if (hasHiddenLoadTrigger) {
      score += 2;
    }

    // Absence of any pagination-looking control (+1).
    if (doc.querySelector('[class*="pagination"], [class*="pager"]') === null) {
      score += 1;
    }

    return score;
  } catch (error) {
    console.error('Infinite scroll detection error:', error);
    return 0;
  }
}
/**
* Generate selectors for element using ClientSelectorGenerator approach
* Returns the primary selector chain
*/
private generateSelectorsForElement(
  element: HTMLElement,
  doc: Document,
  selectorGenerator: ClientSelectorGenerator
): string | null {
  // Asks the generator for the element's selectors and flattens them into a
  // single comma-joined string, in a fixed priority order. Returns null when
  // nothing usable is produced or when generation throws.
  try {
    const generated = selectorGenerator.generateSelectorsFromElement(element, doc);
    if (!generated) {
      console.warn('Could not generate selectors for element');
      return null;
    }

    // These two carry their selector under a nested `full` property.
    const nestedKeys = ['iframeSelector', 'shadowSelector'];
    // These are used as-is when the key exists on the result.
    const directKeys = [
      'testIdSelector',
      'id',
      'hrefSelector',
      'relSelector',
      'accessibilitySelector',
      'attrSelector',
      'generalSelector',
    ];

    const candidates: any[] = [];
    for (const key of nestedKeys) {
      const hasFull = key in generated && generated[key]?.full;
      candidates.push(hasFull ? generated[key].full : null);
    }
    for (const key of directKeys) {
      candidates.push(key in generated ? generated[key] : null);
    }

    const chain = candidates
      .filter(candidate => !(candidate === null || candidate === undefined || candidate === ''))
      .join(',');

    if (!chain) {
      return null;
    }
    return chain;
  } catch (error) {
    console.error('Error generating selectors:', error);
    return null;
  }
}
}
// Module-level ClientPaginationDetector instance, created once when this module is loaded and exported.
export const clientPaginationDetector = new ClientPaginationDetector();

View File

@@ -2476,6 +2476,46 @@ class ClientSelectorGenerator {
return null;
};
/**
* Generate selectors directly from an element
* Scrolls the element into view within the iframe only (instant scroll)
*/
public generateSelectorsFromElement = (
  element: HTMLElement,
  iframeDoc: Document
): any | null => {
  try {
    // Best-effort: centre the element vertically in the document's window.
    // A failure here is only logged; selector generation still proceeds.
    try {
      const boxBeforeScroll = element.getBoundingClientRect();
      const view = iframeDoc.defaultView;
      if (view) {
        const centredTop = boxBeforeScroll.top + view.scrollY - (view.innerHeight / 2) + (boxBeforeScroll.height / 2);
        view.scrollTo({ top: centredTop, behavior: 'auto' });
      }
    } catch (scrollError) {
      console.warn('[ClientSelectorGenerator] Could not scroll element into view:', scrollError);
    }

    // Measure again after the scroll attempt and resolve selectors at the
    // centre point of the element's box.
    const box = element.getBoundingClientRect();
    const centre = {
      x: box.left + box.width / 2,
      y: box.top + box.height / 2
    };
    return this.getSelectors(iframeDoc, centre);
  } catch (e) {
    const failure = e as Error;
    console.warn(`Error generating selectors from element: ${failure.message}`);
    console.warn(`Stack: ${failure.stack}`);
    return null;
  }
};
public getChildSelectors = (
iframeDoc: Document,
parentSelector: string
@@ -2499,34 +2539,24 @@ class ClientSelectorGenerator {
return [];
}
if (parentElements.length > 10) {
parentElements = parentElements.slice(0, 10);
}
const maxItems = 10;
const limitedParents = parentElements.slice(0, Math.min(maxItems, parentElements.length));
const allChildSelectors = new Set<string>();
const processedParents = new Set<HTMLElement>();
const allChildSelectors: string[] = [];
for (const parentElement of parentElements) {
if (processedParents.has(parentElement)) continue;
processedParents.add(parentElement);
for (let i = 0; i < limitedParents.length; i++) {
const parent = limitedParents[i];
const otherListElements = limitedParents.filter((_, index) => index !== i);
const otherListElements = parentElements.filter(
(el) => el !== parentElement
);
const childSelectors = this.generateOptimizedChildXPaths(
parentElement,
const selectors = this.generateOptimizedChildXPaths(
parent,
parentSelector,
iframeDoc,
otherListElements
);
for (const selector of childSelectors) {
allChildSelectors.add(selector);
}
allChildSelectors.push(...selectors);
}
const result = Array.from(allChildSelectors).sort();
const result = Array.from(new Set(allChildSelectors)).sort();
this.selectorCache.set(cacheKey, result);
return result;
} catch (error) {
@@ -2609,7 +2639,6 @@ class ClientSelectorGenerator {
private generateOptimizedChildXPaths(
parentElement: HTMLElement,
listSelector: string,
document: Document,
otherListElements: HTMLElement[] = []
): string[] {
const selectors: string[] = [];
@@ -4297,4 +4326,5 @@ class ClientSelectorGenerator {
}
}
// Export the class itself.
export { ClientSelectorGenerator };
// Also export a module-level instance, created once when this module is loaded.
export const clientSelectorGenerator = new ClientSelectorGenerator();