diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 1dcc0849..b909376a 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -460,8 +460,9 @@ export default class Interpreter extends EventEmitter { for (const link of links) { // eslint-disable-next-line this.concurrency.addJob(async () => { + let newPage = null; try { - const newPage = await context.newPage(); + newPage = await context.newPage(); await newPage.goto(link); await newPage.waitForLoadState('networkidle'); await this.runLoop(newPage, this.initializedWorkflow!); @@ -470,6 +471,14 @@ export default class Interpreter extends EventEmitter { // but newPage(), goto() and waitForLoadState() don't (and will kill // the interpreter by throwing). this.log(e, Level.ERROR); + } finally { + if (newPage && !newPage.isClosed()) { + try { + await newPage.close(); + } catch (closeError) { + this.log('Failed to close enqueued page', Level.WARN); + } + } } }); } @@ -1463,41 +1472,57 @@ export default class Interpreter extends EventEmitter { * User-requested concurrency should be entirely managed by the concurrency manager, * e.g. via `enqueueLinks`. */ - p.on('popup', (popup) => { + const popupHandler = (popup) => { this.concurrency.addJob(() => this.runLoop(popup, workflowCopy)); - }); + }; + p.on('popup', popupHandler); /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */ let loopIterations = 0; const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker + + // Cleanup function to remove popup listener + const cleanup = () => { + try { + if (!p.isClosed()) { + p.removeListener('popup', popupHandler); + } + } catch (cleanupError) { + } + }; while (true) { if (this.isAborted) { this.log('Workflow aborted during step execution', Level.WARN); + cleanup(); return; } // Circuit breaker to prevent infinite loops if (++loopIterations > MAX_LOOP_ITERATIONS) { this.log('Maximum loop iterations reached, terminating to prevent infinite loop', Level.ERROR); + cleanup(); return; } // Checks whether the page was closed from outside, // or the workflow execution has been stopped via `interpreter.stop()` if (p.isClosed() || !this.stopper) { + cleanup(); return; } try { await p.waitForLoadState(); } catch (e) { + cleanup(); await p.close(); return; } if (workflowCopy.length === 0) { this.log('All actions completed. Workflow finished.', Level.LOG); + cleanup(); return; } @@ -1589,6 +1614,7 @@ export default class Interpreter extends EventEmitter { } } else { //await this.disableAdBlocker(p); + cleanup(); return; } } @@ -1681,4 +1707,44 @@ export default class Interpreter extends EventEmitter { throw new Error('Cannot stop, there is no running workflow!'); } } + /** + * Cleanup method to release resources and prevent memory leaks + * Call this when the interpreter is no longer needed + */ + public async cleanup(): Promise { + try { + // Stop any running workflows first + if (this.stopper) { + try { + await this.stop(); + } catch (error: any) { + this.log(`Error stopping workflow during cleanup: ${error.message}`, Level.WARN); + } + } + + // Clear ad-blocker resources + if (this.blocker) { + try { + this.blocker = null; + this.log('Ad-blocker resources cleared', Level.DEBUG); + } catch (error: any) { + this.log(`Error cleaning up ad-blocker: ${error.message}`, Level.WARN); + } + } + + // Clear accumulated data to free memory + this.cumulativeResults = []; + this.namedResults = {}; + this.serializableDataByType = { scrapeList: {}, scrapeSchema: {} }; + + // Reset state + this.isAborted = false; + this.initializedWorkflow = null; + + this.log('Interpreter cleanup completed', Level.DEBUG); + } catch (error: any) { + this.log(`Error during interpreter cleanup: ${error.message}`, Level.ERROR); + throw error; + } + } } \ No newline at end of file diff --git a/package.json b/package.json index 46eb302a..5813506f 100644 --- a/package.json +++ b/package.json @@ -12,8 +12,6 @@ "@mui/material": "^5.6.2", "@react-oauth/google": "^0.12.1", "@tanstack/react-query": "^5.90.2", - "@testing-library/react": "^13.1.1", - "@testing-library/user-event": "^13.5.0", "@types/bcrypt": "^5.0.2", "@types/body-parser": "^1.19.5", "@types/csurf": "^1.11.5", @@ -38,14 +36,12 @@ "dotenv": "^16.0.0", "express": "^4.17.2", "express-session": "^1.18.1", - "fortawesome": "^0.0.1-security", "google-auth-library": "^9.14.1", "googleapis": "^144.0.0", "i18next": "^24.0.2", "i18next-browser-languagedetector": "^8.0.0", "i18next-http-backend": "^3.0.1", "idcac-playwright": "^0.1.3", - "ioredis": "^5.4.1", "joi": "^17.6.0", "joplin-turndown-plugin-gfm": "^1.0.12", "jsonwebtoken": "^9.0.2", @@ -64,11 +60,8 @@ "posthog-node": "^4.2.1", "react": "^18.0.0", "react-dom": "^18.0.0", - "react-highlight": "0.15.0", "react-i18next": "^15.1.3", "react-router-dom": "^6.26.1", - "react-simple-code-editor": "^0.11.2", - "react-transition-group": "^4.4.2", "rrweb-snapshot": "^2.0.0-alpha.4", "sequelize": "^6.37.3", "sequelize-typescript": "^2.1.6", @@ -119,9 +112,6 @@ "@types/node": "22.7.9", "@types/node-cron": "^3.0.11", "@types/node-fetch": "^2.6.12", - "@types/prismjs": "^1.26.0", - "@types/react-highlight": "^0.12.5", - "@types/react-transition-group": "^4.4.4", "@types/styled-components": "^5.1.23", "@types/swagger-jsdoc": "^6.0.4", "@types/swagger-ui-express": "^4.1.6", diff --git a/server/src/api/record.ts b/server/src/api/record.ts index 5bcf41e7..9431a426 100644 --- a/server/src/api/record.ts +++ b/server/src/api/record.ts @@ -658,6 +658,16 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[ }; } + browser = browserPool.getRemoteBrowser(plainRun.browserId); + if (!browser) { + throw new Error('Could not access browser'); + } + + let currentPage = await browser.getCurrentPage(); + if (!currentPage) { + throw new Error('Could not create a new page'); + } + if (recording.recording_meta.type === 'scrape') { logger.log('info', `Executing scrape robot for API run ${id}`); @@ -686,13 +696,13 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[ // Markdown conversion if (formats.includes('markdown')) { - markdown = await convertPageToMarkdown(url); + markdown = await convertPageToMarkdown(url, currentPage); serializableOutput.markdown = [{ content: markdown }]; } // HTML conversion if (formats.includes('html')) { - html = await convertPageToHTML(url); + html = await convertPageToHTML(url, currentPage); serializableOutput.html = [{ content: html }]; } @@ -820,16 +830,6 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[ plainRun.status = 'running'; - browser = browserPool.getRemoteBrowser(plainRun.browserId); - if (!browser) { - throw new Error('Could not access browser'); - } - - let currentPage = await browser.getCurrentPage(); - if (!currentPage) { - throw new Error('Could not create a new page'); - } - const workflow = AddGeneratedFlags(recording.recording); browser.interpreter.setRunId(plainRun.runId); diff --git a/server/src/markdownify/scrape.ts b/server/src/markdownify/scrape.ts index dfcd4b39..52ae19bf 100644 --- a/server/src/markdownify/scrape.ts +++ b/server/src/markdownify/scrape.ts @@ -1,9 +1,27 @@ import { connectToRemoteBrowser } from "../browser-management/browserConnection"; import { parseMarkdown } from "./markdown"; +import logger from "../logger"; + +async function gotoWithFallback(page: any, url: string) { + try { + return await page.goto(url, { + waitUntil: "networkidle", + timeout: 100000, + }); + } catch (err) { + // fallback: JS-heavy or unstable sites + return await page.goto(url, { + waitUntil: "domcontentloaded", + timeout: 100000, + }); + } +} /** * Fetches a webpage, strips scripts/styles/images/etc, * returns clean Markdown using parser. + * @param url - The URL to convert + * @param existingPage - Optional existing Playwright page instance to reuse */ export async function convertPageToMarkdown(url: string): Promise { const browser = await connectToRemoteBrowser(); @@ -11,7 +29,7 @@ export async function convertPageToMarkdown(url: string): Promise { await page.goto(url, { waitUntil: "networkidle", timeout: 100000 }); - await page.addInitScript(() => { + const cleanedHtml = await page.evaluate(() => { const selectors = [ "script", "style", @@ -42,14 +60,16 @@ export async function convertPageToMarkdown(url: string): Promise { } }); }); - }); - // Re-extract HTML after cleanup - const cleanedHtml = await page.evaluate(() => { return document.documentElement.outerHTML; }); - await browser.close(); + if (shouldCloseBrowser && browser) { + logger.log('info', `[Scrape] Closing browser instance created for markdown conversion`); + await browser.close(); + } else { + logger.log('info', `[Scrape] Keeping existing browser instance open after markdown conversion`); + } // Convert cleaned HTML → Markdown const markdown = await parseMarkdown(cleanedHtml, url); @@ -59,6 +79,8 @@ export async function convertPageToMarkdown(url: string): Promise { /** * Fetches a webpage, strips scripts/styles/images/etc, * returns clean HTML. + * @param url - The URL to convert + * @param existingPage - Optional existing Playwright page instance to reuse */ export async function convertPageToHTML(url: string): Promise { const browser = await connectToRemoteBrowser(); @@ -66,7 +88,7 @@ export async function convertPageToHTML(url: string): Promise { await page.goto(url, { waitUntil: "networkidle", timeout: 100000 }); - await page.addInitScript(() => { + const cleanedHtml = await page.evaluate(() => { const selectors = [ "script", "style", @@ -97,14 +119,16 @@ export async function convertPageToHTML(url: string): Promise { } }); }); - }); - // Re-extract HTML after cleanup - const cleanedHtml = await page.evaluate(() => { return document.documentElement.outerHTML; }); - await browser.close(); + if (shouldCloseBrowser && browser) { + logger.log('info', `[Scrape] Closing browser instance created for HTML conversion`); + await browser.close(); + } else { + logger.log('info', `[Scrape] Keeping existing browser instance open after HTML conversion`); + } // Return cleaned HTML directly return cleanedHtml; diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index 8f5c03db..aeb0421d 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -181,7 +181,7 @@ async function processRunExecution(job: Job) { logger.log('info', `Browser ${browserId} found and ready for execution`); - try { + try { // Find the recording const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true }); @@ -189,6 +189,30 @@ async function processRunExecution(job: Job) { throw new Error(`Recording for run ${data.runId} not found`); } + let currentPage = browser.getCurrentPage(); + + const pageWaitStart = Date.now(); + let lastPageLogTime = 0; + let pageAttempts = 0; + const MAX_PAGE_ATTEMPTS = 15; + + while (!currentPage && (Date.now() - pageWaitStart) < BROWSER_PAGE_TIMEOUT && pageAttempts < MAX_PAGE_ATTEMPTS) { + const currentTime = Date.now(); + pageAttempts++; + + if (currentTime - lastPageLogTime > 5000) { + logger.log('info', `Page not ready for browser ${browserId}, waiting... (${Math.round((currentTime - pageWaitStart) / 1000)}s elapsed)`); + lastPageLogTime = currentTime; + } + + await new Promise(resolve => setTimeout(resolve, 1000)); + currentPage = browser.getCurrentPage(); + } + + if (!currentPage) { + throw new Error(`No current page available for browser ${browserId} after ${BROWSER_PAGE_TIMEOUT/1000}s timeout`); + } + if (recording.recording_meta.type === 'scrape') { logger.log('info', `Executing scrape robot for run ${data.runId}`); @@ -212,13 +236,13 @@ async function processRunExecution(job: Job) { // Markdown conversion if (formats.includes('markdown')) { - markdown = await convertPageToMarkdown(url); + markdown = await convertPageToMarkdown(url, currentPage); serializableOutput.markdown = [{ content: markdown }]; } // HTML conversion if (formats.includes('html')) { - html = await convertPageToHTML(url); + html = await convertPageToHTML(url, currentPage); serializableOutput.html = [{ content: html }]; } @@ -328,30 +352,6 @@ async function processRunExecution(job: Job) { } }; - let currentPage = browser.getCurrentPage(); - - const pageWaitStart = Date.now(); - let lastPageLogTime = 0; - let pageAttempts = 0; - const MAX_PAGE_ATTEMPTS = 15; - - while (!currentPage && (Date.now() - pageWaitStart) < BROWSER_PAGE_TIMEOUT && pageAttempts < MAX_PAGE_ATTEMPTS) { - const currentTime = Date.now(); - pageAttempts++; - - if (currentTime - lastPageLogTime > 5000) { - logger.log('info', `Page not ready for browser ${browserId}, waiting... (${Math.round((currentTime - pageWaitStart) / 1000)}s elapsed)`); - lastPageLogTime = currentTime; - } - - await new Promise(resolve => setTimeout(resolve, 1000)); - currentPage = browser.getCurrentPage(); - } - - if (!currentPage) { - throw new Error(`No current page available for browser ${browserId} after ${BROWSER_PAGE_TIMEOUT/1000}s timeout`); - } - logger.log('info', `Starting workflow execution for run ${data.runId}`); await run.update({ diff --git a/server/src/workflow-management/scheduler/index.ts b/server/src/workflow-management/scheduler/index.ts index a3571520..c10edf2c 100644 --- a/server/src/workflow-management/scheduler/index.ts +++ b/server/src/workflow-management/scheduler/index.ts @@ -205,6 +205,16 @@ async function executeRun(id: string, userId: string) { } } + browser = browserPool.getRemoteBrowser(plainRun.browserId); + if (!browser) { + throw new Error('Could not access browser'); + } + + let currentPage = await browser.getCurrentPage(); + if (!currentPage) { + throw new Error('Could not create a new page'); + } + if (recording.recording_meta.type === 'scrape') { logger.log('info', `Executing scrape robot for scheduled run ${id}`); @@ -249,13 +259,13 @@ async function executeRun(id: string, userId: string) { // Markdown conversion if (formats.includes('markdown')) { - markdown = await convertPageToMarkdown(url); + markdown = await convertPageToMarkdown(url, currentPage); serializableOutput.markdown = [{ content: markdown }]; } // HTML conversion if (formats.includes('html')) { - html = await convertPageToHTML(url); + html = await convertPageToHTML(url, currentPage); serializableOutput.html = [{ content: html }]; } @@ -388,16 +398,6 @@ async function executeRun(id: string, userId: string) { logger.log('warn', `Failed to send run-started notification for run ${plainRun.runId}: ${socketError.message}`); } - browser = browserPool.getRemoteBrowser(plainRun.browserId); - if (!browser) { - throw new Error('Could not access browser'); - } - - let currentPage = await browser.getCurrentPage(); - if (!currentPage) { - throw new Error('Could not create a new page'); - } - const workflow = AddGeneratedFlags(recording.recording); // Set run ID for real-time data persistence diff --git a/src/components/browser/BrowserWindow.tsx b/src/components/browser/BrowserWindow.tsx index 8bdabaae..7dd8b2cf 100644 --- a/src/components/browser/BrowserWindow.tsx +++ b/src/components/browser/BrowserWindow.tsx @@ -304,8 +304,6 @@ export const BrowserWindow = () => { const createFieldsFromChildSelectors = useCallback( (childSelectors: string[], listSelector: string) => { - if (!childSelectors.length || !currentSnapshot) return {}; - const iframeElement = document.querySelector( "#dom-browser-iframe" ) as HTMLIFrameElement; @@ -323,7 +321,6 @@ export const BrowserWindow = () => { const uniqueChildSelectors = [...new Set(childSelectors)]; - // Filter child selectors that occur in at least 2 out of first 10 list elements const validateChildSelectors = (selectors: string[]): string[] => { try { // Get first 10 list elements @@ -352,13 +349,10 @@ export const BrowserWindow = () => { // If we can't access the element, it's likely in shadow DOM - include it if (!testElement) { - console.log(`Including potentially shadow DOM selector: ${selector}`); validSelectors.push(selector); continue; } } catch (accessError) { - // If there's an error accessing, assume shadow DOM and include it - console.log(`Including selector due to access error: ${selector}`); validSelectors.push(selector); continue; } @@ -395,7 +389,6 @@ export const BrowserWindow = () => { } }; - // Enhanced XPath evaluation for multiple elements const evaluateXPathAllWithShadowSupport = ( document: Document, xpath: string, @@ -423,8 +416,6 @@ export const BrowserWindow = () => { return elements; } - // If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal - // This is a simplified version - for multiple elements, we'll primarily rely on regular XPath return elements; } catch (err) { console.error("XPath evaluation failed:", xpath, err); @@ -432,7 +423,9 @@ export const BrowserWindow = () => { } }; - const validatedChildSelectors = validateChildSelectors(uniqueChildSelectors); + const isValidData = (text: string | null | undefined): boolean => { + return !!text && text.trim().length > 0; + }; const isElementVisible = (element: HTMLElement): boolean => { try { @@ -443,443 +436,119 @@ export const BrowserWindow = () => { } }; - const isValidData = (data: string): boolean => { - if (!data || data.trim().length === 0) return false; + const createFieldData = (element: HTMLElement, selector: string, forceAttribute?: string) => { + const tagName = element.tagName.toLowerCase(); + let data = ''; + let attribute = forceAttribute || 'innerText'; - const trimmed = data.trim(); - - // Filter out single letters - if (trimmed.length === 1) { - return false; - } - - // Filter out pure symbols/punctuation - if (trimmed.length < 3 && /^[^\w\s]+$/.test(trimmed)) { - return false; - } - - // Filter out whitespace and punctuation only - if (/^[\s\p{P}\p{S}]*$/u.test(trimmed)) return false; - - return trimmed.length > 0; - }; - - // Enhanced shadow DOM-aware element evaluation - const evaluateXPathWithShadowSupport = ( - document: Document, - xpath: string, - isShadow: boolean = false - ): Element | null => { - try { - // First try regular XPath evaluation - const result = document.evaluate( - xpath, - document, - null, - XPathResult.FIRST_ORDERED_NODE_TYPE, - null - ).singleNodeValue as Element | null; - - if (!isShadow || result) { - return result; + if (forceAttribute) { + if (forceAttribute === 'href') { + data = element.getAttribute('href') || ''; + } else if (forceAttribute === 'innerText') { + data = (element.textContent || '').trim(); } - - // If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal - let cleanPath = xpath; - let isIndexed = false; - - const indexedMatch = xpath.match(/^\((.*?)\)\[(\d+)\](.*)$/); - if (indexedMatch) { - cleanPath = indexedMatch[1] + indexedMatch[3]; - isIndexed = true; + } else if (tagName === 'img') { + data = element.getAttribute('src') || ''; + attribute = 'src'; + } else if (tagName === 'a') { + const href = element.getAttribute('href') || ''; + const text = (element.textContent || '').trim(); + if (href && href !== '#' && !href.startsWith('javascript:')) { + data = href; + attribute = 'href'; + } else if (text) { + data = text; + attribute = 'innerText'; } - - const pathParts = cleanPath - .replace(/^\/\//, "") - .split("/") - .map((p) => p.trim()) - .filter((p) => p.length > 0); - - let currentContexts: (Document | Element | ShadowRoot)[] = [document]; - - for (let i = 0; i < pathParts.length; i++) { - const part = pathParts[i]; - const nextContexts: (Element | ShadowRoot)[] = []; - - for (const ctx of currentContexts) { - const positionalMatch = part.match(/^([^[]+)\[(\d+)\]$/); - let partWithoutPosition = part; - let requestedPosition: number | null = null; - - if (positionalMatch) { - partWithoutPosition = positionalMatch[1]; - requestedPosition = parseInt(positionalMatch[2]); - } - - const matched = queryInsideContext(ctx, partWithoutPosition); - - let elementsToAdd = matched; - if (requestedPosition !== null) { - const index = requestedPosition - 1; - if (index >= 0 && index < matched.length) { - elementsToAdd = [matched[index]]; - } else { - elementsToAdd = []; - } - } - - elementsToAdd.forEach((el) => { - nextContexts.push(el); - if (el.shadowRoot) { - nextContexts.push(el.shadowRoot); - } - }); - } - - if (nextContexts.length === 0) { - return null; - } - - currentContexts = nextContexts; - } - - if (currentContexts.length > 0) { - if (isIndexed && indexedMatch) { - const requestedIndex = parseInt(indexedMatch[2]) - 1; - if (requestedIndex >= 0 && requestedIndex < currentContexts.length) { - return currentContexts[requestedIndex] as Element; - } else { - return null; - } - } - - return currentContexts[0] as Element; - } - - return null; - } catch (err) { - console.error("XPath evaluation failed:", xpath, err); - return null; - } - }; - - const queryInsideContext = ( - context: Document | Element | ShadowRoot, - part: string - ): Element[] => { - try { - const { tagName, conditions } = parseXPathPart(part); - - const candidateElements = Array.from(context.querySelectorAll(tagName)); - if (candidateElements.length === 0) { - return []; - } - - const matchingElements = candidateElements.filter((el) => { - return elementMatchesConditions(el, conditions); - }); - - return matchingElements; - } catch (err) { - console.error("Error in queryInsideContext:", err); - return []; - } - }; - - const parseXPathPart = ( - part: string - ): { tagName: string; conditions: string[] } => { - const tagMatch = part.match(/^([a-zA-Z0-9-]+)/); - const tagName = tagMatch ? tagMatch[1] : "*"; - - const conditionMatches = part.match(/\[([^\]]+)\]/g); - const conditions = conditionMatches - ? conditionMatches.map((c) => c.slice(1, -1)) - : []; - - return { tagName, conditions }; - }; - - const elementMatchesConditions = ( - element: Element, - conditions: string[] - ): boolean => { - for (const condition of conditions) { - if (!elementMatchesCondition(element, condition)) { - return false; - } - } - return true; - }; - - const elementMatchesCondition = ( - element: Element, - condition: string - ): boolean => { - condition = condition.trim(); - - if (/^\d+$/.test(condition)) { - return true; + } else { + data = (element.textContent || '').trim(); + attribute = 'innerText'; } - // Handle @attribute="value" - const attrMatch = condition.match(/^@([^=]+)=["']([^"']+)["']$/); - if (attrMatch) { - const [, attr, value] = attrMatch; - const elementValue = element.getAttribute(attr); - return elementValue === value; - } + if (!data) return null; - // Handle contains(@class, 'value') - const classContainsMatch = condition.match( - /^contains\(@class,\s*["']([^"']+)["']\)$/ - ); - if (classContainsMatch) { - const className = classContainsMatch[1]; - return element.classList.contains(className); - } - - // Handle contains(@attribute, 'value') - const attrContainsMatch = condition.match( - /^contains\(@([^,]+),\s*["']([^"']+)["']\)$/ - ); - if (attrContainsMatch) { - const [, attr, value] = attrContainsMatch; - const elementValue = element.getAttribute(attr) || ""; - return elementValue.includes(value); - } - - // Handle text()="value" - const textMatch = condition.match(/^text\(\)=["']([^"']+)["']$/); - if (textMatch) { - const expectedText = textMatch[1]; - const elementText = element.textContent?.trim() || ""; - return elementText === expectedText; - } - - // Handle contains(text(), 'value') - const textContainsMatch = condition.match( - /^contains\(text\(\),\s*["']([^"']+)["']\)$/ - ); - if (textContainsMatch) { - const expectedText = textContainsMatch[1]; - const elementText = element.textContent?.trim() || ""; - return elementText.includes(expectedText); - } - - // Handle count(*)=0 (element has no children) - if (condition === "count(*)=0") { - return element.children.length === 0; - } - - // Handle other count conditions - const countMatch = condition.match(/^count\(\*\)=(\d+)$/); - if (countMatch) { - const expectedCount = parseInt(countMatch[1]); - return element.children.length === expectedCount; - } - - return true; - }; - - // Enhanced value extraction with shadow DOM support - const extractValueWithShadowSupport = ( - element: Element, - attribute: string - ): string | null => { - if (!element) return null; - - const baseURL = - element.ownerDocument?.location?.href || window.location.origin; - - // Check shadow DOM content first - if (element.shadowRoot) { - const shadowContent = element.shadowRoot.textContent; - if (shadowContent?.trim()) { - return shadowContent.trim(); + return { + data, + selectorObj: { + selector, + attribute, + tag: tagName.toUpperCase(), + isShadow: element.getRootNode() instanceof ShadowRoot } - } - - if (attribute === "innerText") { - let textContent = - (element as HTMLElement).innerText?.trim() || - (element as HTMLElement).textContent?.trim(); - - if (!textContent) { - const dataAttributes = [ - "data-600", - "data-text", - "data-label", - "data-value", - "data-content", - ]; - for (const attr of dataAttributes) { - const dataValue = element.getAttribute(attr); - if (dataValue && dataValue.trim()) { - textContent = dataValue.trim(); - break; - } - } - } - - return textContent || null; - } else if (attribute === "innerHTML") { - return element.innerHTML?.trim() || null; - } else if (attribute === "href") { - let anchorElement = element; - - if (element.tagName !== "A") { - anchorElement = - element.closest("a") || - element.parentElement?.closest("a") || - element; - } - - const hrefValue = anchorElement.getAttribute("href"); - if (!hrefValue || hrefValue.trim() === "") { - return null; - } - - try { - return new URL(hrefValue, baseURL).href; - } catch (e) { - console.warn("Error creating URL from", hrefValue, e); - return hrefValue; - } - } else if (attribute === "src") { - const attrValue = element.getAttribute(attribute); - const dataAttr = attrValue || element.getAttribute("data-" + attribute); - - if (!dataAttr || dataAttr.trim() === "") { - const style = window.getComputedStyle(element as HTMLElement); - const bgImage = style.backgroundImage; - if (bgImage && bgImage !== "none") { - const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/); - return matches ? new URL(matches[1], baseURL).href : null; - } - return null; - } - - try { - return new URL(dataAttr, baseURL).href; - } catch (e) { - console.warn("Error creating URL from", dataAttr, e); - return dataAttr; - } - } - return element.getAttribute(attribute); - }; - - // Simple deepest child finder - limit depth to prevent hanging - const findDeepestChild = (element: HTMLElement): HTMLElement => { - let deepest = element; - let maxDepth = 0; - - const traverse = (el: HTMLElement, depth: number) => { - if (depth > 3) return; - - const text = el.textContent?.trim() || ""; - if (isValidData(text) && depth > maxDepth) { - maxDepth = depth; - deepest = el; - } - - const children = Array.from(el.children).slice(0, 3); - children.forEach((child) => { - if (child instanceof HTMLElement) { - traverse(child, depth + 1); - } - }); }; - - traverse(element, 0); - return deepest; }; - validatedChildSelectors.forEach((childSelector, index) => { + const validatedChildSelectors = validateChildSelectors(uniqueChildSelectors); + + validatedChildSelectors.forEach((selector, index) => { try { - // Detect if this selector should use shadow DOM traversal - const isShadowSelector = childSelector.includes('>>') || - childSelector.startsWith('//') && - (listSelector.includes('>>') || currentSnapshot?.snapshot); - - const element = evaluateXPathWithShadowSupport( + const elements = evaluateXPathAllWithShadowSupport( iframeElement.contentDocument!, - childSelector, - isShadowSelector - ) as HTMLElement; + selector, + selector.includes(">>") || selector.startsWith("//") + ); - if (element && isElementVisible(element)) { + if (elements.length === 0) return; + + const element = elements[0] as HTMLElement; + const tagName = element.tagName.toLowerCase(); + const isShadow = element.getRootNode() instanceof ShadowRoot; + + if (isElementVisible(element)) { const rect = element.getBoundingClientRect(); const position = { x: rect.left, y: rect.top }; - const tagName = element.tagName.toLowerCase(); - const isShadow = element.getRootNode() instanceof ShadowRoot; - - if (tagName === "a") { - const anchor = element as HTMLAnchorElement; - const href = extractValueWithShadowSupport(anchor, "href"); - const text = extractValueWithShadowSupport(anchor, "innerText"); - - if ( - href && - href.trim() !== "" && - href !== window.location.href && - !href.startsWith("javascript:") && - !href.startsWith("#") - ) { - const fieldIdHref = Date.now() + index * 1000; - - candidateFields.push({ - id: fieldIdHref, - element: element, - isLeaf: true, - depth: 0, - position: position, - field: { - id: fieldIdHref, - type: "text", - label: `Label ${index * 2 + 1}`, - data: href, - selectorObj: { - selector: childSelector, - tag: element.tagName, - isShadow: isShadow, - attribute: "href", - }, - }, - }); - } - - const fieldIdText = Date.now() + index * 1000 + 1; + if (tagName === 'a') { + const href = element.getAttribute('href'); + const text = (element.textContent || '').trim(); if (text && isValidData(text)) { - candidateFields.push({ - id: fieldIdText, - element: element, - isLeaf: true, - depth: 0, - position: position, - field: { - id: fieldIdText, - type: "text", - label: `Label ${index * 2 + 2}`, - data: text, - selectorObj: { - selector: childSelector, - tag: element.tagName, - isShadow: isShadow, - attribute: "innerText", - }, - }, - }); + const textField = createFieldData(element, selector, 'innerText'); + if (textField && textField.data) { + const fieldId = Date.now() + index * 1000; + + candidateFields.push({ + id: fieldId, + element: element, + isLeaf: true, + depth: 0, + position: position, + field: { + id: fieldId, + type: "text", + label: `Label ${index * 2 + 1}`, + data: textField.data, + selectorObj: textField.selectorObj + } + }); + } + } + + if (href && href !== '#' && !href.startsWith('javascript:')) { + const hrefField = createFieldData(element, selector, 'href'); + if (hrefField && hrefField.data) { + const fieldId = Date.now() + index * 1000 + 1; + + candidateFields.push({ + id: fieldId, + element: element, + isLeaf: true, + depth: 0, + position: position, + field: { + id: fieldId, + type: "text", + label: `Label ${index * 2 + 2}`, + data: hrefField.data, + selectorObj: hrefField.selectorObj + } + }); + } } } else if (tagName === "img") { - const img = element as HTMLImageElement; - const src = extractValueWithShadowSupport(img, "src"); - const alt = extractValueWithShadowSupport(img, "alt"); + const src = element.getAttribute("src"); - if (src && !src.startsWith("data:") && src.length > 10) { + if (src && isValidData(src)) { const fieldId = Date.now() + index * 1000; candidateFields.push({ @@ -894,7 +563,7 @@ export const BrowserWindow = () => { label: `Label ${index + 1}`, data: src, selectorObj: { - selector: childSelector, + selector: selector, tag: element.tagName, isShadow: isShadow, attribute: "src", @@ -902,9 +571,11 @@ export const BrowserWindow = () => { }, }); } + } else { + const fieldData = createFieldData(element, selector); - if (alt && isValidData(alt)) { - const fieldId = Date.now() + index * 1000 + 1; + if (fieldData && fieldData.data && isValidData(fieldData.data)) { + const fieldId = Date.now() + index * 1000; candidateFields.push({ id: fieldId, @@ -912,127 +583,39 @@ export const BrowserWindow = () => { isLeaf: true, depth: 0, position: position, - field: { - id: fieldId, - type: "text", - label: `Label ${index + 2}`, - data: alt, - selectorObj: { - selector: childSelector, - tag: element.tagName, - isShadow: isShadow, - attribute: "alt", - }, - }, - }); - } - } else { - const deepestElement = findDeepestChild(element); - const data = extractValueWithShadowSupport(deepestElement, "innerText"); - - if (data && isValidData(data)) { - const isLeaf = isLeafElement(deepestElement); - const depth = getElementDepthFromList( - deepestElement, - listSelector, - iframeElement.contentDocument! - ); - - const fieldId = Date.now() + index; - - candidateFields.push({ - id: fieldId, - element: deepestElement, - isLeaf: isLeaf, - depth: depth, - position: position, field: { id: fieldId, type: "text", label: `Label ${index + 1}`, - data: data, - selectorObj: { - selector: childSelector, - tag: deepestElement.tagName, - isShadow: deepestElement.getRootNode() instanceof ShadowRoot, - attribute: "innerText", - }, - }, + data: fieldData.data, + selectorObj: fieldData.selectorObj + } }); } } } } catch (error) { - console.warn( - `Failed to process child selector ${childSelector}:`, - error - ); + console.warn(`Failed to process child selector ${selector}:`, error); } }); candidateFields.sort((a, b) => { const yDiff = a.position.y - b.position.y; - + if (Math.abs(yDiff) <= 5) { return a.position.x - b.position.x; } - + return yDiff; }); const filteredCandidates = removeParentChildDuplicates(candidateFields); - const finalFields = removeDuplicateContent(filteredCandidates); return finalFields; }, [currentSnapshot] ); - const isLeafElement = (element: HTMLElement): boolean => { - const children = Array.from(element.children) as HTMLElement[]; - - if (children.length === 0) return true; - - const hasContentfulChildren = children.some((child) => { - const text = child.textContent?.trim() || ""; - return text.length > 0 && text !== element.textContent?.trim(); - }); - - return !hasContentfulChildren; - }; - - const getElementDepthFromList = ( - element: HTMLElement, - listSelector: string, - document: Document - ): number => { - try { - const listResult = document.evaluate( - listSelector, - document, - null, - XPathResult.FIRST_ORDERED_NODE_TYPE, - null - ); - - const listElement = listResult.singleNodeValue as HTMLElement; - if (!listElement) return 0; - - let depth = 0; - let current = element; - - while (current && current !== listElement && current.parentElement) { - depth++; - current = current.parentElement; - if (depth > 20) break; - } - - return current === listElement ? depth : 0; - } catch (error) { - return 0; - } - }; - const removeParentChildDuplicates = ( candidates: Array<{ id: number; @@ -1242,6 +825,29 @@ export const BrowserWindow = () => { } }, [browserSteps, getList, listSelector, initialAutoFieldIds, currentListActionId, manuallyAddedFieldIds]); + useEffect(() => { + if (currentListActionId && browserSteps.length > 0) { + const activeStep = browserSteps.find( + s => s.type === 'list' && s.actionId === currentListActionId + ) as ListStep | undefined; + + if (activeStep) { + if (currentListId !== activeStep.id) { + setCurrentListId(activeStep.id); + } + if (listSelector !== activeStep.listSelector) { + setListSelector(activeStep.listSelector); + } + if (JSON.stringify(fields) !== JSON.stringify(activeStep.fields)) { + setFields(activeStep.fields); + } + if (activeStep.pagination?.selector && paginationSelector !== activeStep.pagination.selector) { + setPaginationSelector(activeStep.pagination.selector); + } + } + } + }, [currentListActionId, browserSteps, currentListId, listSelector, fields, paginationSelector]); + useEffect(() => { if (!isDOMMode) { capturedElementHighlighter.clearHighlights(); @@ -1637,6 +1243,22 @@ export const BrowserWindow = () => { paginationType !== "scrollUp" && paginationType !== "none" ) { + let targetListId = currentListId; + let targetFields = fields; + + if ((!targetListId || targetListId === 0) && currentListActionId) { + const activeStep = browserSteps.find( + s => s.type === 'list' && s.actionId === currentListActionId + ) as ListStep | undefined; + + if (activeStep) { + targetListId = activeStep.id; + if (Object.keys(targetFields).length === 0 && Object.keys(activeStep.fields).length > 0) { + targetFields = activeStep.fields; + } + } + } + setPaginationSelector(highlighterData.selector); notify( `info`, @@ -1646,8 +1268,8 @@ export const BrowserWindow = () => { ); addListStep( listSelector!, - fields, - currentListId || 0, + targetFields, + targetListId || 0, currentListActionId || `list-${crypto.randomUUID()}`, { type: paginationType, @@ -1812,6 +1434,8 @@ export const BrowserWindow = () => { socket, t, paginationSelector, + highlighterData, + browserSteps ] ); @@ -1864,6 +1488,22 @@ export const BrowserWindow = () => { paginationType !== "scrollUp" && paginationType !== "none" ) { + let targetListId = currentListId; + let targetFields = fields; + + if ((!targetListId || targetListId === 0) && currentListActionId) { + const activeStep = browserSteps.find( + s => s.type === 'list' && s.actionId === currentListActionId + ) as ListStep | undefined; + + if (activeStep) { + targetListId = activeStep.id; + if (Object.keys(targetFields).length === 0 && Object.keys(activeStep.fields).length > 0) { + targetFields = activeStep.fields; + } + } + } + setPaginationSelector(highlighterData.selector); notify( `info`, @@ -1873,8 +1513,8 @@ export const BrowserWindow = () => { ); addListStep( listSelector!, - fields, - currentListId || 0, + targetFields, + targetListId || 0, currentListActionId || `list-${crypto.randomUUID()}`, { type: paginationType, selector: highlighterData.selector, isShadow: highlighterData.isShadow }, undefined, @@ -2046,6 +1686,31 @@ export const BrowserWindow = () => { } }, [paginationMode, resetPaginationSelector]); + useEffect(() => { + if (paginationMode && currentListActionId) { + const currentListStep = browserSteps.find( + step => step.type === 'list' && step.actionId === currentListActionId + ) as (ListStep & { type: 'list' }) | undefined; + + const currentSelector = currentListStep?.pagination?.selector; + const currentType = currentListStep?.pagination?.type; + + if (['clickNext', 'clickLoadMore'].includes(paginationType)) { + if (!currentSelector || (currentType && currentType !== paginationType)) { + setPaginationSelector(''); + } + } + + const stepSelector = currentListStep?.pagination?.selector; + + if (stepSelector && !paginationSelector) { + setPaginationSelector(stepSelector); + } else if (!stepSelector && paginationSelector) { + setPaginationSelector(''); + } + } + }, [browserSteps, paginationMode, currentListActionId, paginationSelector]); + return (
{ listSelector={listSelector} cachedChildSelectors={cachedChildSelectors} paginationMode={paginationMode} + paginationSelector={paginationSelector} paginationType={paginationType} limitMode={limitMode} isCachingChildSelectors={isCachingChildSelectors} diff --git a/src/components/recorder/DOMBrowserRenderer.tsx b/src/components/recorder/DOMBrowserRenderer.tsx index 9e818e31..10fa4742 100644 --- a/src/components/recorder/DOMBrowserRenderer.tsx +++ b/src/components/recorder/DOMBrowserRenderer.tsx @@ -100,6 +100,7 @@ interface RRWebDOMBrowserRendererProps { listSelector?: string | null; cachedChildSelectors?: string[]; paginationMode?: boolean; + paginationSelector?: string; paginationType?: string; limitMode?: boolean; isCachingChildSelectors?: boolean; @@ -153,6 +154,7 @@ export const DOMBrowserRenderer: React.FC = ({ listSelector = null, cachedChildSelectors = [], paginationMode = false, + paginationSelector = "", paginationType = "", limitMode = false, isCachingChildSelectors = false, @@ -257,6 +259,13 @@ export const DOMBrowserRenderer: React.FC = ({ else if (listSelector) { if (limitMode) { shouldHighlight = false; + } else if ( + paginationMode && + paginationSelector && + paginationType !== "" && + !["none", "scrollDown", "scrollUp"].includes(paginationType) + ) { + shouldHighlight = false; } else if ( paginationMode && paginationType !== "" && diff --git a/src/components/recorder/RightSidePanel.tsx b/src/components/recorder/RightSidePanel.tsx index d5a7c29c..8159e149 100644 --- a/src/components/recorder/RightSidePanel.tsx +++ b/src/components/recorder/RightSidePanel.tsx @@ -1,4 +1,4 @@ -import React, { useState, useCallback, useEffect, useMemo } from 'react'; +import React, { useState, useCallback, useEffect, useRef, useMemo } from 'react'; import { Button, Paper, Box, TextField, IconButton, Tooltip } from "@mui/material"; import { WorkflowFile } from "maxun-core"; import Typography from "@mui/material/Typography"; @@ -15,9 +15,9 @@ import ActionDescriptionBox from '../action/ActionDescriptionBox'; import { useThemeMode } from '../../context/theme-provider'; import { useTranslation } from 'react-i18next'; import { useBrowserDimensionsStore } from '../../context/browserDimensions'; -import { emptyWorkflow } from '../../shared/constants'; import { clientListExtractor } from '../../helpers/clientListExtractor'; import { clientSelectorGenerator } from '../../helpers/clientSelectorGenerator'; +import { clientPaginationDetector } from '../../helpers/clientPaginationDetector'; const fetchWorkflow = (id: string, callback: (response: WorkflowFile) => void) => { getActiveWorkflow(id).then( @@ -45,6 +45,13 @@ export const RightSidePanel: React.FC = ({ onFinishCapture const [showCaptureText, setShowCaptureText] = useState(true); const { panelHeight } = useBrowserDimensionsStore(); + const [autoDetectedPagination, setAutoDetectedPagination] = useState<{ + type: PaginationType; + selector: string | null; + confidence: 'high' | 'medium' | 'low'; + } | null>(null); + const autoDetectionRunRef = useRef(null); + const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog, currentListActionId, setCurrentListActionId, currentTextActionId, setCurrentTextActionId, currentScreenshotActionId, setCurrentScreenshotActionId, isDOMMode, setIsDOMMode, currentSnapshot, setCurrentSnapshot, updateDOMMode, initialUrl, setRecordingUrl, currentTextGroupName } = useGlobalInfoStore(); const { getText, startGetText, stopGetText, @@ -62,7 +69,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture startAction, finishAction } = useActionContext(); - const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep, updateListTextFieldLabel, removeListTextField, updateListStepLimit, deleteStepsByActionId, updateListStepData, updateScreenshotStepData, emitActionForStep } = useBrowserSteps(); + const { browserSteps, addScreenshotStep, updateListStepLimit, updateListStepPagination, deleteStepsByActionId, updateListStepData, updateScreenshotStepData, emitActionForStep } = useBrowserSteps(); const { id, socket } = useSocketStore(); const { t } = useTranslation(); @@ -72,6 +79,73 @@ export const RightSidePanel: React.FC = ({ onFinishCapture setWorkflow(data); }, [setWorkflow]); + useEffect(() => { + if (!paginationType || !currentListActionId) return; + + const currentListStep = browserSteps.find( + step => step.type === 'list' && step.actionId === currentListActionId + ) as (BrowserStep & { type: 'list' }) | undefined; + + const currentSelector = currentListStep?.pagination?.selector; + const currentType = currentListStep?.pagination?.type; + + if (['clickNext', 'clickLoadMore'].includes(paginationType)) { + const needsSelector = !currentSelector && !currentType; + const typeChanged = currentType && currentType !== paginationType; + + if (typeChanged) { + const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; + if (iframeElement?.contentDocument && currentSelector) { + try { + function evaluateSelector(selector: string, doc: Document): Element[] { + if (selector.startsWith('//') || selector.startsWith('(//')) { + try { + const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); + const elements: Element[] = []; + for (let i = 0; i < result.snapshotLength; i++) { + const node = result.snapshotItem(i); + if (node && node.nodeType === Node.ELEMENT_NODE) { + elements.push(node as Element); + } + } + return elements; + } catch (err) { + return []; + } + } else { + try { + return Array.from(doc.querySelectorAll(selector)); + } catch (err) { + return []; + } + } + } + + const elements = evaluateSelector(currentSelector, iframeElement.contentDocument); + elements.forEach((el: Element) => { + (el as HTMLElement).style.outline = ''; + (el as HTMLElement).style.outlineOffset = ''; + (el as HTMLElement).style.zIndex = ''; + }); + } catch (error) { + console.error('Error removing pagination highlight:', error); + } + } + + if (currentListStep) { + updateListStepPagination(currentListStep.id, { + type: paginationType, + selector: null, + }); + } + + startPaginationMode(); + } else if (needsSelector) { + startPaginationMode(); + } + } + }, [paginationType, currentListActionId, browserSteps, updateListStepPagination, startPaginationMode]); + useEffect(() => { if (socket) { const domModeHandler = (data: any) => { @@ -391,7 +465,182 @@ export const RightSidePanel: React.FC = ({ onFinishCapture return; } - startPaginationMode(); + const currentListStepForAutoDetect = browserSteps.find( + step => step.type === 'list' && step.actionId === currentListActionId + ) as (BrowserStep & { type: 'list'; listSelector?: string }) | undefined; + + if (currentListStepForAutoDetect?.listSelector) { + if (autoDetectionRunRef.current !== currentListActionId) { + autoDetectionRunRef.current = currentListActionId; + + notify('info', 'Detecting pagination...'); + + try { + socket?.emit('testPaginationScroll', { + listSelector: currentListStepForAutoDetect.listSelector + }); + + const handleScrollTestResult = (result: any) => { + if (result.success && result.contentLoaded) { + setAutoDetectedPagination({ + type: 'scrollDown', + selector: null, + confidence: 'high' + }); + updatePaginationType('scrollDown'); + + const latestListStep = browserSteps.find( + step => step.type === 'list' && step.actionId === currentListActionId + ); + if (latestListStep) { + updateListStepPagination(latestListStep.id, { + type: 'scrollDown', + selector: null, + isShadow: false + }); + } + } else if (result.success && !result.contentLoaded) { + const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; + const iframeDoc = iframeElement?.contentDocument; + + if (iframeDoc) { + const detectionResult = clientPaginationDetector.autoDetectPagination( + iframeDoc, + currentListStepForAutoDetect.listSelector!, + clientSelectorGenerator, + { disableScrollDetection: true } + ); + + if (detectionResult.type) { + setAutoDetectedPagination({ + type: detectionResult.type, + selector: detectionResult.selector, + confidence: detectionResult.confidence + }); + + const latestListStep = browserSteps.find( + step => step.type === 'list' && step.actionId === currentListActionId + ); + if (latestListStep) { + updateListStepPagination(latestListStep.id, { + type: detectionResult.type, + selector: detectionResult.selector, + isShadow: false + }); + } + + updatePaginationType(detectionResult.type); + + if (detectionResult.selector && (detectionResult.type === 'clickNext' || detectionResult.type === 'clickLoadMore')) { + try { + function evaluateSelector(selector: string, doc: Document): Element[] { + try { + const isXPath = selector.startsWith('//') || selector.startsWith('(//'); + if (isXPath) { + const result = doc.evaluate( + selector, + doc, + null, + XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, + null + ); + const elements: Element[] = []; + for (let i = 0; i < result.snapshotLength; i++) { + const node = result.snapshotItem(i); + if (node && node.nodeType === Node.ELEMENT_NODE) { + elements.push(node as Element); + } + } + return elements; + } else { + try { + const allElements = Array.from(doc.querySelectorAll(selector)); + if (allElements.length > 0) { + return allElements; + } + } catch (err) { + console.warn('[RightSidePanel] Full chained selector failed, trying individual selectors:', err); + } + + const selectorParts = selector.split(','); + for (const part of selectorParts) { + try { + const elements = Array.from(doc.querySelectorAll(part.trim())); + if (elements.length > 0) { + return elements; + } + } catch (err) { + console.warn('[RightSidePanel] Selector part failed:', part.trim(), err); + continue; + } + } + return []; + } + } catch (err) { + console.error('[RightSidePanel] Selector evaluation failed:', selector, err); + return []; + } + } + + const elements = evaluateSelector(detectionResult.selector, iframeDoc); + if (elements.length > 0) { + elements.forEach((el: Element) => { + (el as HTMLElement).style.outline = '3px dashed #ff00c3'; + (el as HTMLElement).style.outlineOffset = '2px'; + (el as HTMLElement).style.zIndex = '9999'; + }); + + const firstElement = elements[0] as HTMLElement; + const elementRect = firstElement.getBoundingClientRect(); + const iframeWindow = iframeElement.contentWindow; + if (iframeWindow) { + const targetY = elementRect.top + iframeWindow.scrollY - (iframeWindow.innerHeight / 2) + (elementRect.height / 2); + iframeWindow.scrollTo({ top: targetY, behavior: 'smooth' }); + } + + const paginationTypeLabel = detectionResult.type === 'clickNext' ? 'Next Button' : 'Load More Button'; + notify('info', `${paginationTypeLabel} has been auto-detected and highlighted on the page`); + } else { + console.warn(' No elements found for selector:', detectionResult.selector); + } + } catch (error) { + console.error('Error highlighting pagination button:', error); + } + } + } else { + setAutoDetectedPagination(null); + } + } + } else { + console.error('Scroll test failed:', result.error); + setAutoDetectedPagination(null); + } + + socket?.off('paginationScrollTestResult', handleScrollTestResult); + }; + + socket?.on('paginationScrollTestResult', handleScrollTestResult); + + setTimeout(() => { + socket?.off('paginationScrollTestResult', handleScrollTestResult); + }, 5000); + + } catch (error) { + console.error('Scroll test failed:', error); + setAutoDetectedPagination(null); + } + } + } + + const shouldSkipPaginationMode = autoDetectedPagination && ( + ['scrollDown', 'scrollUp'].includes(autoDetectedPagination.type) || + (['clickNext', 'clickLoadMore'].includes(autoDetectedPagination.type) && autoDetectedPagination.selector) + ); + + if (!shouldSkipPaginationMode) { + startPaginationMode(); + } + setShowPaginationOptions(true); setCaptureStage('pagination'); break; @@ -460,6 +709,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture case 'pagination': stopPaginationMode(); setShowPaginationOptions(false); + setAutoDetectedPagination(null); setCaptureStage('initial'); break; } @@ -495,17 +745,58 @@ export const RightSidePanel: React.FC = ({ onFinishCapture socket.emit('removeAction', { actionId: currentListActionId }); } } + + if (autoDetectedPagination?.selector) { + const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; + if (iframeElement?.contentDocument) { + try { + function evaluateSelector(selector: string, doc: Document): Element[] { + if (selector.startsWith('//') || selector.startsWith('(//')) { + try { + const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); + const elements: Element[] = []; + for (let i = 0; i < result.snapshotLength; i++) { + const node = result.snapshotItem(i); + if (node && node.nodeType === Node.ELEMENT_NODE) { + elements.push(node as Element); + } + } + return elements; + } catch (err) { + return []; + } + } else { + try { + return Array.from(doc.querySelectorAll(selector)); + } catch (err) { + return []; + } + } + } + + const elements = evaluateSelector(autoDetectedPagination.selector, iframeElement.contentDocument); + elements.forEach((el: Element) => { + (el as HTMLElement).style.outline = ''; + (el as HTMLElement).style.outlineOffset = ''; + (el as HTMLElement).style.zIndex = ''; + }); + } catch (error) { + console.error('Error removing pagination highlight on discard:', error); + } + } + } resetListState(); stopPaginationMode(); stopLimitMode(); setShowPaginationOptions(false); setShowLimitOptions(false); + setAutoDetectedPagination(null); setCaptureStage('initial'); setCurrentListActionId(''); clientSelectorGenerator.cleanup(); notify('error', t('right_panel.errors.capture_list_discarded')); - }, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t, stopPaginationMode, stopLimitMode, socket]); + }, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t, stopPaginationMode, stopLimitMode, socket, autoDetectedPagination]); const captureScreenshot = (fullPage: boolean) => { const screenshotCount = browserSteps.filter(s => s.type === 'screenshot').length + 1; @@ -615,6 +906,114 @@ export const RightSidePanel: React.FC = ({ onFinishCapture {showPaginationOptions && ( {t('right_panel.pagination.title')} + + {autoDetectedPagination && autoDetectedPagination.type !== '' && ( + + + ✓ Auto-detected: { + autoDetectedPagination.type === 'clickNext' ? 'Click Next' : + autoDetectedPagination.type === 'clickLoadMore' ? 'Click Load More' : + autoDetectedPagination.type === 'scrollDown' ? 'Scroll Down' : + autoDetectedPagination.type === 'scrollUp' ? 'Scroll Up' : + autoDetectedPagination.type + } + + + You can continue with this or manually select a different pagination type below. + + {autoDetectedPagination.selector && ['clickNext', 'clickLoadMore'].includes(autoDetectedPagination.type) && ( + + )} + + )}