From 459218ae13e4e92ccb37f6053cc574224fe1d0c3 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sun, 30 Nov 2025 20:01:18 +0530 Subject: [PATCH 01/14] fix: remove integration router exports --- server/src/routes/index.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/routes/index.ts b/server/src/routes/index.ts index 3d8a3644..a6ec206f 100644 --- a/server/src/routes/index.ts +++ b/server/src/routes/index.ts @@ -2,7 +2,6 @@ import { router as record } from './record'; import { router as workflow } from './workflow'; import { router as storage } from './storage'; import { router as auth } from './auth'; -import { router as integration } from './integration'; import { router as proxy } from './proxy'; import { router as webhook } from './webhook'; @@ -11,7 +10,6 @@ export { workflow, storage, auth, - integration, proxy, webhook }; From 2e6e89e45391d6c7decfc501f27a0873e0634a21 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sun, 30 Nov 2025 20:05:27 +0530 Subject: [PATCH 02/14] fix: remove integration router exports --- server/src/server.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/server.ts b/server/src/server.ts index 88dc74c5..e6beb1c1 100644 --- a/server/src/server.ts +++ b/server/src/server.ts @@ -5,7 +5,7 @@ import { Server } from "socket.io"; import cors from 'cors'; import dotenv from 'dotenv'; dotenv.config(); -import { record, workflow, storage, auth, integration, proxy, webhook } from './routes'; +import { record, workflow, storage, auth, proxy, webhook } from './routes'; import { BrowserPool } from "./browser-management/classes/BrowserPool"; import logger from './logger'; import sequelize, { connectDB, syncDB } from './storage/db' @@ -107,7 +107,6 @@ app.use('/record', record); app.use('/workflow', workflow); app.use('/storage', storage); app.use('/auth', auth); -app.use('/integration', integration); app.use('/proxy', proxy); app.use('/api-docs', swaggerUi.serve, swaggerUi.setup(swaggerSpec)); From eb512d8df3559dd69e7b9f5bdcd6f44a9f573fce Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 20:10:25 +0530 Subject: [PATCH 03/14] fix: required page instance scrape action --- .../classes/RemoteBrowser.ts | 4 +- server/src/markdownify/scrape.ts | 170 +++++++++--------- server/src/routes/storage.ts | 1 - 3 files changed, 82 insertions(+), 93 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 266a0978..41a59176 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -550,9 +550,9 @@ export class RemoteBrowser { try { const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']); - await blocker.enableBlockingInPage(this.currentPage); + await blocker.enableBlockingInPage(this.currentPage as any); this.client = await this.currentPage.context().newCDPSession(this.currentPage); - await blocker.disableBlockingInPage(this.currentPage); + await blocker.disableBlockingInPage(this.currentPage as any); console.log('Adblocker initialized'); } catch (error: any) { console.warn('Failed to initialize adblocker, continuing without it:', error.message); diff --git a/server/src/markdownify/scrape.ts b/server/src/markdownify/scrape.ts index 52ae19bf..09df4276 100644 --- a/server/src/markdownify/scrape.ts +++ b/server/src/markdownify/scrape.ts @@ -1,4 +1,4 @@ -import { connectToRemoteBrowser } from "../browser-management/browserConnection"; +import { Page } from "playwright-core"; import { parseMarkdown } from "./markdown"; import logger from "../logger"; @@ -21,115 +21,105 @@ async function gotoWithFallback(page: any, url: string) { * Fetches a webpage, strips scripts/styles/images/etc, * returns clean Markdown using parser. * @param url - The URL to convert - * @param existingPage - Optional existing Playwright page instance to reuse + * @param page - Existing Playwright page instance to use */ -export async function convertPageToMarkdown(url: string): Promise { - const browser = await connectToRemoteBrowser(); - const page = await browser.newPage(); +export async function convertPageToMarkdown(url: string, page: Page): Promise { + try { + logger.log('info', `[Scrape] Using existing page instance for markdown conversion of ${url}`); - await page.goto(url, { waitUntil: "networkidle", timeout: 100000 }); + await gotoWithFallback(page, url); - const cleanedHtml = await page.evaluate(() => { - const selectors = [ - "script", - "style", - "link[rel='stylesheet']", - "noscript", - "meta", - "svg", - "img", - "picture", - "source", - "video", - "audio", - "iframe", - "object", - "embed" - ]; + const cleanedHtml = await page.evaluate(() => { + const selectors = [ + "script", + "style", + "link[rel='stylesheet']", + "noscript", + "meta", + "svg", + "img", + "picture", + "source", + "video", + "audio", + "iframe", + "object", + "embed" + ]; - selectors.forEach(sel => { - document.querySelectorAll(sel).forEach(e => e.remove()); - }); - - // Remove inline event handlers (onclick, onload…) - const all = document.querySelectorAll("*"); - all.forEach(el => { - [...el.attributes].forEach(attr => { - if (attr.name.startsWith("on")) { - el.removeAttribute(attr.name); - } + selectors.forEach(sel => { + document.querySelectorAll(sel).forEach(e => e.remove()); }); + + const all = document.querySelectorAll("*"); + all.forEach(el => { + [...el.attributes].forEach(attr => { + if (attr.name.startsWith("on")) { + el.removeAttribute(attr.name); + } + }); + }); + + return document.documentElement.outerHTML; }); - return document.documentElement.outerHTML; - }); - - if (shouldCloseBrowser && browser) { - logger.log('info', `[Scrape] Closing browser instance created for markdown conversion`); - await browser.close(); - } else { - logger.log('info', `[Scrape] Keeping existing browser instance open after markdown conversion`); + const markdown = await parseMarkdown(cleanedHtml, url); + return markdown; + } catch (error: any) { + logger.error(`[Scrape] Error during markdown conversion: ${error.message}`); + throw error; } - - // Convert cleaned HTML → Markdown - const markdown = await parseMarkdown(cleanedHtml, url); - return markdown; } /** * Fetches a webpage, strips scripts/styles/images/etc, * returns clean HTML. * @param url - The URL to convert - * @param existingPage - Optional existing Playwright page instance to reuse + * @param page - Existing Playwright page instance to use */ -export async function convertPageToHTML(url: string): Promise { - const browser = await connectToRemoteBrowser(); - const page = await browser.newPage(); +export async function convertPageToHTML(url: string, page: Page): Promise { + try { + logger.log('info', `[Scrape] Using existing page instance for HTML conversion of ${url}`); - await page.goto(url, { waitUntil: "networkidle", timeout: 100000 }); + await gotoWithFallback(page, url); - const cleanedHtml = await page.evaluate(() => { - const selectors = [ - "script", - "style", - "link[rel='stylesheet']", - "noscript", - "meta", - "svg", - "img", - "picture", - "source", - "video", - "audio", - "iframe", - "object", - "embed" - ]; + const cleanedHtml = await page.evaluate(() => { + const selectors = [ + "script", + "style", + "link[rel='stylesheet']", + "noscript", + "meta", + "svg", + "img", + "picture", + "source", + "video", + "audio", + "iframe", + "object", + "embed" + ]; - selectors.forEach(sel => { - document.querySelectorAll(sel).forEach(e => e.remove()); - }); - - // Remove inline event handlers (onclick, onload…) - const all = document.querySelectorAll("*"); - all.forEach(el => { - [...el.attributes].forEach(attr => { - if (attr.name.startsWith("on")) { - el.removeAttribute(attr.name); - } + selectors.forEach(sel => { + document.querySelectorAll(sel).forEach(e => e.remove()); }); + + const all = document.querySelectorAll("*"); + all.forEach(el => { + [...el.attributes].forEach(attr => { + if (attr.name.startsWith("on")) { + el.removeAttribute(attr.name); + } + }); + }); + + return document.documentElement.outerHTML; }); - return document.documentElement.outerHTML; - }); - - if (shouldCloseBrowser && browser) { - logger.log('info', `[Scrape] Closing browser instance created for HTML conversion`); - await browser.close(); - } else { - logger.log('info', `[Scrape] Keeping existing browser instance open after HTML conversion`); + return cleanedHtml; + } catch (error: any) { + logger.error(`[Scrape] Error during HTML conversion: ${error.message}`); + throw error; } - - // Return cleaned HTML directly - return cleanedHtml; } diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index 72518c7b..45d4bc53 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -15,7 +15,6 @@ import { encrypt, decrypt } from '../utils/auth'; import { WorkflowFile } from 'maxun-core'; import { cancelScheduledWorkflow, scheduleWorkflow } from '../storage/schedule'; import { pgBossClient } from '../storage/pgboss'; -chromium.use(stealthPlugin()); export const router = Router(); From d538419d700c7cb3a62975cee94a2ff5fe5ed66b Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 20:18:02 +0530 Subject: [PATCH 04/14] fix: rm duplicate io export --- server/src/server.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/server.ts b/server/src/server.ts index e6beb1c1..61f57756 100644 --- a/server/src/server.ts +++ b/server/src/server.ts @@ -178,8 +178,6 @@ if (require.main === module) { await startWorkers(); - io = new Server(server); - io.of('/queued-run').on('connection', (socket) => { const userId = socket.handshake.query.userId as string; From 2bbf5d657ff4c6180f756a1ad6c52dbdce803ba0 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 20:35:12 +0530 Subject: [PATCH 05/14] chore: mv ci to install --- browser/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser/Dockerfile b/browser/Dockerfile index 9f2ea838..7dd27033 100644 --- a/browser/Dockerfile +++ b/browser/Dockerfile @@ -6,7 +6,7 @@ WORKDIR /app COPY browser/package*.json ./ # Install dependencies -RUN npm ci +RUN npm install # Copy TypeScript source and config COPY browser/server.ts ./ From 560b2e352d7cb821b7e1cb332d985563c29c8e5e Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 20:45:51 +0530 Subject: [PATCH 06/14] chore: rm chromium backend image --- Dockerfile.backend | 27 +-------------------------- docker-compose.yml | 12 ++++++------ 2 files changed, 7 insertions(+), 32 deletions(-) diff --git a/Dockerfile.backend b/Dockerfile.backend index 85ee4b83..d0143a3d 100644 --- a/Dockerfile.backend +++ b/Dockerfile.backend @@ -1,4 +1,4 @@ -FROM --platform=$BUILDPLATFORM mcr.microsoft.com/playwright:v1.46.0-noble +FROM --platform=$BUILDPLATFORM node:20-slim # Set working directory WORKDIR /app @@ -18,31 +18,6 @@ COPY server/tsconfig.json ./server/ # Install dependencies RUN npm install --legacy-peer-deps -# Create the Chromium data directory with necessary permissions -RUN mkdir -p /tmp/chromium-data-dir && \ - chmod -R 777 /tmp/chromium-data-dir - -# Install dependencies -RUN apt-get update && apt-get install -y \ - libgbm1 \ - libnss3 \ - libatk1.0-0 \ - libatk-bridge2.0-0 \ - libdrm2 \ - libxkbcommon0 \ - libglib2.0-0 \ - libdbus-1-3 \ - libx11-xcb1 \ - libxcb1 \ - libxcomposite1 \ - libxcursor1 \ - libxdamage1 \ - libxext6 \ - libxi6 \ - libxtst6 \ - && rm -rf /var/lib/apt/lists/* \ - && mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix - # Expose backend port EXPOSE ${BACKEND_PORT:-8080} diff --git a/docker-compose.yml b/docker-compose.yml index dbb147b7..381c6671 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -30,9 +30,9 @@ services: - minio_data:/data backend: - #build: - #context: . - #dockerfile: server/Dockerfile + # build: + # context: . + # dockerfile: Dockerfile.backend image: getmaxun/maxun-backend:latest restart: unless-stopped ports: @@ -60,9 +60,9 @@ services: - /var/run/dbus:/var/run/dbus frontend: - #build: - #context: . - #dockerfile: Dockerfile + # build: + # context: . + # dockerfile: Dockerfile.frontend image: getmaxun/maxun-frontend:latest restart: unless-stopped ports: From c963febbe643fd288c1baebd90e7a19a45614df2 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sun, 30 Nov 2025 20:52:26 +0530 Subject: [PATCH 07/14] chore: core v0.0.28 --- maxun-core/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/package.json b/maxun-core/package.json index 21b51e37..9a8c003d 100644 --- a/maxun-core/package.json +++ b/maxun-core/package.json @@ -1,6 +1,6 @@ { "name": "maxun-core", - "version": "0.0.27", + "version": "0.0.28", "description": "Core package for Maxun, responsible for data extraction", "main": "build/index.js", "typings": "build/index.d.ts", From 53a34908f1af08139ccbc516315e55b171cf7657 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sun, 30 Nov 2025 20:52:54 +0530 Subject: [PATCH 08/14] chore: v0.0.28 --- package.json | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/package.json b/package.json index 79f6f966..8a5cd986 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "maxun", - "version": "0.0.27", + "version": "0.0.28", "author": "Maxun", "license": "AGPL-3.0-or-later", "dependencies": { @@ -52,7 +52,6 @@ "lodash": "^4.17.21", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", - "maxun-core": "^0.0.27", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", From f0272831937bf6cc3d4406362a1a2393ad0ba55a Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sun, 30 Nov 2025 20:56:56 +0530 Subject: [PATCH 09/14] chore: use maxun-core v0.0.28 --- package.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index 8a5cd986..41b1255c 100644 --- a/package.json +++ b/package.json @@ -52,6 +52,7 @@ "lodash": "^4.17.21", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", + "maxun-core": "^0.0.28", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", @@ -130,4 +131,4 @@ "vite": "^5.4.10", "zod": "^3.25.62" } -} \ No newline at end of file +} From 2f0516f7f94d14a3cbe301a2eb5cdb9c5260c435 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 21:28:09 +0530 Subject: [PATCH 10/14] chore: mv pw to pw-core --- server/src/api/record.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/api/record.ts b/server/src/api/record.ts index 04f4ab15..b2c2422a 100644 --- a/server/src/api/record.ts +++ b/server/src/api/record.ts @@ -11,7 +11,7 @@ import { io, Socket } from "socket.io-client"; import { BinaryOutputService } from "../storage/mino"; import { AuthenticatedRequest } from "../routes/record" import {capture} from "../utils/analytics"; -import { Page } from "playwright"; +import { Page } from "playwright-core"; import { WorkflowFile } from "maxun-core"; import { addGoogleSheetUpdateTask, googleSheetUpdateTasks, processGoogleSheetUpdates } from "../workflow-management/integrations/gsheet"; import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from "../workflow-management/integrations/airtable"; From b2fcf021f2a9f9de04eaa7b441bd848f97ef7e78 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 22:11:01 +0530 Subject: [PATCH 11/14] fix: pagination ui --- src/components/browser/BrowserWindow.tsx | 9 ++++++++- src/components/recorder/DOMBrowserRenderer.tsx | 7 ++++--- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/src/components/browser/BrowserWindow.tsx b/src/components/browser/BrowserWindow.tsx index 7dd8b2cf..769e9048 100644 --- a/src/components/browser/BrowserWindow.tsx +++ b/src/components/browser/BrowserWindow.tsx @@ -1686,6 +1686,12 @@ export const BrowserWindow = () => { } }, [paginationMode, resetPaginationSelector]); + useEffect(() => { + if (!paginationMode || !getList) { + setHighlighterData(null); + } + }, [paginationMode, getList]); + useEffect(() => { if (paginationMode && currentListActionId) { const currentListStep = browserSteps.find( @@ -1841,7 +1847,7 @@ export const BrowserWindow = () => { > {/* Individual element highlight (for non-group or hovered element) */} {((getText && !listSelector) || - (getList && paginationMode && paginationType !== "" && + (getList && paginationMode && !paginationSelector && paginationType !== "" && !["none", "scrollDown", "scrollUp"].includes(paginationType))) && (
{ listSelector && !paginationMode && !limitMode && + captureStage === 'initial' && highlighterData.similarElements?.rects?.map((rect, index) => (
= ({ shouldHighlight = false; } else if ( paginationMode && + !paginationSelector && paginationType !== "" && !["none", "scrollDown", "scrollUp"].includes(paginationType) ) { @@ -353,7 +354,7 @@ export const DOMBrowserRenderer: React.FC = ({ const options: boolean | AddEventListenerOptions = ['wheel', 'touchstart', 'touchmove'].includes(event) ? { passive: false } : false; - iframeDoc.removeEventListener(event, handler as EventListener, options); + iframeDoc.removeEventListener(event, handler as EventListener, options); }); } @@ -588,7 +589,7 @@ export const DOMBrowserRenderer: React.FC = ({ const elementRect = element.getBoundingClientRect(); const relativeX = iframeX - elementRect.left; const relativeY = iframeY - elementRect.top; - + socket.emit("dom:click", { selector, url: snapshot.baseUrl, @@ -636,7 +637,7 @@ export const DOMBrowserRenderer: React.FC = ({ if (iframe) { const focusedElement = iframeDoc.activeElement as HTMLElement; let coordinates = { x: 0, y: 0 }; - + if (focusedElement && focusedElement !== iframeDoc.body) { // Get coordinates from the focused element const rect = focusedElement.getBoundingClientRect(); From bbfa78ea9e04aacf6467ce4adff9fb91b4aab169 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 22:13:49 +0530 Subject: [PATCH 12/14] fix: wss launch config --- browser/server.ts | 7 +++++-- docker-compose.yml | 2 ++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/browser/server.ts b/browser/server.ts index 2a70beef..8ee6ca6a 100644 --- a/browser/server.ts +++ b/browser/server.ts @@ -11,6 +11,7 @@ let browserServer: BrowserServer | null = null; // Configurable ports with defaults const BROWSER_WS_PORT = parseInt(process.env.BROWSER_WS_PORT || '3001', 10); const BROWSER_HEALTH_PORT = parseInt(process.env.BROWSER_HEALTH_PORT || '3002', 10); +const BROWSER_WS_HOST = process.env.BROWSER_WS_HOST || 'localhost'; async function start(): Promise { console.log('Starting Maxun Browser Service...'); @@ -44,17 +45,19 @@ async function start(): Promise { // Health check HTTP server const healthServer = http.createServer((req, res) => { if (req.url === '/health') { + const wsEndpoint = browserServer?.wsEndpoint(); res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ status: 'healthy', - wsEndpoint: browserServer?.wsEndpoint(), + wsEndpoint, wsPort: BROWSER_WS_PORT, healthPort: BROWSER_HEALTH_PORT, timestamp: new Date().toISOString() })); } else if (req.url === '/') { res.writeHead(200, { 'Content-Type': 'text/plain' }); - res.end(`Maxun Browser Service\nWebSocket: ${browserServer?.wsEndpoint()}\nHealth: http://localhost:${BROWSER_HEALTH_PORT}/health`); + const wsEndpoint = browserServer?.wsEndpoint().replace('localhost', BROWSER_WS_HOST) || ''; + res.end(`Maxun Browser Service\nWebSocket: ${wsEndpoint}\nHealth: http://localhost:${BROWSER_HEALTH_PORT}/health`); } else { res.writeHead(404); res.end('Not Found'); diff --git a/docker-compose.yml b/docker-compose.yml index 381c6671..700303fd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -89,6 +89,8 @@ services: - DEBUG=pw:browser* - BROWSER_WS_PORT=${BROWSER_WS_PORT:-3001} - BROWSER_HEALTH_PORT=${BROWSER_HEALTH_PORT:-3002} + - BROWSER_WS_HOST=${BROWSER_WS_HOST:-browser} + - PLAYWRIGHT_BROWSERS_PATH=/ms-playwright restart: unless-stopped healthcheck: test: ["CMD", "curl", "-f", "http://localhost:${BROWSER_HEALTH_PORT:-3002}/health"] From ace57d8ee7e8a0f5f23f61a49f4d0afb6fa30b14 Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 22:21:36 +0530 Subject: [PATCH 13/14] fix: wss endpoint --- browser/server.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/browser/server.ts b/browser/server.ts index 8ee6ca6a..e12cd79a 100644 --- a/browser/server.ts +++ b/browser/server.ts @@ -45,7 +45,7 @@ async function start(): Promise { // Health check HTTP server const healthServer = http.createServer((req, res) => { if (req.url === '/health') { - const wsEndpoint = browserServer?.wsEndpoint(); + const wsEndpoint = browserServer?.wsEndpoint().replace('localhost', BROWSER_WS_HOST) || ''; res.writeHead(200, { 'Content-Type': 'application/json' }); res.end(JSON.stringify({ status: 'healthy', From 068a28a3624bce576d0ff4e7b4ad66236c05c52b Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sun, 30 Nov 2025 22:37:10 +0530 Subject: [PATCH 14/14] fix: ui remove pagination --- src/components/recorder/RightSidePanel.tsx | 44 +++++++++++++++++++++- 1 file changed, 42 insertions(+), 2 deletions(-) diff --git a/src/components/recorder/RightSidePanel.tsx b/src/components/recorder/RightSidePanel.tsx index 8159e149..a596e8f9 100644 --- a/src/components/recorder/RightSidePanel.tsx +++ b/src/components/recorder/RightSidePanel.tsx @@ -415,6 +415,46 @@ export const RightSidePanel: React.FC = ({ onFinishCapture }, [stopGetList, resetListState]); const stopCaptureAndEmitGetListSettings = useCallback(() => { + if (autoDetectedPagination?.selector) { + const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; + if (iframeElement?.contentDocument) { + try { + function evaluateSelector(selector: string, doc: Document): Element[] { + if (selector.startsWith('//') || selector.startsWith('(//')) { + try { + const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); + const elements: Element[] = []; + for (let i = 0; i < result.snapshotLength; i++) { + const node = result.snapshotItem(i); + if (node && node.nodeType === Node.ELEMENT_NODE) { + elements.push(node as Element); + } + } + return elements; + } catch (err) { + return []; + } + } else { + try { + return Array.from(doc.querySelectorAll(selector)); + } catch (err) { + return []; + } + } + } + + const elements = evaluateSelector(autoDetectedPagination.selector, iframeElement.contentDocument); + elements.forEach((el: Element) => { + (el as HTMLElement).style.outline = ''; + (el as HTMLElement).style.outlineOffset = ''; + (el as HTMLElement).style.zIndex = ''; + }); + } catch (error) { + console.error('Error removing pagination highlight on completion:', error); + } + } + } + const latestListStep = getLatestListStep(browserSteps); if (latestListStep) { extractDataClientSide(latestListStep.listSelector!, latestListStep.fields, latestListStep.id); @@ -423,7 +463,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture ...currentWorkflowActionsState, hasScrapeListAction: true }); - + emitActionForStep(latestListStep); handleStopGetList(); @@ -441,7 +481,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture onFinishCapture(); clientSelectorGenerator.cleanup(); } - }, [socket, notify, handleStopGetList, resetInterpretationLog, finishAction, onFinishCapture, t, browserSteps, extractDataClientSide, setCurrentWorkflowActionsState, currentWorkflowActionsState, emitActionForStep]); + }, [socket, notify, handleStopGetList, resetInterpretationLog, finishAction, onFinishCapture, t, browserSteps, extractDataClientSide, setCurrentWorkflowActionsState, currentWorkflowActionsState, emitActionForStep, autoDetectedPagination]); const getLatestListStep = (steps: BrowserStep[]) => { const listSteps = steps.filter(step => step.type === 'list');