Merge pull request #907 from getmaxun/pre-release-28
chore: pre-release v0.0.28
This commit is contained in:
@@ -1,4 +1,4 @@
|
|||||||
FROM --platform=$BUILDPLATFORM mcr.microsoft.com/playwright:v1.46.0-noble
|
FROM --platform=$BUILDPLATFORM node:20-slim
|
||||||
|
|
||||||
# Set working directory
|
# Set working directory
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
@@ -18,31 +18,6 @@ COPY server/tsconfig.json ./server/
|
|||||||
# Install dependencies
|
# Install dependencies
|
||||||
RUN npm install --legacy-peer-deps
|
RUN npm install --legacy-peer-deps
|
||||||
|
|
||||||
# Create the Chromium data directory with necessary permissions
|
|
||||||
RUN mkdir -p /tmp/chromium-data-dir && \
|
|
||||||
chmod -R 777 /tmp/chromium-data-dir
|
|
||||||
|
|
||||||
# Install dependencies
|
|
||||||
RUN apt-get update && apt-get install -y \
|
|
||||||
libgbm1 \
|
|
||||||
libnss3 \
|
|
||||||
libatk1.0-0 \
|
|
||||||
libatk-bridge2.0-0 \
|
|
||||||
libdrm2 \
|
|
||||||
libxkbcommon0 \
|
|
||||||
libglib2.0-0 \
|
|
||||||
libdbus-1-3 \
|
|
||||||
libx11-xcb1 \
|
|
||||||
libxcb1 \
|
|
||||||
libxcomposite1 \
|
|
||||||
libxcursor1 \
|
|
||||||
libxdamage1 \
|
|
||||||
libxext6 \
|
|
||||||
libxi6 \
|
|
||||||
libxtst6 \
|
|
||||||
&& rm -rf /var/lib/apt/lists/* \
|
|
||||||
&& mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix
|
|
||||||
|
|
||||||
# Expose backend port
|
# Expose backend port
|
||||||
EXPOSE ${BACKEND_PORT:-8080}
|
EXPOSE ${BACKEND_PORT:-8080}
|
||||||
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ WORKDIR /app
|
|||||||
COPY browser/package*.json ./
|
COPY browser/package*.json ./
|
||||||
|
|
||||||
# Install dependencies
|
# Install dependencies
|
||||||
RUN npm ci
|
RUN npm install
|
||||||
|
|
||||||
# Copy TypeScript source and config
|
# Copy TypeScript source and config
|
||||||
COPY browser/server.ts ./
|
COPY browser/server.ts ./
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ let browserServer: BrowserServer | null = null;
|
|||||||
// Configurable ports with defaults
|
// Configurable ports with defaults
|
||||||
const BROWSER_WS_PORT = parseInt(process.env.BROWSER_WS_PORT || '3001', 10);
|
const BROWSER_WS_PORT = parseInt(process.env.BROWSER_WS_PORT || '3001', 10);
|
||||||
const BROWSER_HEALTH_PORT = parseInt(process.env.BROWSER_HEALTH_PORT || '3002', 10);
|
const BROWSER_HEALTH_PORT = parseInt(process.env.BROWSER_HEALTH_PORT || '3002', 10);
|
||||||
|
const BROWSER_WS_HOST = process.env.BROWSER_WS_HOST || 'localhost';
|
||||||
|
|
||||||
async function start(): Promise<void> {
|
async function start(): Promise<void> {
|
||||||
console.log('Starting Maxun Browser Service...');
|
console.log('Starting Maxun Browser Service...');
|
||||||
@@ -44,17 +45,19 @@ async function start(): Promise<void> {
|
|||||||
// Health check HTTP server
|
// Health check HTTP server
|
||||||
const healthServer = http.createServer((req, res) => {
|
const healthServer = http.createServer((req, res) => {
|
||||||
if (req.url === '/health') {
|
if (req.url === '/health') {
|
||||||
|
const wsEndpoint = browserServer?.wsEndpoint().replace('localhost', BROWSER_WS_HOST) || '';
|
||||||
res.writeHead(200, { 'Content-Type': 'application/json' });
|
res.writeHead(200, { 'Content-Type': 'application/json' });
|
||||||
res.end(JSON.stringify({
|
res.end(JSON.stringify({
|
||||||
status: 'healthy',
|
status: 'healthy',
|
||||||
wsEndpoint: browserServer?.wsEndpoint(),
|
wsEndpoint,
|
||||||
wsPort: BROWSER_WS_PORT,
|
wsPort: BROWSER_WS_PORT,
|
||||||
healthPort: BROWSER_HEALTH_PORT,
|
healthPort: BROWSER_HEALTH_PORT,
|
||||||
timestamp: new Date().toISOString()
|
timestamp: new Date().toISOString()
|
||||||
}));
|
}));
|
||||||
} else if (req.url === '/') {
|
} else if (req.url === '/') {
|
||||||
res.writeHead(200, { 'Content-Type': 'text/plain' });
|
res.writeHead(200, { 'Content-Type': 'text/plain' });
|
||||||
res.end(`Maxun Browser Service\nWebSocket: ${browserServer?.wsEndpoint()}\nHealth: http://localhost:${BROWSER_HEALTH_PORT}/health`);
|
const wsEndpoint = browserServer?.wsEndpoint().replace('localhost', BROWSER_WS_HOST) || '';
|
||||||
|
res.end(`Maxun Browser Service\nWebSocket: ${wsEndpoint}\nHealth: http://localhost:${BROWSER_HEALTH_PORT}/health`);
|
||||||
} else {
|
} else {
|
||||||
res.writeHead(404);
|
res.writeHead(404);
|
||||||
res.end('Not Found');
|
res.end('Not Found');
|
||||||
|
|||||||
@@ -30,9 +30,9 @@ services:
|
|||||||
- minio_data:/data
|
- minio_data:/data
|
||||||
|
|
||||||
backend:
|
backend:
|
||||||
#build:
|
# build:
|
||||||
#context: .
|
# context: .
|
||||||
#dockerfile: server/Dockerfile
|
# dockerfile: Dockerfile.backend
|
||||||
image: getmaxun/maxun-backend:latest
|
image: getmaxun/maxun-backend:latest
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
ports:
|
ports:
|
||||||
@@ -60,9 +60,9 @@ services:
|
|||||||
- /var/run/dbus:/var/run/dbus
|
- /var/run/dbus:/var/run/dbus
|
||||||
|
|
||||||
frontend:
|
frontend:
|
||||||
#build:
|
# build:
|
||||||
#context: .
|
# context: .
|
||||||
#dockerfile: Dockerfile
|
# dockerfile: Dockerfile.frontend
|
||||||
image: getmaxun/maxun-frontend:latest
|
image: getmaxun/maxun-frontend:latest
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
ports:
|
ports:
|
||||||
@@ -89,6 +89,8 @@ services:
|
|||||||
- DEBUG=pw:browser*
|
- DEBUG=pw:browser*
|
||||||
- BROWSER_WS_PORT=${BROWSER_WS_PORT:-3001}
|
- BROWSER_WS_PORT=${BROWSER_WS_PORT:-3001}
|
||||||
- BROWSER_HEALTH_PORT=${BROWSER_HEALTH_PORT:-3002}
|
- BROWSER_HEALTH_PORT=${BROWSER_HEALTH_PORT:-3002}
|
||||||
|
- BROWSER_WS_HOST=${BROWSER_WS_HOST:-browser}
|
||||||
|
- PLAYWRIGHT_BROWSERS_PATH=/ms-playwright
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
healthcheck:
|
healthcheck:
|
||||||
test: ["CMD", "curl", "-f", "http://localhost:${BROWSER_HEALTH_PORT:-3002}/health"]
|
test: ["CMD", "curl", "-f", "http://localhost:${BROWSER_HEALTH_PORT:-3002}/health"]
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "maxun-core",
|
"name": "maxun-core",
|
||||||
"version": "0.0.27",
|
"version": "0.0.28",
|
||||||
"description": "Core package for Maxun, responsible for data extraction",
|
"description": "Core package for Maxun, responsible for data extraction",
|
||||||
"main": "build/index.js",
|
"main": "build/index.js",
|
||||||
"typings": "build/index.d.ts",
|
"typings": "build/index.d.ts",
|
||||||
|
|||||||
@@ -1,6 +1,6 @@
|
|||||||
{
|
{
|
||||||
"name": "maxun",
|
"name": "maxun",
|
||||||
"version": "0.0.27",
|
"version": "0.0.28",
|
||||||
"author": "Maxun",
|
"author": "Maxun",
|
||||||
"license": "AGPL-3.0-or-later",
|
"license": "AGPL-3.0-or-later",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
@@ -52,7 +52,7 @@
|
|||||||
"lodash": "^4.17.21",
|
"lodash": "^4.17.21",
|
||||||
"loglevel": "^1.8.0",
|
"loglevel": "^1.8.0",
|
||||||
"loglevel-plugin-remote": "^0.6.8",
|
"loglevel-plugin-remote": "^0.6.8",
|
||||||
"maxun-core": "^0.0.27",
|
"maxun-core": "^0.0.28",
|
||||||
"minio": "^8.0.1",
|
"minio": "^8.0.1",
|
||||||
"moment-timezone": "^0.5.45",
|
"moment-timezone": "^0.5.45",
|
||||||
"node-cron": "^3.0.3",
|
"node-cron": "^3.0.3",
|
||||||
@@ -131,4 +131,4 @@
|
|||||||
"vite": "^5.4.10",
|
"vite": "^5.4.10",
|
||||||
"zod": "^3.25.62"
|
"zod": "^3.25.62"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -11,7 +11,7 @@ import { io, Socket } from "socket.io-client";
|
|||||||
import { BinaryOutputService } from "../storage/mino";
|
import { BinaryOutputService } from "../storage/mino";
|
||||||
import { AuthenticatedRequest } from "../routes/record"
|
import { AuthenticatedRequest } from "../routes/record"
|
||||||
import {capture} from "../utils/analytics";
|
import {capture} from "../utils/analytics";
|
||||||
import { Page } from "playwright";
|
import { Page } from "playwright-core";
|
||||||
import { WorkflowFile } from "maxun-core";
|
import { WorkflowFile } from "maxun-core";
|
||||||
import { addGoogleSheetUpdateTask, googleSheetUpdateTasks, processGoogleSheetUpdates } from "../workflow-management/integrations/gsheet";
|
import { addGoogleSheetUpdateTask, googleSheetUpdateTasks, processGoogleSheetUpdates } from "../workflow-management/integrations/gsheet";
|
||||||
import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from "../workflow-management/integrations/airtable";
|
import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from "../workflow-management/integrations/airtable";
|
||||||
|
|||||||
@@ -550,9 +550,9 @@ export class RemoteBrowser {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']);
|
const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']);
|
||||||
await blocker.enableBlockingInPage(this.currentPage);
|
await blocker.enableBlockingInPage(this.currentPage as any);
|
||||||
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
||||||
await blocker.disableBlockingInPage(this.currentPage);
|
await blocker.disableBlockingInPage(this.currentPage as any);
|
||||||
console.log('Adblocker initialized');
|
console.log('Adblocker initialized');
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.warn('Failed to initialize adblocker, continuing without it:', error.message);
|
console.warn('Failed to initialize adblocker, continuing without it:', error.message);
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { connectToRemoteBrowser } from "../browser-management/browserConnection";
|
import { Page } from "playwright-core";
|
||||||
import { parseMarkdown } from "./markdown";
|
import { parseMarkdown } from "./markdown";
|
||||||
import logger from "../logger";
|
import logger from "../logger";
|
||||||
|
|
||||||
@@ -21,115 +21,105 @@ async function gotoWithFallback(page: any, url: string) {
|
|||||||
* Fetches a webpage, strips scripts/styles/images/etc,
|
* Fetches a webpage, strips scripts/styles/images/etc,
|
||||||
* returns clean Markdown using parser.
|
* returns clean Markdown using parser.
|
||||||
* @param url - The URL to convert
|
* @param url - The URL to convert
|
||||||
* @param existingPage - Optional existing Playwright page instance to reuse
|
* @param page - Existing Playwright page instance to use
|
||||||
*/
|
*/
|
||||||
export async function convertPageToMarkdown(url: string): Promise<string> {
|
export async function convertPageToMarkdown(url: string, page: Page): Promise<string> {
|
||||||
const browser = await connectToRemoteBrowser();
|
try {
|
||||||
const page = await browser.newPage();
|
logger.log('info', `[Scrape] Using existing page instance for markdown conversion of ${url}`);
|
||||||
|
|
||||||
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
await gotoWithFallback(page, url);
|
||||||
|
|
||||||
const cleanedHtml = await page.evaluate(() => {
|
const cleanedHtml = await page.evaluate(() => {
|
||||||
const selectors = [
|
const selectors = [
|
||||||
"script",
|
"script",
|
||||||
"style",
|
"style",
|
||||||
"link[rel='stylesheet']",
|
"link[rel='stylesheet']",
|
||||||
"noscript",
|
"noscript",
|
||||||
"meta",
|
"meta",
|
||||||
"svg",
|
"svg",
|
||||||
"img",
|
"img",
|
||||||
"picture",
|
"picture",
|
||||||
"source",
|
"source",
|
||||||
"video",
|
"video",
|
||||||
"audio",
|
"audio",
|
||||||
"iframe",
|
"iframe",
|
||||||
"object",
|
"object",
|
||||||
"embed"
|
"embed"
|
||||||
];
|
];
|
||||||
|
|
||||||
selectors.forEach(sel => {
|
selectors.forEach(sel => {
|
||||||
document.querySelectorAll(sel).forEach(e => e.remove());
|
document.querySelectorAll(sel).forEach(e => e.remove());
|
||||||
});
|
|
||||||
|
|
||||||
// Remove inline event handlers (onclick, onload…)
|
|
||||||
const all = document.querySelectorAll("*");
|
|
||||||
all.forEach(el => {
|
|
||||||
[...el.attributes].forEach(attr => {
|
|
||||||
if (attr.name.startsWith("on")) {
|
|
||||||
el.removeAttribute(attr.name);
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const all = document.querySelectorAll("*");
|
||||||
|
all.forEach(el => {
|
||||||
|
[...el.attributes].forEach(attr => {
|
||||||
|
if (attr.name.startsWith("on")) {
|
||||||
|
el.removeAttribute(attr.name);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return document.documentElement.outerHTML;
|
||||||
});
|
});
|
||||||
|
|
||||||
return document.documentElement.outerHTML;
|
const markdown = await parseMarkdown(cleanedHtml, url);
|
||||||
});
|
return markdown;
|
||||||
|
} catch (error: any) {
|
||||||
if (shouldCloseBrowser && browser) {
|
logger.error(`[Scrape] Error during markdown conversion: ${error.message}`);
|
||||||
logger.log('info', `[Scrape] Closing browser instance created for markdown conversion`);
|
throw error;
|
||||||
await browser.close();
|
|
||||||
} else {
|
|
||||||
logger.log('info', `[Scrape] Keeping existing browser instance open after markdown conversion`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Convert cleaned HTML → Markdown
|
|
||||||
const markdown = await parseMarkdown(cleanedHtml, url);
|
|
||||||
return markdown;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Fetches a webpage, strips scripts/styles/images/etc,
|
* Fetches a webpage, strips scripts/styles/images/etc,
|
||||||
* returns clean HTML.
|
* returns clean HTML.
|
||||||
* @param url - The URL to convert
|
* @param url - The URL to convert
|
||||||
* @param existingPage - Optional existing Playwright page instance to reuse
|
* @param page - Existing Playwright page instance to use
|
||||||
*/
|
*/
|
||||||
export async function convertPageToHTML(url: string): Promise<string> {
|
export async function convertPageToHTML(url: string, page: Page): Promise<string> {
|
||||||
const browser = await connectToRemoteBrowser();
|
try {
|
||||||
const page = await browser.newPage();
|
logger.log('info', `[Scrape] Using existing page instance for HTML conversion of ${url}`);
|
||||||
|
|
||||||
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
await gotoWithFallback(page, url);
|
||||||
|
|
||||||
const cleanedHtml = await page.evaluate(() => {
|
const cleanedHtml = await page.evaluate(() => {
|
||||||
const selectors = [
|
const selectors = [
|
||||||
"script",
|
"script",
|
||||||
"style",
|
"style",
|
||||||
"link[rel='stylesheet']",
|
"link[rel='stylesheet']",
|
||||||
"noscript",
|
"noscript",
|
||||||
"meta",
|
"meta",
|
||||||
"svg",
|
"svg",
|
||||||
"img",
|
"img",
|
||||||
"picture",
|
"picture",
|
||||||
"source",
|
"source",
|
||||||
"video",
|
"video",
|
||||||
"audio",
|
"audio",
|
||||||
"iframe",
|
"iframe",
|
||||||
"object",
|
"object",
|
||||||
"embed"
|
"embed"
|
||||||
];
|
];
|
||||||
|
|
||||||
selectors.forEach(sel => {
|
selectors.forEach(sel => {
|
||||||
document.querySelectorAll(sel).forEach(e => e.remove());
|
document.querySelectorAll(sel).forEach(e => e.remove());
|
||||||
});
|
|
||||||
|
|
||||||
// Remove inline event handlers (onclick, onload…)
|
|
||||||
const all = document.querySelectorAll("*");
|
|
||||||
all.forEach(el => {
|
|
||||||
[...el.attributes].forEach(attr => {
|
|
||||||
if (attr.name.startsWith("on")) {
|
|
||||||
el.removeAttribute(attr.name);
|
|
||||||
}
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const all = document.querySelectorAll("*");
|
||||||
|
all.forEach(el => {
|
||||||
|
[...el.attributes].forEach(attr => {
|
||||||
|
if (attr.name.startsWith("on")) {
|
||||||
|
el.removeAttribute(attr.name);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
return document.documentElement.outerHTML;
|
||||||
});
|
});
|
||||||
|
|
||||||
return document.documentElement.outerHTML;
|
return cleanedHtml;
|
||||||
});
|
} catch (error: any) {
|
||||||
|
logger.error(`[Scrape] Error during HTML conversion: ${error.message}`);
|
||||||
if (shouldCloseBrowser && browser) {
|
throw error;
|
||||||
logger.log('info', `[Scrape] Closing browser instance created for HTML conversion`);
|
|
||||||
await browser.close();
|
|
||||||
} else {
|
|
||||||
logger.log('info', `[Scrape] Keeping existing browser instance open after HTML conversion`);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Return cleaned HTML directly
|
|
||||||
return cleanedHtml;
|
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -2,7 +2,6 @@ import { router as record } from './record';
|
|||||||
import { router as workflow } from './workflow';
|
import { router as workflow } from './workflow';
|
||||||
import { router as storage } from './storage';
|
import { router as storage } from './storage';
|
||||||
import { router as auth } from './auth';
|
import { router as auth } from './auth';
|
||||||
import { router as integration } from './integration';
|
|
||||||
import { router as proxy } from './proxy';
|
import { router as proxy } from './proxy';
|
||||||
import { router as webhook } from './webhook';
|
import { router as webhook } from './webhook';
|
||||||
|
|
||||||
@@ -11,7 +10,6 @@ export {
|
|||||||
workflow,
|
workflow,
|
||||||
storage,
|
storage,
|
||||||
auth,
|
auth,
|
||||||
integration,
|
|
||||||
proxy,
|
proxy,
|
||||||
webhook
|
webhook
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -15,7 +15,6 @@ import { encrypt, decrypt } from '../utils/auth';
|
|||||||
import { WorkflowFile } from 'maxun-core';
|
import { WorkflowFile } from 'maxun-core';
|
||||||
import { cancelScheduledWorkflow, scheduleWorkflow } from '../storage/schedule';
|
import { cancelScheduledWorkflow, scheduleWorkflow } from '../storage/schedule';
|
||||||
import { pgBossClient } from '../storage/pgboss';
|
import { pgBossClient } from '../storage/pgboss';
|
||||||
chromium.use(stealthPlugin());
|
|
||||||
|
|
||||||
export const router = Router();
|
export const router = Router();
|
||||||
|
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ import { Server } from "socket.io";
|
|||||||
import cors from 'cors';
|
import cors from 'cors';
|
||||||
import dotenv from 'dotenv';
|
import dotenv from 'dotenv';
|
||||||
dotenv.config();
|
dotenv.config();
|
||||||
import { record, workflow, storage, auth, integration, proxy, webhook } from './routes';
|
import { record, workflow, storage, auth, proxy, webhook } from './routes';
|
||||||
import { BrowserPool } from "./browser-management/classes/BrowserPool";
|
import { BrowserPool } from "./browser-management/classes/BrowserPool";
|
||||||
import logger from './logger';
|
import logger from './logger';
|
||||||
import sequelize, { connectDB, syncDB } from './storage/db'
|
import sequelize, { connectDB, syncDB } from './storage/db'
|
||||||
@@ -107,7 +107,6 @@ app.use('/record', record);
|
|||||||
app.use('/workflow', workflow);
|
app.use('/workflow', workflow);
|
||||||
app.use('/storage', storage);
|
app.use('/storage', storage);
|
||||||
app.use('/auth', auth);
|
app.use('/auth', auth);
|
||||||
app.use('/integration', integration);
|
|
||||||
app.use('/proxy', proxy);
|
app.use('/proxy', proxy);
|
||||||
app.use('/api-docs', swaggerUi.serve, swaggerUi.setup(swaggerSpec));
|
app.use('/api-docs', swaggerUi.serve, swaggerUi.setup(swaggerSpec));
|
||||||
|
|
||||||
@@ -179,8 +178,6 @@ if (require.main === module) {
|
|||||||
|
|
||||||
await startWorkers();
|
await startWorkers();
|
||||||
|
|
||||||
io = new Server(server);
|
|
||||||
|
|
||||||
io.of('/queued-run').on('connection', (socket) => {
|
io.of('/queued-run').on('connection', (socket) => {
|
||||||
const userId = socket.handshake.query.userId as string;
|
const userId = socket.handshake.query.userId as string;
|
||||||
|
|
||||||
|
|||||||
@@ -1686,6 +1686,12 @@ export const BrowserWindow = () => {
|
|||||||
}
|
}
|
||||||
}, [paginationMode, resetPaginationSelector]);
|
}, [paginationMode, resetPaginationSelector]);
|
||||||
|
|
||||||
|
useEffect(() => {
|
||||||
|
if (!paginationMode || !getList) {
|
||||||
|
setHighlighterData(null);
|
||||||
|
}
|
||||||
|
}, [paginationMode, getList]);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (paginationMode && currentListActionId) {
|
if (paginationMode && currentListActionId) {
|
||||||
const currentListStep = browserSteps.find(
|
const currentListStep = browserSteps.find(
|
||||||
@@ -1841,7 +1847,7 @@ export const BrowserWindow = () => {
|
|||||||
>
|
>
|
||||||
{/* Individual element highlight (for non-group or hovered element) */}
|
{/* Individual element highlight (for non-group or hovered element) */}
|
||||||
{((getText && !listSelector) ||
|
{((getText && !listSelector) ||
|
||||||
(getList && paginationMode && paginationType !== "" &&
|
(getList && paginationMode && !paginationSelector && paginationType !== "" &&
|
||||||
!["none", "scrollDown", "scrollUp"].includes(paginationType))) && (
|
!["none", "scrollDown", "scrollUp"].includes(paginationType))) && (
|
||||||
<div
|
<div
|
||||||
style={{
|
style={{
|
||||||
@@ -1909,6 +1915,7 @@ export const BrowserWindow = () => {
|
|||||||
listSelector &&
|
listSelector &&
|
||||||
!paginationMode &&
|
!paginationMode &&
|
||||||
!limitMode &&
|
!limitMode &&
|
||||||
|
captureStage === 'initial' &&
|
||||||
highlighterData.similarElements?.rects?.map((rect, index) => (
|
highlighterData.similarElements?.rects?.map((rect, index) => (
|
||||||
<React.Fragment key={`item-${index}`}>
|
<React.Fragment key={`item-${index}`}>
|
||||||
<div
|
<div
|
||||||
|
|||||||
@@ -268,6 +268,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
shouldHighlight = false;
|
shouldHighlight = false;
|
||||||
} else if (
|
} else if (
|
||||||
paginationMode &&
|
paginationMode &&
|
||||||
|
!paginationSelector &&
|
||||||
paginationType !== "" &&
|
paginationType !== "" &&
|
||||||
!["none", "scrollDown", "scrollUp"].includes(paginationType)
|
!["none", "scrollDown", "scrollUp"].includes(paginationType)
|
||||||
) {
|
) {
|
||||||
@@ -353,7 +354,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
const options: boolean | AddEventListenerOptions = ['wheel', 'touchstart', 'touchmove'].includes(event)
|
const options: boolean | AddEventListenerOptions = ['wheel', 'touchstart', 'touchmove'].includes(event)
|
||||||
? { passive: false }
|
? { passive: false }
|
||||||
: false;
|
: false;
|
||||||
iframeDoc.removeEventListener(event, handler as EventListener, options);
|
iframeDoc.removeEventListener(event, handler as EventListener, options);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -588,7 +589,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
const elementRect = element.getBoundingClientRect();
|
const elementRect = element.getBoundingClientRect();
|
||||||
const relativeX = iframeX - elementRect.left;
|
const relativeX = iframeX - elementRect.left;
|
||||||
const relativeY = iframeY - elementRect.top;
|
const relativeY = iframeY - elementRect.top;
|
||||||
|
|
||||||
socket.emit("dom:click", {
|
socket.emit("dom:click", {
|
||||||
selector,
|
selector,
|
||||||
url: snapshot.baseUrl,
|
url: snapshot.baseUrl,
|
||||||
@@ -636,7 +637,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
|
|||||||
if (iframe) {
|
if (iframe) {
|
||||||
const focusedElement = iframeDoc.activeElement as HTMLElement;
|
const focusedElement = iframeDoc.activeElement as HTMLElement;
|
||||||
let coordinates = { x: 0, y: 0 };
|
let coordinates = { x: 0, y: 0 };
|
||||||
|
|
||||||
if (focusedElement && focusedElement !== iframeDoc.body) {
|
if (focusedElement && focusedElement !== iframeDoc.body) {
|
||||||
// Get coordinates from the focused element
|
// Get coordinates from the focused element
|
||||||
const rect = focusedElement.getBoundingClientRect();
|
const rect = focusedElement.getBoundingClientRect();
|
||||||
|
|||||||
@@ -415,6 +415,46 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
}, [stopGetList, resetListState]);
|
}, [stopGetList, resetListState]);
|
||||||
|
|
||||||
const stopCaptureAndEmitGetListSettings = useCallback(() => {
|
const stopCaptureAndEmitGetListSettings = useCallback(() => {
|
||||||
|
if (autoDetectedPagination?.selector) {
|
||||||
|
const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
|
||||||
|
if (iframeElement?.contentDocument) {
|
||||||
|
try {
|
||||||
|
function evaluateSelector(selector: string, doc: Document): Element[] {
|
||||||
|
if (selector.startsWith('//') || selector.startsWith('(//')) {
|
||||||
|
try {
|
||||||
|
const result = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||||
|
const elements: Element[] = [];
|
||||||
|
for (let i = 0; i < result.snapshotLength; i++) {
|
||||||
|
const node = result.snapshotItem(i);
|
||||||
|
if (node && node.nodeType === Node.ELEMENT_NODE) {
|
||||||
|
elements.push(node as Element);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return elements;
|
||||||
|
} catch (err) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
try {
|
||||||
|
return Array.from(doc.querySelectorAll(selector));
|
||||||
|
} catch (err) {
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const elements = evaluateSelector(autoDetectedPagination.selector, iframeElement.contentDocument);
|
||||||
|
elements.forEach((el: Element) => {
|
||||||
|
(el as HTMLElement).style.outline = '';
|
||||||
|
(el as HTMLElement).style.outlineOffset = '';
|
||||||
|
(el as HTMLElement).style.zIndex = '';
|
||||||
|
});
|
||||||
|
} catch (error) {
|
||||||
|
console.error('Error removing pagination highlight on completion:', error);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const latestListStep = getLatestListStep(browserSteps);
|
const latestListStep = getLatestListStep(browserSteps);
|
||||||
if (latestListStep) {
|
if (latestListStep) {
|
||||||
extractDataClientSide(latestListStep.listSelector!, latestListStep.fields, latestListStep.id);
|
extractDataClientSide(latestListStep.listSelector!, latestListStep.fields, latestListStep.id);
|
||||||
@@ -423,7 +463,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
...currentWorkflowActionsState,
|
...currentWorkflowActionsState,
|
||||||
hasScrapeListAction: true
|
hasScrapeListAction: true
|
||||||
});
|
});
|
||||||
|
|
||||||
emitActionForStep(latestListStep);
|
emitActionForStep(latestListStep);
|
||||||
|
|
||||||
handleStopGetList();
|
handleStopGetList();
|
||||||
@@ -441,7 +481,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
|
|||||||
onFinishCapture();
|
onFinishCapture();
|
||||||
clientSelectorGenerator.cleanup();
|
clientSelectorGenerator.cleanup();
|
||||||
}
|
}
|
||||||
}, [socket, notify, handleStopGetList, resetInterpretationLog, finishAction, onFinishCapture, t, browserSteps, extractDataClientSide, setCurrentWorkflowActionsState, currentWorkflowActionsState, emitActionForStep]);
|
}, [socket, notify, handleStopGetList, resetInterpretationLog, finishAction, onFinishCapture, t, browserSteps, extractDataClientSide, setCurrentWorkflowActionsState, currentWorkflowActionsState, emitActionForStep, autoDetectedPagination]);
|
||||||
|
|
||||||
const getLatestListStep = (steps: BrowserStep[]) => {
|
const getLatestListStep = (steps: BrowserStep[]) => {
|
||||||
const listSteps = steps.filter(step => step.type === 'list');
|
const listSteps = steps.filter(step => step.type === 'list');
|
||||||
|
|||||||
Reference in New Issue
Block a user