feat: add separate browser service
This commit is contained in:
@@ -18,9 +18,6 @@ COPY server/tsconfig.json ./server/
|
|||||||
# Install dependencies
|
# Install dependencies
|
||||||
RUN npm install --legacy-peer-deps
|
RUN npm install --legacy-peer-deps
|
||||||
|
|
||||||
# Install Playwright browsers and dependencies
|
|
||||||
RUN npx playwright install --with-deps chromium
|
|
||||||
|
|
||||||
# Create the Chromium data directory with necessary permissions
|
# Create the Chromium data directory with necessary permissions
|
||||||
RUN mkdir -p /tmp/chromium-data-dir && \
|
RUN mkdir -p /tmp/chromium-data-dir && \
|
||||||
chmod -R 777 /tmp/chromium-data-dir
|
chmod -R 777 /tmp/chromium-data-dir
|
||||||
|
|||||||
@@ -38,3 +38,8 @@ AIRTABLE_REDIRECT_URI=http://localhost:8080/auth/airtable/callback
|
|||||||
|
|
||||||
# Telemetry Settings - Please keep it enabled. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes.
|
# Telemetry Settings - Please keep it enabled. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes.
|
||||||
MAXUN_TELEMETRY=true
|
MAXUN_TELEMETRY=true
|
||||||
|
|
||||||
|
# WebSocket port for browser CDP connections
|
||||||
|
BROWSER_WS_PORT=3001
|
||||||
|
BROWSER_HEALTH_PORT=3002
|
||||||
|
BROWSER_WS_HOST=browser
|
||||||
9
browser/.dockerignore
Normal file
9
browser/.dockerignore
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
node_modules
|
||||||
|
npm-debug.log
|
||||||
|
.env
|
||||||
|
.git
|
||||||
|
.gitignore
|
||||||
|
dist
|
||||||
|
*.ts
|
||||||
|
!*.d.ts
|
||||||
|
tsconfig.json
|
||||||
30
browser/Dockerfile
Normal file
30
browser/Dockerfile
Normal file
@@ -0,0 +1,30 @@
|
|||||||
|
FROM mcr.microsoft.com/playwright:v1.57.0-jammy
|
||||||
|
|
||||||
|
WORKDIR /app
|
||||||
|
|
||||||
|
# Copy package files
|
||||||
|
COPY browser/package*.json ./
|
||||||
|
|
||||||
|
# Install dependencies
|
||||||
|
RUN npm ci
|
||||||
|
|
||||||
|
# Copy TypeScript source and config
|
||||||
|
COPY browser/server.ts ./
|
||||||
|
COPY browser/tsconfig.json ./
|
||||||
|
|
||||||
|
# Build TypeScript
|
||||||
|
RUN npm run build
|
||||||
|
|
||||||
|
# Accept build arguments for ports (with defaults)
|
||||||
|
ARG BROWSER_WS_PORT=3001
|
||||||
|
ARG BROWSER_HEALTH_PORT=3002
|
||||||
|
|
||||||
|
# Set as environment variables
|
||||||
|
ENV BROWSER_WS_PORT=${BROWSER_WS_PORT}
|
||||||
|
ENV BROWSER_HEALTH_PORT=${BROWSER_HEALTH_PORT}
|
||||||
|
|
||||||
|
# Expose ports dynamically based on build args
|
||||||
|
EXPOSE ${BROWSER_WS_PORT} ${BROWSER_HEALTH_PORT}
|
||||||
|
|
||||||
|
# Start the browser service (run compiled JS)
|
||||||
|
CMD ["node", "dist/server.js"]
|
||||||
21
browser/package.json
Normal file
21
browser/package.json
Normal file
@@ -0,0 +1,21 @@
|
|||||||
|
{
|
||||||
|
"name": "maxun-browser-service",
|
||||||
|
"version": "1.0.0",
|
||||||
|
"description": "Browser service that exposes Playwright browsers via WebSocket with stealth plugins",
|
||||||
|
"main": "dist/server.js",
|
||||||
|
"scripts": {
|
||||||
|
"build": "tsc",
|
||||||
|
"start": "node dist/server.js",
|
||||||
|
"dev": "ts-node server.ts"
|
||||||
|
},
|
||||||
|
"dependencies": {
|
||||||
|
"playwright": "1.57.0",
|
||||||
|
"playwright-extra": "^4.3.6",
|
||||||
|
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
||||||
|
},
|
||||||
|
"devDependencies": {
|
||||||
|
"@types/node": "^22.7.9",
|
||||||
|
"typescript": "^5.0.0",
|
||||||
|
"ts-node": "^10.9.2"
|
||||||
|
}
|
||||||
|
}
|
||||||
92
browser/server.ts
Normal file
92
browser/server.ts
Normal file
@@ -0,0 +1,92 @@
|
|||||||
|
import { chromium } from 'playwright-extra';
|
||||||
|
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||||
|
import http from 'http';
|
||||||
|
import type { BrowserServer } from 'playwright';
|
||||||
|
|
||||||
|
// Apply stealth plugin to chromium
|
||||||
|
chromium.use(stealthPlugin());
|
||||||
|
|
||||||
|
let browserServer: BrowserServer | null = null;
|
||||||
|
|
||||||
|
/**
 * Parse a TCP port number taken from an environment variable.
 *
 * @param name - Name of the variable, used only in the error message.
 * @param raw - Raw value of the variable (may be undefined or empty).
 * @param fallback - Port used when the variable is unset or empty.
 * @returns The parsed port.
 * @throws Error when the value does not parse to an integer in 0-65535, so a
 *   misconfiguration is reported at startup instead of passing `NaN` on as a port.
 */
function parsePort(name: string, raw: string | undefined, fallback: number): number {
  // `||` rather than `??` is deliberate: an empty string also selects the default.
  const value = raw || String(fallback);
  const port = Number.parseInt(value, 10);
  if (!Number.isInteger(port) || port < 0 || port > 65535) {
    throw new Error(`Invalid ${name}: "${value}" is not a valid TCP port (0-65535)`);
  }
  return port;
}

// Configurable ports with defaults
const BROWSER_WS_PORT = parsePort('BROWSER_WS_PORT', process.env.BROWSER_WS_PORT, 3001);
const BROWSER_HEALTH_PORT = parsePort('BROWSER_HEALTH_PORT', process.env.BROWSER_HEALTH_PORT, 3002);
|
||||||
|
|
||||||
|
/**
 * Launch the Chromium browser server and the HTTP health endpoint.
 *
 * The browser server listens on BROWSER_WS_PORT; the health server listens on
 * BROWSER_HEALTH_PORT and serves:
 *   - `/health`: JSON with status, wsEndpoint, ports and a timestamp. Answers
 *     200 / "healthy" while the browser server is up and 503 / "unhealthy"
 *     once it has emitted `close`.
 *   - `/`: a short plain-text summary.
 *   - anything else: 404.
 *
 * Any startup failure is logged and terminates the process with exit code 1.
 */
async function start(): Promise<void> {
  console.log('Starting Maxun Browser Service...');
  console.log(`WebSocket port: ${BROWSER_WS_PORT}`);
  console.log(`Health check port: ${BROWSER_HEALTH_PORT}`);

  try {
    // Launch browser server that exposes WebSocket endpoint
    const server = await chromium.launchServer({
      headless: true,
      args: [
        '--disable-blink-features=AutomationControlled',
        '--disable-web-security',
        '--disable-features=IsolateOrigins,site-per-process',
        '--disable-site-isolation-trials',
        '--disable-extensions',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--force-color-profile=srgb',
        '--force-device-scale-factor=2',
        '--ignore-certificate-errors',
        '--mute-audio'
      ],
      port: BROWSER_WS_PORT,
    });
    browserServer = server;

    // Track liveness so /health stops claiming "healthy" once the browser
    // server has gone away (crash or shutdown).
    let browserAlive = true;
    server.on('close', () => {
      browserAlive = false;
      console.warn('Browser server closed; /health now reports unhealthy');
    });

    console.log(`✅ Browser WebSocket endpoint ready: ${server.wsEndpoint()}`);
    console.log(`✅ Stealth plugin enabled`);

    // Health check HTTP server
    const healthServer = http.createServer((req, res) => {
      if (req.url === '/health') {
        // A non-2xx status lets `curl -f` style probes fail without parsing the body.
        res.writeHead(browserAlive ? 200 : 503, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({
          status: browserAlive ? 'healthy' : 'unhealthy',
          wsEndpoint: browserServer?.wsEndpoint(),
          wsPort: BROWSER_WS_PORT,
          healthPort: BROWSER_HEALTH_PORT,
          timestamp: new Date().toISOString()
        }));
      } else if (req.url === '/') {
        res.writeHead(200, { 'Content-Type': 'text/plain' });
        res.end(`Maxun Browser Service\nWebSocket: ${browserServer?.wsEndpoint()}\nHealth: http://localhost:${BROWSER_HEALTH_PORT}/health`);
      } else {
        res.writeHead(404);
        res.end('Not Found');
      }
    });

    // Listen errors (e.g. port already in use) arrive as an 'error' event, not
    // as an exception, so the surrounding try/catch would never see them.
    healthServer.on('error', (error) => {
      console.error('❌ Failed to start browser service:', error);
      process.exit(1);
    });

    healthServer.listen(BROWSER_HEALTH_PORT, () => {
      console.log(`✅ Health check server running on port ${BROWSER_HEALTH_PORT}`);
      console.log('Browser service is ready to accept connections!');
    });
  } catch (error) {
    console.error('❌ Failed to start browser service:', error);
    process.exit(1);
  }
}
|
||||||
|
|
||||||
|
// Set once shutdown has begun. The handler is registered for both SIGTERM and
// SIGINT, so without this guard a second signal would call close() again and
// could exit the process while the first close is still in flight.
let shutdownInProgress = false;

/**
 * Graceful shutdown: close the browser server (if one was started) and exit.
 *
 * A failure while closing is logged but does not change the exit code; the
 * process always exits with 0. Repeated invocations after the first are ignored.
 */
async function shutdown(): Promise<void> {
  if (shutdownInProgress) {
    return;
  }
  shutdownInProgress = true;

  console.log('Shutting down browser service...');
  if (browserServer) {
    try {
      await browserServer.close();
      console.log('Browser server closed');
    } catch (error) {
      console.error('Error closing browser server:', error);
    }
  }
  process.exit(0);
}

process.on('SIGTERM', shutdown);
process.on('SIGINT', shutdown);
|
||||||
|
|
||||||
|
// Start the service
|
||||||
|
start().catch(console.error);
|
||||||
24
browser/tsconfig.json
Normal file
24
browser/tsconfig.json
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
{
|
||||||
|
"compilerOptions": {
|
||||||
|
"target": "ES2020",
|
||||||
|
"module": "commonjs",
|
||||||
|
"lib": [
|
||||||
|
"ES2020"
|
||||||
|
],
|
||||||
|
"outDir": "./dist",
|
||||||
|
"rootDir": "./",
|
||||||
|
"strict": true,
|
||||||
|
"esModuleInterop": true,
|
||||||
|
"skipLibCheck": true,
|
||||||
|
"forceConsistentCasingInFileNames": true,
|
||||||
|
"resolveJsonModule": true,
|
||||||
|
"moduleResolution": "node"
|
||||||
|
},
|
||||||
|
"include": [
|
||||||
|
"server.ts"
|
||||||
|
],
|
||||||
|
"exclude": [
|
||||||
|
"node_modules",
|
||||||
|
"dist"
|
||||||
|
]
|
||||||
|
}
|
||||||
@@ -74,6 +74,42 @@ services:
|
|||||||
depends_on:
|
depends_on:
|
||||||
- backend
|
- backend
|
||||||
|
|
||||||
|
browser:
|
||||||
|
build:
|
||||||
|
context: .
|
||||||
|
dockerfile: browser/Dockerfile
|
||||||
|
args:
|
||||||
|
BROWSER_WS_PORT: ${BROWSER_WS_PORT:-3001}
|
||||||
|
BROWSER_HEALTH_PORT: ${BROWSER_HEALTH_PORT:-3002}
|
||||||
|
ports:
|
||||||
|
- "${BROWSER_WS_PORT:-3001}:${BROWSER_WS_PORT:-3001}"
|
||||||
|
- "${BROWSER_HEALTH_PORT:-3002}:${BROWSER_HEALTH_PORT:-3002}"
|
||||||
|
environment:
|
||||||
|
- NODE_ENV=production
|
||||||
|
- DEBUG=pw:browser*
|
||||||
|
- BROWSER_WS_PORT=${BROWSER_WS_PORT:-3001}
|
||||||
|
- BROWSER_HEALTH_PORT=${BROWSER_HEALTH_PORT:-3002}
|
||||||
|
restart: unless-stopped
|
||||||
|
healthcheck:
|
||||||
|
test: ["CMD", "curl", "-f", "http://localhost:${BROWSER_HEALTH_PORT:-3002}/health"]
|
||||||
|
interval: 10s
|
||||||
|
timeout: 5s
|
||||||
|
retries: 3
|
||||||
|
start_period: 10s
|
||||||
|
deploy:
|
||||||
|
resources:
|
||||||
|
limits:
|
||||||
|
memory: 2G
|
||||||
|
cpus: '1.5'
|
||||||
|
reservations:
|
||||||
|
memory: 1G
|
||||||
|
cpus: '1.0'
|
||||||
|
security_opt:
|
||||||
|
- seccomp:unconfined
|
||||||
|
shm_size: 2gb
|
||||||
|
cap_add:
|
||||||
|
- SYS_ADMIN
|
||||||
|
|
||||||
volumes:
|
volumes:
|
||||||
postgres_data:
|
postgres_data:
|
||||||
minio_data:
|
minio_data:
|
||||||
@@ -31,10 +31,10 @@
|
|||||||
"license": "AGPL-3.0-or-later",
|
"license": "AGPL-3.0-or-later",
|
||||||
"dependencies": {
|
"dependencies": {
|
||||||
"@cliqz/adblocker-playwright": "^1.31.3",
|
"@cliqz/adblocker-playwright": "^1.31.3",
|
||||||
|
"@types/node": "22.7.9",
|
||||||
"cross-fetch": "^4.0.0",
|
"cross-fetch": "^4.0.0",
|
||||||
"joi": "^17.6.0",
|
"joi": "^17.6.0",
|
||||||
"playwright": "^1.20.1",
|
"playwright-core": "1.57.0",
|
||||||
"playwright-extra": "^4.3.6",
|
"turndown": "^7.2.2"
|
||||||
"puppeteer-extra-plugin-stealth": "^2.11.2"
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,5 +1,5 @@
|
|||||||
/* eslint-disable no-await-in-loop, no-restricted-syntax */
|
/* eslint-disable no-await-in-loop, no-restricted-syntax */
|
||||||
import { ElementHandle, Page, PageScreenshotOptions } from 'playwright';
|
import { ElementHandle, Page, PageScreenshotOptions } from 'playwright-core';
|
||||||
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
|
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
|
||||||
import fetch from 'cross-fetch';
|
import fetch from 'cross-fetch';
|
||||||
import path from 'path';
|
import path from 'path';
|
||||||
@@ -144,7 +144,7 @@ export default class Interpreter extends EventEmitter {
|
|||||||
private async applyAdBlocker(page: Page): Promise<void> {
|
private async applyAdBlocker(page: Page): Promise<void> {
|
||||||
if (this.blocker) {
|
if (this.blocker) {
|
||||||
try {
|
try {
|
||||||
await this.blocker.enableBlockingInPage(page);
|
await this.blocker.enableBlockingInPage(page as any);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
this.log(`Ad-blocker operation failed:`, Level.ERROR);
|
this.log(`Ad-blocker operation failed:`, Level.ERROR);
|
||||||
}
|
}
|
||||||
@@ -154,7 +154,7 @@ export default class Interpreter extends EventEmitter {
|
|||||||
private async disableAdBlocker(page: Page): Promise<void> {
|
private async disableAdBlocker(page: Page): Promise<void> {
|
||||||
if (this.blocker) {
|
if (this.blocker) {
|
||||||
try {
|
try {
|
||||||
await this.blocker.disableBlockingInPage(page);
|
await this.blocker.disableBlockingInPage(page as any);
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
this.log(`Ad-blocker operation failed:`, Level.ERROR);
|
this.log(`Ad-blocker operation failed:`, Level.ERROR);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { Page } from 'playwright';
|
import { Page } from 'playwright-core';
|
||||||
import {
|
import {
|
||||||
naryOperators, unaryOperators, operators, meta,
|
naryOperators, unaryOperators, operators, meta,
|
||||||
} from './logic';
|
} from './logic';
|
||||||
|
|||||||
@@ -60,12 +60,8 @@
|
|||||||
"pg": "^8.13.0",
|
"pg": "^8.13.0",
|
||||||
"pg-boss": "^10.1.6",
|
"pg-boss": "^10.1.6",
|
||||||
"pkce-challenge": "^4.1.0",
|
"pkce-challenge": "^4.1.0",
|
||||||
"playwright": "^1.48.2",
|
"playwright-core": "1.57.0",
|
||||||
"playwright-extra": "^4.3.6",
|
|
||||||
"posthog-node": "^4.2.1",
|
"posthog-node": "^4.2.1",
|
||||||
"prismjs": "^1.28.0",
|
|
||||||
"puppeteer-extra-plugin-recaptcha": "^3.6.8",
|
|
||||||
"puppeteer-extra-plugin-stealth": "^2.11.2",
|
|
||||||
"react": "^18.0.0",
|
"react": "^18.0.0",
|
||||||
"react-dom": "^18.0.0",
|
"react-dom": "^18.0.0",
|
||||||
"react-highlight": "0.15.0",
|
"react-highlight": "0.15.0",
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { chromium } from "playwright-extra";
|
|
||||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
||||||
import { requireAPIKey } from "../middlewares/api";
|
import { requireAPIKey } from "../middlewares/api";
|
||||||
import Robot from "../models/Robot";
|
import Robot from "../models/Robot";
|
||||||
import Run from "../models/Run";
|
import Run from "../models/Run";
|
||||||
@@ -20,8 +18,6 @@ import { airtableUpdateTasks, processAirtableUpdates } from "../workflow-managem
|
|||||||
import { sendWebhook } from "../routes/webhook";
|
import { sendWebhook } from "../routes/webhook";
|
||||||
import { convertPageToHTML, convertPageToMarkdown } from '../markdownify/scrape';
|
import { convertPageToHTML, convertPageToMarkdown } from '../markdownify/scrape';
|
||||||
|
|
||||||
chromium.use(stealthPlugin());
|
|
||||||
|
|
||||||
const router = Router();
|
const router = Router();
|
||||||
|
|
||||||
const formatRecording = (recordingData: any) => {
|
const formatRecording = (recordingData: any) => {
|
||||||
|
|||||||
111
server/src/browser-management/browserConnection.ts
Normal file
111
server/src/browser-management/browserConnection.ts
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
import { chromium } from 'playwright-core';
|
||||||
|
import type { Browser } from 'playwright-core';
|
||||||
|
import logger from '../logger';
|
||||||
|
|
||||||
|
/**
 * Configuration for connection retry logic.
 *
 * Declared `as const` so the shared settings cannot be reassigned by accident.
 */
const CONNECTION_CONFIG = {
  /** Default number of connection attempts when the caller passes none. */
  maxRetries: 3,
  /** Pause between two attempts, in milliseconds. */
  retryDelay: 2000,
  /** Timeout handed to `chromium.connect`, in milliseconds. */
  connectionTimeout: 30000,
} as const;
|
||||||
|
|
||||||
|
/** Host names that only ever refer to the machine/container they are used on. */
const LOOPBACK_HOSTS = new Set(['localhost', '127.0.0.1', '0.0.0.0', '[::1]', '[::]']);

/**
 * Make an advertised WebSocket endpoint reachable from this process.
 *
 * The browser service reports the endpoint from its own point of view. If that
 * host is a loopback name while the service itself is reached through a
 * different host, the loopback name is replaced by that host. In every other
 * case (including an unparsable endpoint) the input is returned unchanged.
 */
function resolveReachableEndpoint(wsEndpoint: string, serviceHost: string): string {
  try {
    const url = new URL(wsEndpoint);
    if (LOOPBACK_HOSTS.has(url.hostname) && !LOOPBACK_HOSTS.has(serviceHost)) {
      url.hostname = serviceHost;
      return url.toString();
    }
  } catch {
    // Not a parsable URL: hand it back untouched and let the caller's connect fail loudly.
  }
  return wsEndpoint;
}

/**
 * Get the WebSocket endpoint from the browser service health check.
 *
 * Reads BROWSER_WS_HOST (default `localhost`) and BROWSER_HEALTH_PORT (default
 * `3002`), requests `/health` there and returns the advertised `wsEndpoint`,
 * with a loopback host swapped for BROWSER_WS_HOST when the two differ.
 *
 * @returns Promise<string> - The WebSocket endpoint URL with browser ID
 * @throws Error when the health endpoint cannot be reached, answers with a
 *   non-2xx status, or does not report a healthy service with an endpoint.
 */
async function getBrowserServiceEndpoint(): Promise<string> {
  const healthPort = process.env.BROWSER_HEALTH_PORT || '3002';
  const healthHost = process.env.BROWSER_WS_HOST || 'localhost';
  const healthEndpoint = `http://${healthHost}:${healthPort}/health`;

  try {
    logger.debug(`Fetching WebSocket endpoint from: ${healthEndpoint}`);
    const response = await fetch(healthEndpoint);
    if (!response.ok) {
      throw new Error(`Health check responded with HTTP ${response.status}`);
    }

    // External JSON: treat every field as unverified until checked below.
    const data = (await response.json()) as { status?: unknown; wsEndpoint?: unknown } | null;

    if (data?.status === 'healthy' && typeof data.wsEndpoint === 'string' && data.wsEndpoint) {
      const wsEndpoint = resolveReachableEndpoint(data.wsEndpoint, healthHost);
      logger.debug(`Got WebSocket endpoint: ${wsEndpoint}`);
      return wsEndpoint;
    }

    throw new Error('Health check did not return a valid wsEndpoint');
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.error(`Failed to fetch endpoint from health check: ${reason}`);
    // Keep the root cause in the message so callers that only log the thrown
    // error can still tell "unreachable" from "reachable but unhealthy".
    throw new Error(
      `Browser service is not accessible at ${healthEndpoint}. ` +
      `Make sure the browser service is running (docker-compose up browser). ` +
      `Cause: ${reason}`
    );
  }
}
|
||||||
|
|
||||||
|
/**
 * Connect to the remote browser service with retry logic.
 *
 * Each attempt first asks the health endpoint for the current WebSocket
 * endpoint and then connects to it. Resolving inside the loop means a health
 * lookup that fails is retried like a failed connect, and a retry never reuses
 * an endpoint obtained before an earlier failure.
 *
 * @param retries - Number of connection attempts (default: 3)
 * @returns Promise<Browser> - Connected browser instance
 * @throws Error if connection fails after all retries
 */
export async function connectToRemoteBrowser(retries?: number): Promise<Browser> {
  const maxRetries = retries ?? CONNECTION_CONFIG.maxRetries;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    // Reset per attempt so the final error never reports a stale endpoint.
    let wsEndpoint: string | undefined;

    try {
      logger.debug(`Connection attempt ${attempt}/${maxRetries}`);

      wsEndpoint = await getBrowserServiceEndpoint();
      logger.info(`Connecting to browser service at ${wsEndpoint}...`);

      const browser = await chromium.connect(wsEndpoint, {
        timeout: CONNECTION_CONFIG.connectionTimeout,
      });

      logger.info(`Successfully connected to browser service`);
      return browser;
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.warn(
        `Connection attempt ${attempt}/${maxRetries} failed: ${reason}`
      );

      if (attempt === maxRetries) {
        logger.error(
          `Failed to connect to browser service after ${maxRetries} attempts`
        );
        throw new Error(
          `Failed to connect to browser service at ${wsEndpoint ?? 'unresolved endpoint'}: ${reason}`
        );
      }

      logger.debug(`Waiting ${CONNECTION_CONFIG.retryDelay}ms before retry...`);
      await new Promise(resolve => setTimeout(resolve, CONNECTION_CONFIG.retryDelay));
    }
  }

  // Only reachable when maxRetries is below 1 and the loop body never ran.
  throw new Error('Failed to connect to browser service');
}
|
||||||
|
|
||||||
|
/**
 * Check if browser service is healthy.
 *
 * Requests `/health` on BROWSER_WS_HOST (default `localhost`) and
 * BROWSER_HEALTH_PORT (default `3002`). Never throws: every failure, including
 * an unreachable service or a non-JSON body, is logged and reported as `false`.
 *
 * @returns Promise<boolean> - true if the service answered with a 2xx status
 *   and a body whose `status` is `"healthy"`
 */
export async function checkBrowserServiceHealth(): Promise<boolean> {
  try {
    const healthPort = process.env.BROWSER_HEALTH_PORT || '3002';
    const healthHost = process.env.BROWSER_WS_HOST || 'localhost';
    const healthEndpoint = `http://${healthHost}:${healthPort}/health`;

    const response = await fetch(healthEndpoint);
    // External JSON: may be null or lack `status`, hence the optional access below.
    const data = (await response.json()) as { status?: unknown } | null;

    if (response.ok && data?.status === 'healthy') {
      logger.info('Browser service health check passed');
      return true;
    }

    // Details are interpolated into the message, matching the other log calls
    // in this module, rather than passed as an extra positional argument that
    // the logger may or may not render.
    logger.warn(`Browser service health check failed: ${JSON.stringify(data)}`);
    return false;
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.error(`Browser service health check error: ${reason}`);
    return false;
  }
}
|
||||||
@@ -2,11 +2,9 @@ import {
|
|||||||
Page,
|
Page,
|
||||||
Browser,
|
Browser,
|
||||||
CDPSession,
|
CDPSession,
|
||||||
BrowserContext,
|
BrowserContext
|
||||||
} from 'playwright';
|
} from 'playwright-core';
|
||||||
import { Socket } from "socket.io";
|
import { Socket } from "socket.io";
|
||||||
import { chromium } from 'playwright-extra';
|
|
||||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
||||||
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
|
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
|
||||||
import fetch from 'cross-fetch';
|
import fetch from 'cross-fetch';
|
||||||
import sharp from 'sharp';
|
import sharp from 'sharp';
|
||||||
@@ -16,6 +14,7 @@ import { WorkflowGenerator } from "../../workflow-management/classes/Generator";
|
|||||||
import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter";
|
import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter";
|
||||||
import { getDecryptedProxyConfig } from '../../routes/proxy';
|
import { getDecryptedProxyConfig } from '../../routes/proxy';
|
||||||
import { getInjectableScript } from 'idcac-playwright';
|
import { getInjectableScript } from 'idcac-playwright';
|
||||||
|
import { connectToRemoteBrowser } from '../browserConnection';
|
||||||
|
|
||||||
declare global {
|
declare global {
|
||||||
interface Window {
|
interface Window {
|
||||||
@@ -83,8 +82,6 @@ interface ProcessedSnapshot {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
chromium.use(stealthPlugin());
|
|
||||||
|
|
||||||
const MEMORY_CONFIG = {
|
const MEMORY_CONFIG = {
|
||||||
gcInterval: 20000, // Check memory more frequently (20s instead of 60s)
|
gcInterval: 20000, // Check memory more frequently (20s instead of 60s)
|
||||||
maxHeapSize: 1536 * 1024 * 1024, // 1.5GB
|
maxHeapSize: 1536 * 1024 * 1024, // 1.5GB
|
||||||
@@ -567,23 +564,7 @@ export class RemoteBrowser {
|
|||||||
|
|
||||||
while (!success && retryCount < MAX_RETRIES) {
|
while (!success && retryCount < MAX_RETRIES) {
|
||||||
try {
|
try {
|
||||||
this.browser = <Browser>(await chromium.launch({
|
this.browser = await connectToRemoteBrowser();
|
||||||
headless: true,
|
|
||||||
args: [
|
|
||||||
"--disable-blink-features=AutomationControlled",
|
|
||||||
"--disable-web-security",
|
|
||||||
"--disable-features=IsolateOrigins,site-per-process",
|
|
||||||
"--disable-site-isolation-trials",
|
|
||||||
"--disable-extensions",
|
|
||||||
"--no-sandbox",
|
|
||||||
"--disable-dev-shm-usage",
|
|
||||||
"--disable-gpu",
|
|
||||||
"--force-color-profile=srgb",
|
|
||||||
"--force-device-scale-factor=2",
|
|
||||||
"--ignore-certificate-errors",
|
|
||||||
"--mute-audio"
|
|
||||||
],
|
|
||||||
}));
|
|
||||||
|
|
||||||
if (!this.browser || this.browser.isConnected() === false) {
|
if (!this.browser || this.browser.isConnected() === false) {
|
||||||
throw new Error('Browser failed to launch or is not connected');
|
throw new Error('Browser failed to launch or is not connected');
|
||||||
@@ -683,9 +664,9 @@ export class RemoteBrowser {
|
|||||||
|
|
||||||
try {
|
try {
|
||||||
const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']);
|
const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']);
|
||||||
await blocker.enableBlockingInPage(this.currentPage);
|
await blocker.enableBlockingInPage(this.currentPage as any);
|
||||||
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
||||||
await blocker.disableBlockingInPage(this.currentPage);
|
await blocker.disableBlockingInPage(this.currentPage as any);
|
||||||
console.log('Adblocker initialized');
|
console.log('Adblocker initialized');
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.warn('Failed to initialize adblocker, continuing without it:', error.message);
|
console.warn('Failed to initialize adblocker, continuing without it:', error.message);
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { chromium } from "playwright";
|
import { connectToRemoteBrowser } from "../browser-management/browserConnection";
|
||||||
import { parseMarkdown } from "./markdown";
|
import { parseMarkdown } from "./markdown";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
@@ -6,7 +6,7 @@ import { parseMarkdown } from "./markdown";
|
|||||||
* returns clean Markdown using parser.
|
* returns clean Markdown using parser.
|
||||||
*/
|
*/
|
||||||
export async function convertPageToMarkdown(url: string): Promise<string> {
|
export async function convertPageToMarkdown(url: string): Promise<string> {
|
||||||
const browser = await chromium.launch();
|
const browser = await connectToRemoteBrowser();
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
|
|
||||||
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
||||||
@@ -61,7 +61,7 @@ export async function convertPageToMarkdown(url: string): Promise<string> {
|
|||||||
* returns clean HTML.
|
* returns clean HTML.
|
||||||
*/
|
*/
|
||||||
export async function convertPageToHTML(url: string): Promise<string> {
|
export async function convertPageToHTML(url: string): Promise<string> {
|
||||||
const browser = await chromium.launch();
|
const browser = await connectToRemoteBrowser();
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
|
|
||||||
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ import { WorkflowFile } from 'maxun-core';
|
|||||||
import Run from './models/Run';
|
import Run from './models/Run';
|
||||||
import Robot from './models/Robot';
|
import Robot from './models/Robot';
|
||||||
import { browserPool } from './server';
|
import { browserPool } from './server';
|
||||||
import { Page } from 'playwright';
|
import { Page } from 'playwright-core';
|
||||||
import { capture } from './utils/analytics';
|
import { capture } from './utils/analytics';
|
||||||
import { googleSheetUpdateTasks, processGoogleSheetUpdates } from './workflow-management/integrations/gsheet';
|
import { googleSheetUpdateTasks, processGoogleSheetUpdates } from './workflow-management/integrations/gsheet';
|
||||||
import { airtableUpdateTasks, processAirtableUpdates } from './workflow-management/integrations/airtable';
|
import { airtableUpdateTasks, processAirtableUpdates } from './workflow-management/integrations/airtable';
|
||||||
|
|||||||
@@ -1,10 +1,8 @@
|
|||||||
import { Router, Request, Response } from 'express';
|
import { Router, Request, Response } from 'express';
|
||||||
import { chromium } from 'playwright-extra';
|
import { connectToRemoteBrowser } from '../browser-management/browserConnection';
|
||||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
||||||
import User from '../models/User';
|
import User from '../models/User';
|
||||||
import { encrypt, decrypt } from '../utils/auth';
|
import { encrypt, decrypt } from '../utils/auth';
|
||||||
import { requireSignIn } from '../middlewares/auth';
|
import { requireSignIn } from '../middlewares/auth';
|
||||||
chromium.use(stealthPlugin());
|
|
||||||
|
|
||||||
export const router = Router();
|
export const router = Router();
|
||||||
|
|
||||||
@@ -86,11 +84,7 @@ router.get('/test', requireSignIn, async (req: Request, res: Response) => {
|
|||||||
}),
|
}),
|
||||||
};
|
};
|
||||||
|
|
||||||
const browser = await chromium.launch({
|
const browser = await connectToRemoteBrowser();
|
||||||
headless: true,
|
|
||||||
proxy: proxyOptions,
|
|
||||||
args:["--ignore-certificate-errors"]
|
|
||||||
});
|
|
||||||
const page = await browser.newPage();
|
const page = await browser.newPage();
|
||||||
await page.goto('https://example.com');
|
await page.goto('https://example.com');
|
||||||
await browser.close();
|
await browser.close();
|
||||||
|
|||||||
@@ -13,14 +13,11 @@ import {
|
|||||||
destroyRemoteBrowser,
|
destroyRemoteBrowser,
|
||||||
canCreateBrowserInState,
|
canCreateBrowserInState,
|
||||||
} from '../browser-management/controller';
|
} from '../browser-management/controller';
|
||||||
import { chromium } from 'playwright-extra';
|
|
||||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
||||||
import logger from "../logger";
|
import logger from "../logger";
|
||||||
import { requireSignIn } from '../middlewares/auth';
|
import { requireSignIn } from '../middlewares/auth';
|
||||||
import { pgBoss } from '../pgboss-worker';
|
import { pgBoss } from '../pgboss-worker';
|
||||||
|
|
||||||
export const router = Router();
|
export const router = Router();
|
||||||
chromium.use(stealthPlugin());
|
|
||||||
|
|
||||||
export interface AuthenticatedRequest extends Request {
|
export interface AuthenticatedRequest extends Request {
|
||||||
user?: any;
|
user?: any;
|
||||||
|
|||||||
@@ -1,8 +1,6 @@
|
|||||||
import { Router } from 'express';
|
import { Router } from 'express';
|
||||||
import logger from "../logger";
|
import logger from "../logger";
|
||||||
import { createRemoteBrowserForRun, destroyRemoteBrowser, getActiveBrowserIdByState } from "../browser-management/controller";
|
import { createRemoteBrowserForRun, destroyRemoteBrowser, getActiveBrowserIdByState } from "../browser-management/controller";
|
||||||
import { chromium } from 'playwright-extra';
|
|
||||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
||||||
import { browserPool } from "../server";
|
import { browserPool } from "../server";
|
||||||
import { v4 as uuid } from "uuid";
|
import { v4 as uuid } from "uuid";
|
||||||
import moment from 'moment-timezone';
|
import moment from 'moment-timezone';
|
||||||
@@ -18,7 +16,6 @@ import { encrypt, decrypt } from '../utils/auth';
|
|||||||
import { WorkflowFile } from 'maxun-core';
|
import { WorkflowFile } from 'maxun-core';
|
||||||
import { cancelScheduledWorkflow, scheduleWorkflow } from '../schedule-worker';
|
import { cancelScheduledWorkflow, scheduleWorkflow } from '../schedule-worker';
|
||||||
import { pgBoss, registerWorkerForQueue, registerAbortWorkerForQueue } from '../pgboss-worker';
|
import { pgBoss, registerWorkerForQueue, registerAbortWorkerForQueue } from '../pgboss-worker';
|
||||||
chromium.use(stealthPlugin());
|
|
||||||
|
|
||||||
export const router = Router();
|
export const router = Router();
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import {BrowserType, LaunchOptions} from "playwright";
|
import {BrowserType, LaunchOptions} from "playwright-core";
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Interpreter settings properties including recording parameters.
|
* Interpreter settings properties including recording parameters.
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import { Action, ActionType, Coordinates, TagName, DatePickerEventData } from ".
|
|||||||
import { WhereWhatPair, WorkflowFile } from 'maxun-core';
|
import { WhereWhatPair, WorkflowFile } from 'maxun-core';
|
||||||
import logger from "../../logger";
|
import logger from "../../logger";
|
||||||
import { Socket } from "socket.io";
|
import { Socket } from "socket.io";
|
||||||
import { Page } from "playwright";
|
import { Page } from "playwright-core";
|
||||||
import {
|
import {
|
||||||
getElementInformation,
|
getElementInformation,
|
||||||
getRect,
|
getRect,
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import Interpreter, { WorkflowFile } from "maxun-core";
|
import Interpreter, { WorkflowFile } from "maxun-core";
|
||||||
import logger from "../../logger";
|
import logger from "../../logger";
|
||||||
import { Socket } from "socket.io";
|
import { Socket } from "socket.io";
|
||||||
import { Page } from "playwright";
|
import { Page } from "playwright-core";
|
||||||
import { InterpreterSettings } from "../../types";
|
import { InterpreterSettings } from "../../types";
|
||||||
import { decrypt } from "../../utils/auth";
|
import { decrypt } from "../../utils/auth";
|
||||||
import Run from "../../models/Run";
|
import Run from "../../models/Run";
|
||||||
|
|||||||
@@ -1,6 +1,4 @@
|
|||||||
import { v4 as uuid } from "uuid";
|
import { v4 as uuid } from "uuid";
|
||||||
import { chromium } from 'playwright-extra';
|
|
||||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
|
||||||
import { io, Socket } from "socket.io-client";
|
import { io, Socket } from "socket.io-client";
|
||||||
import { createRemoteBrowserForRun, destroyRemoteBrowser } from '../../browser-management/controller';
|
import { createRemoteBrowserForRun, destroyRemoteBrowser } from '../../browser-management/controller';
|
||||||
import logger from '../../logger';
|
import logger from '../../logger';
|
||||||
@@ -12,11 +10,10 @@ import { getDecryptedProxyConfig } from "../../routes/proxy";
|
|||||||
import { BinaryOutputService } from "../../storage/mino";
|
import { BinaryOutputService } from "../../storage/mino";
|
||||||
import { capture } from "../../utils/analytics";
|
import { capture } from "../../utils/analytics";
|
||||||
import { WorkflowFile } from "maxun-core";
|
import { WorkflowFile } from "maxun-core";
|
||||||
import { Page } from "playwright";
|
import { Page } from "playwright-core";
|
||||||
import { sendWebhook } from "../../routes/webhook";
|
import { sendWebhook } from "../../routes/webhook";
|
||||||
import { airtableUpdateTasks, processAirtableUpdates } from "../integrations/airtable";
|
import { airtableUpdateTasks, processAirtableUpdates } from "../integrations/airtable";
|
||||||
import { convertPageToMarkdown, convertPageToHTML } from "../../markdownify/scrape";
|
import { convertPageToMarkdown, convertPageToHTML } from "../../markdownify/scrape";
|
||||||
chromium.use(stealthPlugin());
|
|
||||||
|
|
||||||
async function createWorkflowAndStoreMetadata(id: string, userId: string) {
|
async function createWorkflowAndStoreMetadata(id: string, userId: string) {
|
||||||
try {
|
try {
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
import { Page } from "playwright";
|
import { Page } from "playwright-core";
|
||||||
import { Coordinates } from "../types";
|
import { Coordinates } from "../types";
|
||||||
import { WhereWhatPair, WorkflowFile } from "maxun-core";
|
import { WhereWhatPair, WorkflowFile } from "maxun-core";
|
||||||
import logger from "../logger";
|
import logger from "../logger";
|
||||||
|
|||||||
Reference in New Issue
Block a user