Merge pull request #906 from getmaxun/browser-service
chore(infra): add separate browser service
This commit is contained in:
@@ -1,6 +1,4 @@
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { chromium } from "playwright-extra";
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { requireAPIKey } from "../middlewares/api";
|
||||
import Robot from "../models/Robot";
|
||||
import Run from "../models/Run";
|
||||
@@ -20,8 +18,6 @@ import { airtableUpdateTasks, processAirtableUpdates } from "../workflow-managem
|
||||
import { sendWebhook } from "../routes/webhook";
|
||||
import { convertPageToHTML, convertPageToMarkdown } from '../markdownify/scrape';
|
||||
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
const router = Router();
|
||||
|
||||
const formatRecording = (recordingData: any) => {
|
||||
|
||||
156
server/src/browser-management/browserConnection.ts
Normal file
156
server/src/browser-management/browserConnection.ts
Normal file
@@ -0,0 +1,156 @@
|
||||
import { chromium } from 'playwright-core';
|
||||
import type { Browser } from 'playwright-core';
|
||||
import logger from '../logger';
|
||||
|
||||
/**
 * Tunables for connecting to the remote browser service.
 *
 * - `maxRetries`: default number of `chromium.connect` attempts before giving up.
 * - `retryDelay`: pause between two consecutive attempts, in milliseconds.
 * - `connectionTimeout`: value passed as the `timeout` option of a single
 *   `chromium.connect` call, in milliseconds.
 *
 * Declared `as const` so the values cannot be reassigned by accident at runtime
 * call sites within this module.
 */
const CONNECTION_CONFIG = {
  maxRetries: 3,
  retryDelay: 2000,
  connectionTimeout: 30000,
} as const;
|
||||
|
||||
/**
 * Resolve the Playwright WebSocket endpoint by querying the browser service's
 * HTTP health route.
 *
 * The route is `http://<BROWSER_WS_HOST>:<BROWSER_HEALTH_PORT>/health`, with the
 * host defaulting to `localhost` and the port to `3002`. The response body is
 * expected to be JSON of the form `{ status: 'healthy', wsEndpoint: string }`.
 *
 * @returns The `wsEndpoint` string reported by the health route.
 * @throws Error when the route cannot be reached, does not answer within the
 *   health-check timeout, answers with a non-2xx status, or does not report a
 *   healthy status together with a non-empty string `wsEndpoint`. The message
 *   includes the underlying cause.
 */
async function getBrowserServiceEndpoint(): Promise<string> {
  const healthPort = process.env.BROWSER_HEALTH_PORT || '3002';
  const healthHost = process.env.BROWSER_WS_HOST || 'localhost';
  const healthEndpoint = `http://${healthHost}:${healthPort}/health`;

  // Bound the request so a service that accepts the socket but never answers
  // cannot stall the caller (and therefore the local-browser fallback) forever.
  const healthCheckTimeoutMs = 10000;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), healthCheckTimeoutMs);

  try {
    logger.debug(`Fetching WebSocket endpoint from: ${healthEndpoint}`);
    const response = await fetch(healthEndpoint, { signal: controller.signal });

    // Fail early on HTTP errors instead of surfacing a confusing JSON parse
    // error when a proxy or the service returns a non-JSON error page.
    if (!response.ok) {
      throw new Error(`Health check responded with HTTP ${response.status}`);
    }

    const data = (await response.json()) as
      | { status?: unknown; wsEndpoint?: unknown }
      | null;

    if (
      data?.status === 'healthy' &&
      typeof data.wsEndpoint === 'string' &&
      data.wsEndpoint
    ) {
      logger.debug(`Got WebSocket endpoint: ${data.wsEndpoint}`);
      return data.wsEndpoint;
    }

    throw new Error('Health check did not return a valid wsEndpoint');
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.error(`Failed to fetch endpoint from health check: ${reason}`);
    // Keep the actionable hint, but do not hide why the check actually failed.
    throw new Error(
      `Browser service is not accessible at ${healthEndpoint}. ` +
      `Make sure the browser service is running (docker-compose up browser). ` +
      `Cause: ${reason}`
    );
  } finally {
    clearTimeout(timer);
  }
}
|
||||
|
||||
/**
 * Launch a headless Chromium on the local machine. Used as the fallback when
 * the remote browser service cannot be used.
 *
 * Requires Chromium binaries to be installed locally, as the logged note says.
 *
 * NOTE(review): the launch flags include `--no-sandbox`,
 * `--disable-web-security` and `--ignore-certificate-errors`, which relax
 * Chromium's isolation and TLS checks; confirm this is acceptable wherever the
 * fallback can run.
 *
 * @returns The locally launched browser instance.
 * @throws Error with setup instructions and the underlying cause when the
 *   launch fails.
 */
async function launchLocalBrowser(): Promise<Browser> {
  logger.warn('Attempting to launch local browser');
  logger.warn('Note: This requires Chromium binaries to be installed (npx playwright install chromium)');

  try {
    const browser = await chromium.launch({
      headless: true,
      args: [
        '--disable-blink-features=AutomationControlled',
        '--disable-web-security',
        '--disable-features=IsolateOrigins,site-per-process',
        '--disable-site-isolation-trials',
        '--disable-extensions',
        '--no-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--force-color-profile=srgb',
        '--force-device-scale-factor=2',
        '--ignore-certificate-errors',
        '--mute-audio',
      ],
    });

    logger.info('Successfully launched local browser');
    return browser;
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.error(`Failed to launch local browser: ${reason}`);
    // Include the root cause so callers that only see the thrown error can
    // tell a missing binary apart from, for example, a sandbox or OS failure.
    throw new Error(
      `Could not launch local browser. ` +
      `Please either:\n` +
      ` 1. Start the browser service: docker-compose up browser\n` +
      ` 2. Install Chromium binaries: npx playwright@1.57.0 install chromium\n` +
      `Cause: ${reason}`
    );
  }
}
|
||||
|
||||
/**
 * Obtain a browser, preferring the remote browser service and falling back to
 * a locally launched Chromium.
 *
 * The WebSocket endpoint is resolved once through the health route, then
 * `chromium.connect` is attempted up to the configured number of times with a
 * fixed delay between attempts. Any failure on the remote path (endpoint
 * lookup or all connection attempts) triggers the local fallback.
 *
 * @param retries - Number of connection attempts. Defaults to
 *   `CONNECTION_CONFIG.maxRetries`; values that are not a finite number >= 1
 *   are also replaced by that default so the service is always tried at least
 *   once.
 * @returns A connected browser instance (remote or local).
 * @throws Error if both the remote connection and the local launch fail.
 */
export async function connectToRemoteBrowser(retries?: number): Promise<Browser> {
  const requested = retries ?? CONNECTION_CONFIG.maxRetries;
  // 0, negative or NaN would skip the loop entirely and silently fall back to
  // a local launch without ever contacting the service.
  const maxRetries =
    Number.isFinite(requested) && requested >= 1
      ? Math.floor(requested)
      : CONNECTION_CONFIG.maxRetries;

  try {
    const wsEndpoint = await getBrowserServiceEndpoint();
    logger.info(`Connecting to browser service at ${wsEndpoint}...`);

    let lastReason = 'unknown error';

    for (let attempt = 1; attempt <= maxRetries; attempt++) {
      try {
        logger.debug(`Connection attempt ${attempt}/${maxRetries}`);

        const browser = await chromium.connect(wsEndpoint, {
          timeout: CONNECTION_CONFIG.connectionTimeout,
        });

        logger.info('Successfully connected to browser service');
        return browser;
      } catch (error: unknown) {
        lastReason = error instanceof Error ? error.message : String(error);
        logger.warn(
          `Connection attempt ${attempt}/${maxRetries} failed: ${lastReason}`
        );

        // Only wait when another attempt will actually follow.
        if (attempt < maxRetries) {
          logger.debug(`Waiting ${CONNECTION_CONFIG.retryDelay}ms before retry...`);
          await new Promise(resolve => setTimeout(resolve, CONNECTION_CONFIG.retryDelay));
        }
      }
    }

    logger.error(
      `Failed to connect to browser service after ${maxRetries} attempts`
    );
    throw new Error(`Remote connection failed: ${lastReason}`);
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.warn(`Browser service connection failed: ${reason}`);
    logger.warn('Falling back to local browser launch...');

    return await launchLocalBrowser();
  }
}
|
||||
|
||||
/**
 * Probe the browser service's HTTP health route.
 *
 * Queries `http://<BROWSER_WS_HOST>:<BROWSER_HEALTH_PORT>/health` (defaults:
 * `localhost`, `3002`) and reports whether the JSON body has
 * `status === 'healthy'`. This function never throws: network errors,
 * timeouts, HTTP error statuses and malformed bodies are logged and reported
 * as `false`.
 *
 * @returns `true` if the service reports itself healthy, otherwise `false`.
 */
export async function checkBrowserServiceHealth(): Promise<boolean> {
  // Bound the request so a hung service yields `false` instead of a probe
  // that never settles.
  const healthCheckTimeoutMs = 10000;
  const controller = new AbortController();
  const timer = setTimeout(() => controller.abort(), healthCheckTimeoutMs);

  try {
    const healthPort = process.env.BROWSER_HEALTH_PORT || '3002';
    const healthHost = process.env.BROWSER_WS_HOST || 'localhost';
    const healthEndpoint = `http://${healthHost}:${healthPort}/health`;

    const response = await fetch(healthEndpoint, { signal: controller.signal });

    if (!response.ok) {
      logger.warn(`Browser service health check failed: HTTP ${response.status}`);
      return false;
    }

    const data = (await response.json()) as { status?: unknown } | null;

    if (data?.status === 'healthy') {
      logger.info('Browser service health check passed');
      return true;
    }

    // Details are interpolated into the message (as the other log calls in
    // this module do) rather than passed as an extra argument, which the
    // logger may not render.
    logger.warn(`Browser service health check failed: ${JSON.stringify(data)}`);
    return false;
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.error(`Browser service health check error: ${reason}`);
    return false;
  } finally {
    clearTimeout(timer);
  }
}
|
||||
@@ -2,11 +2,9 @@ import {
|
||||
Page,
|
||||
Browser,
|
||||
CDPSession,
|
||||
BrowserContext,
|
||||
} from 'playwright';
|
||||
BrowserContext
|
||||
} from 'playwright-core';
|
||||
import { Socket } from "socket.io";
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
|
||||
import fetch from 'cross-fetch';
|
||||
import sharp from 'sharp';
|
||||
@@ -16,6 +14,7 @@ import { WorkflowGenerator } from "../../workflow-management/classes/Generator";
|
||||
import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter";
|
||||
import { getDecryptedProxyConfig } from '../../routes/proxy';
|
||||
import { getInjectableScript } from 'idcac-playwright';
|
||||
import { connectToRemoteBrowser } from '../browserConnection';
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
@@ -83,8 +82,6 @@ interface ProcessedSnapshot {
|
||||
};
|
||||
}
|
||||
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
const MEMORY_CONFIG = {
|
||||
gcInterval: 20000, // Check memory more frequently (20s instead of 60s)
|
||||
maxHeapSize: 1536 * 1024 * 1024, // 1.5GB
|
||||
@@ -567,23 +564,7 @@ export class RemoteBrowser {
|
||||
|
||||
while (!success && retryCount < MAX_RETRIES) {
|
||||
try {
|
||||
this.browser = <Browser>(await chromium.launch({
|
||||
headless: true,
|
||||
args: [
|
||||
"--disable-blink-features=AutomationControlled",
|
||||
"--disable-web-security",
|
||||
"--disable-features=IsolateOrigins,site-per-process",
|
||||
"--disable-site-isolation-trials",
|
||||
"--disable-extensions",
|
||||
"--no-sandbox",
|
||||
"--disable-dev-shm-usage",
|
||||
"--disable-gpu",
|
||||
"--force-color-profile=srgb",
|
||||
"--force-device-scale-factor=2",
|
||||
"--ignore-certificate-errors",
|
||||
"--mute-audio"
|
||||
],
|
||||
}));
|
||||
this.browser = await connectToRemoteBrowser();
|
||||
|
||||
if (!this.browser || this.browser.isConnected() === false) {
|
||||
throw new Error('Browser failed to launch or is not connected');
|
||||
@@ -683,9 +664,9 @@ export class RemoteBrowser {
|
||||
|
||||
try {
|
||||
const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']);
|
||||
await blocker.enableBlockingInPage(this.currentPage);
|
||||
await blocker.enableBlockingInPage(this.currentPage as any);
|
||||
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
||||
await blocker.disableBlockingInPage(this.currentPage);
|
||||
await blocker.disableBlockingInPage(this.currentPage as any);
|
||||
console.log('Adblocker initialized');
|
||||
} catch (error: any) {
|
||||
console.warn('Failed to initialize adblocker, continuing without it:', error.message);
|
||||
|
||||
@@ -8,7 +8,7 @@ import logger from "../logger";
|
||||
import { Coordinates, ScrollDeltas, KeyboardInput, DatePickerEventData } from '../types';
|
||||
import { browserPool } from "../server";
|
||||
import { WorkflowGenerator } from "../workflow-management/classes/Generator";
|
||||
import { Page } from "playwright";
|
||||
import { Page } from "playwright-core";
|
||||
import { throttle } from "../../../src/helpers/inputHelpers";
|
||||
import { CustomActions } from "../../../src/shared/types";
|
||||
import { WhereWhatPair } from "maxun-core";
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { chromium, Page } from "playwright";
|
||||
import { connectToRemoteBrowser } from "../browser-management/browserConnection";
|
||||
import { parseMarkdown } from "./markdown";
|
||||
import logger from "../logger";
|
||||
|
||||
@@ -23,22 +23,11 @@ async function gotoWithFallback(page: any, url: string) {
|
||||
* @param url - The URL to convert
|
||||
* @param existingPage - Optional existing Playwright page instance to reuse
|
||||
*/
|
||||
export async function convertPageToMarkdown(url: string, existingPage?: Page): Promise<string> {
|
||||
let browser: any = null;
|
||||
let page: Page;
|
||||
let shouldCloseBrowser = false;
|
||||
export async function convertPageToMarkdown(url: string): Promise<string> {
|
||||
const browser = await connectToRemoteBrowser();
|
||||
const page = await browser.newPage();
|
||||
|
||||
if (existingPage) {
|
||||
logger.log('info', `[Scrape] Reusing existing Playwright page instance for markdown conversion of ${url}`);
|
||||
page = existingPage;
|
||||
} else {
|
||||
logger.log('info', `[Scrape] Creating new Chromium browser instance for markdown conversion of ${url}`);
|
||||
browser = await chromium.launch();
|
||||
page = await browser.newPage();
|
||||
shouldCloseBrowser = true;
|
||||
}
|
||||
|
||||
await gotoWithFallback(page, url);
|
||||
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
||||
|
||||
const cleanedHtml = await page.evaluate(() => {
|
||||
const selectors = [
|
||||
@@ -93,22 +82,11 @@ export async function convertPageToMarkdown(url: string, existingPage?: Page): P
|
||||
* @param url - The URL to convert
|
||||
* @param existingPage - Optional existing Playwright page instance to reuse
|
||||
*/
|
||||
export async function convertPageToHTML(url: string, existingPage?: Page): Promise<string> {
|
||||
let browser: any = null;
|
||||
let page: Page;
|
||||
let shouldCloseBrowser = false;
|
||||
export async function convertPageToHTML(url: string): Promise<string> {
|
||||
const browser = await connectToRemoteBrowser();
|
||||
const page = await browser.newPage();
|
||||
|
||||
if (existingPage) {
|
||||
logger.log('info', `[Scrape] Reusing existing Playwright page instance for HTML conversion of ${url}`);
|
||||
page = existingPage;
|
||||
} else {
|
||||
logger.log('info', `[Scrape] Creating new Chromium browser instance for HTML conversion of ${url}`);
|
||||
browser = await chromium.launch();
|
||||
page = await browser.newPage();
|
||||
shouldCloseBrowser = true;
|
||||
}
|
||||
|
||||
await gotoWithFallback(page, url);
|
||||
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
||||
|
||||
const cleanedHtml = await page.evaluate(() => {
|
||||
const selectors = [
|
||||
|
||||
@@ -13,7 +13,7 @@ import { WorkflowFile } from 'maxun-core';
|
||||
import Run from './models/Run';
|
||||
import Robot from './models/Robot';
|
||||
import { browserPool } from './server';
|
||||
import { Page } from 'playwright';
|
||||
import { Page } from 'playwright-core';
|
||||
import { capture } from './utils/analytics';
|
||||
import { googleSheetUpdateTasks, processGoogleSheetUpdates } from './workflow-management/integrations/gsheet';
|
||||
import { airtableUpdateTasks, processAirtableUpdates } from './workflow-management/integrations/airtable';
|
||||
|
||||
@@ -1,10 +1,8 @@
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { connectToRemoteBrowser } from '../browser-management/browserConnection';
|
||||
import User from '../models/User';
|
||||
import { encrypt, decrypt } from '../utils/auth';
|
||||
import { requireSignIn } from '../middlewares/auth';
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
export const router = Router();
|
||||
|
||||
@@ -86,11 +84,7 @@ router.get('/test', requireSignIn, async (req: Request, res: Response) => {
|
||||
}),
|
||||
};
|
||||
|
||||
const browser = await chromium.launch({
|
||||
headless: true,
|
||||
proxy: proxyOptions,
|
||||
args:["--ignore-certificate-errors"]
|
||||
});
|
||||
const browser = await connectToRemoteBrowser();
|
||||
const page = await browser.newPage();
|
||||
await page.goto('https://example.com');
|
||||
await browser.close();
|
||||
|
||||
@@ -13,14 +13,11 @@ import {
|
||||
destroyRemoteBrowser,
|
||||
canCreateBrowserInState,
|
||||
} from '../browser-management/controller';
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import logger from "../logger";
|
||||
import { requireSignIn } from '../middlewares/auth';
|
||||
import { pgBoss } from '../pgboss-worker';
|
||||
|
||||
export const router = Router();
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
export interface AuthenticatedRequest extends Request {
|
||||
user?: any;
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import { Router } from 'express';
|
||||
import logger from "../logger";
|
||||
import { createRemoteBrowserForRun, destroyRemoteBrowser, getActiveBrowserIdByState } from "../browser-management/controller";
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { browserPool } from "../server";
|
||||
import { v4 as uuid } from "uuid";
|
||||
import moment from 'moment-timezone';
|
||||
@@ -18,7 +16,6 @@ import { encrypt, decrypt } from '../utils/auth';
|
||||
import { WorkflowFile } from 'maxun-core';
|
||||
import { cancelScheduledWorkflow, scheduleWorkflow } from '../schedule-worker';
|
||||
import { pgBoss, registerWorkerForQueue, registerAbortWorkerForQueue } from '../pgboss-worker';
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
export const router = Router();
|
||||
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import {BrowserType, LaunchOptions} from "playwright";
|
||||
import {BrowserType, LaunchOptions} from "playwright-core";
|
||||
|
||||
/**
|
||||
* Interpreter settings properties including recording parameters.
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Action, ActionType, Coordinates, TagName, DatePickerEventData } from ".
|
||||
import { WhereWhatPair, WorkflowFile } from 'maxun-core';
|
||||
import logger from "../../logger";
|
||||
import { Socket } from "socket.io";
|
||||
import { Page } from "playwright";
|
||||
import { Page } from "playwright-core";
|
||||
import {
|
||||
getElementInformation,
|
||||
getRect,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import Interpreter, { WorkflowFile } from "maxun-core";
|
||||
import logger from "../../logger";
|
||||
import { Socket } from "socket.io";
|
||||
import { Page } from "playwright";
|
||||
import { Page } from "playwright-core";
|
||||
import { InterpreterSettings } from "../../types";
|
||||
import { decrypt } from "../../utils/auth";
|
||||
import Run from "../../models/Run";
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
import { v4 as uuid } from "uuid";
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { io, Socket } from "socket.io-client";
|
||||
import { createRemoteBrowserForRun, destroyRemoteBrowser } from '../../browser-management/controller';
|
||||
import logger from '../../logger';
|
||||
@@ -12,11 +10,10 @@ import { getDecryptedProxyConfig } from "../../routes/proxy";
|
||||
import { BinaryOutputService } from "../../storage/mino";
|
||||
import { capture } from "../../utils/analytics";
|
||||
import { WorkflowFile } from "maxun-core";
|
||||
import { Page } from "playwright";
|
||||
import { Page } from "playwright-core";
|
||||
import { sendWebhook } from "../../routes/webhook";
|
||||
import { airtableUpdateTasks, processAirtableUpdates } from "../integrations/airtable";
|
||||
import { convertPageToMarkdown, convertPageToHTML } from "../../markdownify/scrape";
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
async function createWorkflowAndStoreMetadata(id: string, userId: string) {
|
||||
try {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Page } from "playwright";
|
||||
import { Page } from "playwright-core";
|
||||
import { Coordinates } from "../types";
|
||||
import { WhereWhatPair, WorkflowFile } from "maxun-core";
|
||||
import logger from "../logger";
|
||||
|
||||
Reference in New Issue
Block a user