feat: add separate browser service

This commit is contained in:
Rohit Rajan
2025-11-30 17:41:44 +05:30
parent ad8df66ecd
commit cf19a72dc0
25 changed files with 355 additions and 72 deletions

View File

@@ -0,0 +1,111 @@
import { chromium } from 'playwright-core';
import type { Browser } from 'playwright-core';
import logger from '../logger';
/**
 * Tunables for connecting to the remote browser service.
 *
 * Declared `as const` so the values are read-only at the type level and cannot
 * be reassigned by accident elsewhere in the module.
 */
const CONNECTION_CONFIG = {
  /** Default number of connection attempts before giving up. */
  maxRetries: 3,
  /** Pause between failed attempts, in milliseconds (passed to `setTimeout`). */
  retryDelay: 2000,
  /** Timeout handed to `chromium.connect`, in milliseconds. */
  connectionTimeout: 30000,
} as const;
/**
 * Resolve the WebSocket endpoint advertised by the browser service.
 *
 * Queries `http://<BROWSER_WS_HOST>:<BROWSER_HEALTH_PORT>/health` (defaults:
 * `localhost` and `3002`) and returns the `wsEndpoint` field of the JSON body
 * when the service reports `status: 'healthy'`.
 *
 * @returns The WebSocket endpoint URL reported by the health check
 * @throws Error if the request fails, the response is not a 2xx, or the body
 *   does not describe a healthy service with a non-empty string `wsEndpoint`.
 *   The underlying reason is appended to the error message.
 */
async function getBrowserServiceEndpoint(): Promise<string> {
  // `||` (not `??`) on purpose: an empty-string env var should also fall back.
  const healthPort = process.env.BROWSER_HEALTH_PORT || '3002';
  const healthHost = process.env.BROWSER_WS_HOST || 'localhost';
  const healthEndpoint = `http://${healthHost}:${healthPort}/health`;

  try {
    logger.debug(`Fetching WebSocket endpoint from: ${healthEndpoint}`);
    const response = await fetch(healthEndpoint);

    // fetch() only rejects on network failure; an HTTP error status still
    // resolves, so it has to be checked explicitly before trusting the body.
    if (!response.ok) {
      throw new Error(`Health check responded with HTTP ${response.status}`);
    }

    const body = (await response.json()) as
      | { status?: unknown; wsEndpoint?: unknown }
      | null;

    if (
      body &&
      body.status === 'healthy' &&
      typeof body.wsEndpoint === 'string' &&
      body.wsEndpoint.length > 0
    ) {
      logger.debug(`Got WebSocket endpoint: ${body.wsEndpoint}`);
      return body.wsEndpoint;
    }

    throw new Error('Health check did not return a valid wsEndpoint');
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error);
    logger.error(`Failed to fetch endpoint from health check: ${reason}`);
    // Keep the original guidance but surface the real cause, so that an
    // unhealthy-but-reachable service is distinguishable from one that is down.
    throw new Error(
      `Browser service is not accessible at ${healthEndpoint}. ` +
        `Make sure the browser service is running (docker-compose up browser). ` +
        `Reason: ${reason}`
    );
  }
}
/**
 * Connect to the remote browser service, retrying on failure.
 *
 * Every attempt re-resolves the WebSocket endpoint from the health check before
 * connecting. This way a service that is still starting up, or one that now
 * advertises a different endpoint than on the previous attempt, is handled by a
 * later attempt instead of failing immediately or retrying a stale URL.
 *
 * @param retries - Maximum number of attempts (default: `CONNECTION_CONFIG.maxRetries`)
 * @returns The connected browser instance
 * @throws Error if endpoint discovery or the connection fails on the final
 *   attempt, or if `retries` allows no attempt at all
 */
export async function connectToRemoteBrowser(retries?: number): Promise<Browser> {
  const maxRetries = retries ?? CONNECTION_CONFIG.maxRetries;

  for (let attempt = 1; attempt <= maxRetries; attempt++) {
    // Reset per attempt so the final error never cites an endpoint from an
    // earlier attempt when discovery itself is what failed this time.
    let wsEndpoint: string | undefined;

    try {
      logger.debug(`Connection attempt ${attempt}/${maxRetries}`);
      wsEndpoint = await getBrowserServiceEndpoint();
      logger.info(`Connecting to browser service at ${wsEndpoint}...`);

      const browser = await chromium.connect(wsEndpoint, {
        timeout: CONNECTION_CONFIG.connectionTimeout,
      });
      logger.info(`Successfully connected to browser service`);
      return browser;
    } catch (error: unknown) {
      const reason = error instanceof Error ? error.message : String(error);
      logger.warn(
        `Connection attempt ${attempt}/${maxRetries} failed: ${reason}`
      );

      // "No further iteration will run" rather than `attempt === maxRetries`,
      // so a non-integer `retries` still produces the detailed error.
      const isLastAttempt = attempt + 1 > maxRetries;
      if (isLastAttempt) {
        logger.error(
          `Failed to connect to browser service after ${maxRetries} attempts`
        );
        throw new Error(
          wsEndpoint
            ? `Failed to connect to browser service at ${wsEndpoint}: ${reason}`
            : `Failed to connect to browser service: ${reason}`
        );
      }

      logger.debug(`Waiting ${CONNECTION_CONFIG.retryDelay}ms before retry...`);
      await new Promise(resolve => setTimeout(resolve, CONNECTION_CONFIG.retryDelay));
    }
  }

  // Only reachable when the loop body never ran (e.g. `retries` <= 0).
  throw new Error('Failed to connect to browser service');
}
/**
 * Probe the browser service's `/health` endpoint.
 *
 * The host comes from `BROWSER_WS_HOST` (default `localhost`) and the port from
 * `BROWSER_HEALTH_PORT` (default `3002`). Never throws: any failure while
 * fetching or parsing is logged and reported as `false`.
 *
 * @returns `true` only when the JSON body has `status === 'healthy'`
 */
export async function checkBrowserServiceHealth(): Promise<boolean> {
  try {
    const host = process.env.BROWSER_WS_HOST || 'localhost';
    const port = process.env.BROWSER_HEALTH_PORT || '3002';
    const payload = await (await fetch(`http://${host}:${port}/health`)).json();

    if (payload.status !== 'healthy') {
      logger.warn('Browser service health check failed:', payload);
      return false;
    }

    logger.info('Browser service health check passed');
    return true;
  } catch (err: any) {
    logger.error('Browser service health check error:', err.message);
    return false;
  }
}

View File

@@ -2,11 +2,9 @@ import {
Page,
Browser,
CDPSession,
BrowserContext,
} from 'playwright';
BrowserContext
} from 'playwright-core';
import { Socket } from "socket.io";
import { chromium } from 'playwright-extra';
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
import fetch from 'cross-fetch';
import sharp from 'sharp';
@@ -16,6 +14,7 @@ import { WorkflowGenerator } from "../../workflow-management/classes/Generator";
import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter";
import { getDecryptedProxyConfig } from '../../routes/proxy';
import { getInjectableScript } from 'idcac-playwright';
import { connectToRemoteBrowser } from '../browserConnection';
declare global {
interface Window {
@@ -83,8 +82,6 @@ interface ProcessedSnapshot {
};
}
chromium.use(stealthPlugin());
const MEMORY_CONFIG = {
gcInterval: 20000, // Check memory more frequently (20s instead of 60s)
maxHeapSize: 1536 * 1024 * 1024, // 1.5GB
@@ -567,23 +564,7 @@ export class RemoteBrowser {
while (!success && retryCount < MAX_RETRIES) {
try {
this.browser = <Browser>(await chromium.launch({
headless: true,
args: [
"--disable-blink-features=AutomationControlled",
"--disable-web-security",
"--disable-features=IsolateOrigins,site-per-process",
"--disable-site-isolation-trials",
"--disable-extensions",
"--no-sandbox",
"--disable-dev-shm-usage",
"--disable-gpu",
"--force-color-profile=srgb",
"--force-device-scale-factor=2",
"--ignore-certificate-errors",
"--mute-audio"
],
}));
this.browser = await connectToRemoteBrowser();
if (!this.browser || this.browser.isConnected() === false) {
throw new Error('Browser failed to launch or is not connected');
@@ -683,9 +664,9 @@ export class RemoteBrowser {
try {
const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']);
await blocker.enableBlockingInPage(this.currentPage);
await blocker.enableBlockingInPage(this.currentPage as any);
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
await blocker.disableBlockingInPage(this.currentPage);
await blocker.disableBlockingInPage(this.currentPage as any);
console.log('Adblocker initialized');
} catch (error: any) {
console.warn('Failed to initialize adblocker, continuing without it:', error.message);