From 762654395b93897f77d2b37f3586d1dfc814caeb Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Sat, 29 Nov 2025 14:40:59 +0530 Subject: [PATCH] feat: add promise timeout, socket cleanup --- .../classes/RemoteBrowser.ts | 1723 ++++------------- server/src/browser-management/controller.ts | 197 +- .../src/browser-management/inputHandlers.ts | 37 +- server/src/socket-connection/connection.ts | 8 +- .../workflow-management/classes/Generator.ts | 58 + 5 files changed, 640 insertions(+), 1383 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index c88ba068..731b583d 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -9,13 +9,14 @@ import { chromium } from 'playwright-extra'; import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import { PlaywrightBlocker } from '@cliqz/adblocker-playwright'; import fetch from 'cross-fetch'; -import sharp from 'sharp'; import logger from '../../logger'; import { InterpreterSettings } from "../../types"; import { WorkflowGenerator } from "../../workflow-management/classes/Generator"; import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter"; import { getDecryptedProxyConfig } from '../../routes/proxy'; import { getInjectableScript } from 'idcac-playwright'; +import { FingerprintInjector } from "fingerprint-injector"; +import { FingerprintGenerator } from "fingerprint-generator"; declare global { interface Window { @@ -35,52 +36,7 @@ interface RRWebSnapshot { interface ProcessedSnapshot { snapshot: RRWebSnapshot; - resources: { - stylesheets: Array<{ - href: string; - content: string; - media?: string; - }>; - images: Array<{ - src: string; - dataUrl: string; - alt?: string; - }>; - fonts: Array<{ - url: string; - dataUrl: string; - format?: string; - }>; - scripts: Array<{ - src: string; - content: string; - type?: string; - }>; - media: Array<{ - src: string; - dataUrl: string; - type: string; - }>; - }; baseUrl: string; - viewport: { width: number; height: number }; - timestamp: number; - processingStats: { - discoveredResources: { - images: number; - stylesheets: number; - scripts: number; - fonts: number; - media: number; - }; - cachedResources: { - stylesheets: number; - images: number; - fonts: number; - scripts: number; - media: number; - }; - }; } chromium.use(stealthPlugin()); @@ -91,33 +47,6 @@ const MEMORY_CONFIG = { heapUsageThreshold: 0.7 // 70% (reduced threshold to react earlier) }; -const DEFAULT_VIEWPORT = { - width: 1280, - height: 720, - deviceScaleFactor: 1, - mobile: false -}; - -const SCREENCAST_CONFIG: { - format: "jpeg" | "png"; - maxWidth: number; - maxHeight: number; - targetFPS: number; - compressionQuality: number; - maxQueueSize: number; - skipFrameThreshold: number, - enableAdaptiveQuality: boolean, -} = { - format: 'jpeg', - maxWidth: DEFAULT_VIEWPORT.width, - maxHeight: DEFAULT_VIEWPORT.height, - targetFPS: 30, - compressionQuality: 0.8, - maxQueueSize: 2, - skipFrameThreshold: 100, - enableAdaptiveQuality: true, -}; - /** * This class represents a remote browser instance. * It is used to allow a variety of interaction with the Playwright's browser instance. @@ -183,30 +112,24 @@ export class RemoteBrowser { */ public interpreter: WorkflowInterpreter; - - private screenshotQueue: Buffer[] = []; - private isProcessingScreenshot = false; - private screencastInterval: NodeJS.Timeout | null = null - private isScreencastActive: boolean = false; - - private isDOMStreamingActive: boolean = false; + public isDOMStreamingActive: boolean = false; private domUpdateInterval: NodeJS.Timeout | null = null; - private renderingMode: "screenshot" | "dom" = "screenshot"; private lastScrollPosition = { x: 0, y: 0 }; private scrollThreshold = 200; // pixels private snapshotDebounceTimeout: NodeJS.Timeout | null = null; - private isScrollTriggeredSnapshot = false; private networkRequestTimeout: NodeJS.Timeout | null = null; private pendingNetworkRequests: string[] = []; - private readonly NETWORK_QUIET_PERIOD = 8000; private readonly INITIAL_LOAD_QUIET_PERIOD = 3000; private networkWaitStartTime: number = 0; private progressInterval: NodeJS.Timeout | null = null; private hasShownInitialLoader: boolean = false; private isInitialLoadInProgress: boolean = false; + private memoryCleanupInterval: NodeJS.Timeout | null = null; + private memoryManagementInterval: NodeJS.Timeout | null = null; + /** * Initializes a new instances of the {@link Generator} and {@link WorkflowInterpreter} classes and * assigns the socket instance everywhere. @@ -220,126 +143,63 @@ export class RemoteBrowser { this.generator = new WorkflowGenerator(socket, poolId); } - private cleanupMemory(): void { - if (this.screenshotQueue.length > 10) { - this.screenshotQueue = this.screenshotQueue.slice(-3); // Keep only last 3 - } - } - - private setupMemoryCleanup(): void { - setInterval(() => { - this.cleanupMemory(); - }, 30000); // Every 30 seconds - } - private async processRRWebSnapshot( snapshot: RRWebSnapshot ): Promise { const baseUrl = this.currentPage?.url() || ""; - const resources = { - stylesheets: [] as Array<{ - href: string; - content: string; - media?: string; - }>, - images: [] as Array<{ src: string; dataUrl: string; alt?: string }>, - fonts: [] as Array<{ url: string; dataUrl: string; format?: string }>, - scripts: [] as Array<{ src: string; content: string; type?: string }>, - media: [] as Array<{ src: string; dataUrl: string; type: string }>, - }; - - const viewport = (await this.currentPage?.viewportSize()) || { - width: 1280, - height: 720, - }; - return { snapshot, - resources, - baseUrl, - viewport, - timestamp: Date.now(), - processingStats: { - discoveredResources: { - images: resources.images.length, - stylesheets: resources.stylesheets.length, - scripts: resources.scripts.length, - fonts: resources.fonts.length, - media: resources.media.length, - }, - cachedResources: { - stylesheets: resources.stylesheets.length, - images: resources.images.length, - fonts: resources.fonts.length, - scripts: resources.scripts.length, - media: resources.media.length, - }, - }, + baseUrl }; } private initializeMemoryManagement(): void { - setInterval(() => { - const memoryUsage = process.memoryUsage(); - const heapUsageRatio = memoryUsage.heapUsed / MEMORY_CONFIG.maxHeapSize; - - if (heapUsageRatio > MEMORY_CONFIG.heapUsageThreshold * 1.2) { - logger.warn('Critical memory pressure detected, triggering emergency cleanup'); - this.performMemoryCleanup(); - } else if (heapUsageRatio > MEMORY_CONFIG.heapUsageThreshold) { - logger.warn('High memory usage detected, triggering cleanup'); - - if (this.screenshotQueue.length > 0) { - this.screenshotQueue = []; - logger.info('Screenshot queue cleared due to memory pressure'); - } - - if (global.gc && heapUsageRatio > MEMORY_CONFIG.heapUsageThreshold * 1.1) { - global.gc(); - } - } - - if (this.screenshotQueue.length > SCREENCAST_CONFIG.maxQueueSize) { - this.screenshotQueue = this.screenshotQueue.slice(-SCREENCAST_CONFIG.maxQueueSize); - } - }, MEMORY_CONFIG.gcInterval); + this.memoryManagementInterval = setInterval(() => { + const memoryUsage = process.memoryUsage(); + const heapUsageRatio = memoryUsage.heapUsed / MEMORY_CONFIG.maxHeapSize; + + if (heapUsageRatio > MEMORY_CONFIG.heapUsageThreshold * 1.2) { + logger.warn( + "Critical memory pressure detected, triggering emergency cleanup" + ); + this.performMemoryCleanup(); + } else if (heapUsageRatio > MEMORY_CONFIG.heapUsageThreshold) { + logger.warn("High memory usage detected, triggering cleanup"); + + if ( + global.gc && + heapUsageRatio > MEMORY_CONFIG.heapUsageThreshold * 1.1 + ) { + global.gc(); + } + } + }, MEMORY_CONFIG.gcInterval); } private async performMemoryCleanup(): Promise { - this.screenshotQueue = []; - this.isProcessingScreenshot = false; - - if (global.gc) { - try { - global.gc(); - logger.info('Garbage collection requested'); - } catch (error) { - logger.error('Error during garbage collection:', error); - } + if (global.gc) { + try { + global.gc(); + logger.info("Garbage collection requested"); + } catch (error) { + logger.error("Error during garbage collection:", error); } - - if (this.client) { - try { - await this.stopScreencast(); - - await new Promise(resolve => setTimeout(resolve, 500)); - - this.client = null; - if (this.currentPage) { - this.client = await this.currentPage.context().newCDPSession(this.currentPage); - await this.startScreencast(); - logger.info('CDP session reset completed'); - } - } catch (error) { - logger.error('Error resetting CDP session:', error); - } + } + + if (this.currentPage) { + try { + await new Promise((resolve) => setTimeout(resolve, 500)); + logger.info("CDP session reset completed"); + } catch (error) { + logger.error("Error resetting CDP session:", error); } - - this.socket.emit('memory-cleanup', { - userId: this.userId, - timestamp: Date.now() - }); + } + + this.socket.emit("memory-cleanup", { + userId: this.userId, + timestamp: Date.now(), + }); } /** @@ -374,30 +234,25 @@ export class RemoteBrowser { * Setup scroll event listener to track user scrolling */ private setupScrollEventListener(): void { + try { + this.socket.removeAllListeners('dom:scroll'); + } catch (error: any) { + logger.warn(`Error removing old scroll listener: ${error.message}`); + } + this.socket.on( "dom:scroll", async (data: { deltaX: number; deltaY: number }) => { if (!this.isDOMStreamingActive || !this.currentPage) return; try { - logger.debug( - `Received scroll event: deltaX=${data.deltaX}, deltaY=${data.deltaY}` - ); - await this.currentPage.mouse.wheel(data.deltaX, data.deltaY); - await this.currentPage.waitForLoadState("networkidle", { timeout: 5000 }); const scrollInfo = await this.currentPage.evaluate(() => ({ x: window.scrollX, y: window.scrollY, - maxX: Math.max( - 0, - document.documentElement.scrollWidth - window.innerWidth - ), - maxY: Math.max( - 0, - document.documentElement.scrollHeight - window.innerHeight - ), + maxX: Math.max(0, document.documentElement.scrollWidth - window.innerWidth), + maxY: Math.max(0, document.documentElement.scrollHeight - window.innerHeight), documentHeight: document.documentElement.scrollHeight, viewportHeight: window.innerHeight, })); @@ -406,23 +261,14 @@ export class RemoteBrowser { Math.abs(scrollInfo.y - this.lastScrollPosition.y) + Math.abs(scrollInfo.x - this.lastScrollPosition.x); - logger.debug( - `Scroll delta: ${scrollDelta}, threshold: ${this.scrollThreshold}` - ); - if (scrollDelta > this.scrollThreshold) { this.lastScrollPosition = { x: scrollInfo.x, y: scrollInfo.y }; - this.isScrollTriggeredSnapshot = true; if (this.snapshotDebounceTimeout) { clearTimeout(this.snapshotDebounceTimeout); } this.snapshotDebounceTimeout = setTimeout(async () => { - logger.info( - `Triggering snapshot due to scroll. Position: ${scrollInfo.y}/${scrollInfo.maxY}` - ); - await this.makeAndEmitDOMSnapshot(); }, 300); } @@ -436,6 +282,15 @@ export class RemoteBrowser { private setupPageChangeListeners(): void { if (!this.currentPage) return; + try { + if (!this.currentPage.isClosed()) { + this.currentPage.removeAllListeners("domcontentloaded"); + this.currentPage.removeAllListeners("response"); + } + } catch (error: any) { + logger.warn(`Error removing page change listeners: ${error.message}`); + } + this.currentPage.on("domcontentloaded", async () => { if (!this.isInitialLoadInProgress) { logger.info("DOM content loaded - triggering snapshot"); @@ -463,6 +318,7 @@ export class RemoteBrowser { } this.networkWaitStartTime = Date.now(); + this.progressInterval = setInterval(() => { const elapsed = Date.now() - this.networkWaitStartTime; const navigationProgress = Math.min((elapsed / this.INITIAL_LOAD_QUIET_PERIOD) * 40, 35); @@ -506,6 +362,14 @@ export class RemoteBrowser { } private async setupPageEventListeners(page: Page) { + try { + page.removeAllListeners('framenavigated'); + page.removeAllListeners('load'); + logger.debug('Removed existing page event listeners before re-registering'); + } catch (error: any) { + logger.warn(`Error removing existing page listeners: ${error.message}`); + } + page.on('framenavigated', async (frame) => { if (frame === page.mainFrame()) { const currentUrl = page.url(); @@ -522,6 +386,11 @@ export class RemoteBrowser { try { await page.waitForLoadState('networkidle', { timeout: 5000 }); + if (page.isClosed()) { + logger.debug('Page is closed, cannot inject script'); + return false; + } + await page.evaluate(getInjectableScript()); return true; } catch (error: any) { @@ -548,6 +417,28 @@ export class RemoteBrowser { return userAgents[Math.floor(Math.random() * userAgents.length)]; } + /** + * Apply modern fingerprint-suite injection + */ + private async applyEnhancedFingerprinting(context: BrowserContext): Promise { + try { + try { + const fingerprintGenerator = new FingerprintGenerator(); + const fingerprint = fingerprintGenerator.getFingerprint(); + const fingerprintInjector = new FingerprintInjector(); + + await fingerprintInjector.attachFingerprintToPlaywright(context as any, fingerprint); + + logger.info("Enhanced fingerprinting applied successfully"); + } catch (fingerprintError: any) { + logger.warn(`Modern fingerprint injection failed: ${fingerprintError.message}. Using existing protection.`); + } + } catch (error: any) { + logger.error(`Enhanced fingerprinting failed: ${error.message}`); + // Don't throw - fallback to basic functionality + } + } + /** * An asynchronous constructor for asynchronously initialized properties. * Must be called right after creating an instance of RemoteBrowser class. @@ -556,6 +447,7 @@ export class RemoteBrowser { */ public initialize = async (userId: string): Promise => { const MAX_RETRIES = 3; + const OVERALL_INIT_TIMEOUT = 120000; let retryCount = 0; let success = false; @@ -565,715 +457,160 @@ export class RemoteBrowser { }); this.emitLoadingProgress(0, 0); - while (!success && retryCount < MAX_RETRIES) { + const initializationPromise = (async () => { + while (!success && retryCount < MAX_RETRIES) { try { - this.browser = (await chromium.launch({ - headless: true, - args: [ - "--disable-blink-features=AutomationControlled", - "--disable-web-security", - "--disable-features=IsolateOrigins,site-per-process", - "--disable-site-isolation-trials", - "--disable-extensions", - "--no-sandbox", - "--disable-dev-shm-usage", - "--disable-gpu", - "--force-color-profile=srgb", - "--force-device-scale-factor=2", - "--ignore-certificate-errors", - "--mute-audio" - ], - })); - - if (!this.browser || this.browser.isConnected() === false) { - throw new Error('Browser failed to launch or is not connected'); - } + this.browser = (await chromium.launch({ + headless: true, + args: [ + "--disable-blink-features=AutomationControlled", + "--disable-web-security", + "--disable-features=IsolateOrigins,site-per-process", + "--disable-site-isolation-trials", + "--disable-extensions", + "--no-sandbox", + "--disable-dev-shm-usage", + "--disable-gpu", + "--force-color-profile=srgb", + "--force-device-scale-factor=2", + "--ignore-certificate-errors", + "--mute-audio" + ], + })); - this.emitLoadingProgress(20, 0); + if (!this.browser || this.browser.isConnected() === false) { + throw new Error('Browser failed to launch or is not connected'); + } - const proxyConfig = await getDecryptedProxyConfig(userId); - let proxyOptions: { server: string, username?: string, password?: string } = { server: '' }; - - if (proxyConfig.proxy_url) { - proxyOptions = { - server: proxyConfig.proxy_url, - ...(proxyConfig.proxy_username && proxyConfig.proxy_password && { - username: proxyConfig.proxy_username, - password: proxyConfig.proxy_password, - }), - }; - } - - const contextOptions: any = { - // viewport: { height: 400, width: 900 }, - // recordVideo: { dir: 'videos/' } - // Force reduced motion to prevent animation issues - reducedMotion: 'reduce', - // Force JavaScript to be enabled - javaScriptEnabled: true, - // Set a reasonable timeout - timeout: 50000, - // Disable hardware acceleration - forcedColors: 'none', - isMobile: false, - hasTouch: false, - userAgent: this.getUserAgent(), + this.emitLoadingProgress(20, 0); + + const proxyConfig = await getDecryptedProxyConfig(userId); + let proxyOptions: { server: string, username?: string, password?: string } = { server: '' }; + + if (proxyConfig.proxy_url) { + proxyOptions = { + server: proxyConfig.proxy_url, + ...(proxyConfig.proxy_username && proxyConfig.proxy_password && { + username: proxyConfig.proxy_username, + password: proxyConfig.proxy_password, + }), }; - - if (proxyOptions.server) { - contextOptions.proxy = { - server: proxyOptions.server, - username: proxyOptions.username ? proxyOptions.username : undefined, - password: proxyOptions.password ? proxyOptions.password : undefined, - }; - } - - await new Promise(resolve => setTimeout(resolve, 500)); - - const contextPromise = this.browser.newContext(contextOptions); - this.context = await Promise.race([ - contextPromise, - new Promise((_, reject) => { - setTimeout(() => reject(new Error('Context creation timed out after 15s')), 15000); - }) - ]) as BrowserContext; - - await this.context.addInitScript( - `const defaultGetter = Object.getOwnPropertyDescriptor( - Navigator.prototype, - "webdriver" - ).get; - defaultGetter.apply(navigator); - defaultGetter.toString(); - Object.defineProperty(Navigator.prototype, "webdriver", { - set: undefined, - enumerable: true, - configurable: true, - get: new Proxy(defaultGetter, { - apply: (target, thisArg, args) => { - Reflect.apply(target, thisArg, args); - return false; - }, - }), - }); - const patchedGetter = Object.getOwnPropertyDescriptor( - Navigator.prototype, - "webdriver" - ).get; - patchedGetter.apply(navigator); - patchedGetter.toString();` - ); + } - await this.context.addInitScript({ path: './server/src/browser-management/classes/rrweb-bundle.js' }); - - this.currentPage = await this.context.newPage(); + const contextOptions: any = { + // viewport: { height: 400, width: 900 }, + // recordVideo: { dir: 'videos/' } + // Force reduced motion to prevent animation issues + reducedMotion: 'reduce', + // Force JavaScript to be enabled + javaScriptEnabled: true, + // Set a reasonable timeout + timeout: 50000, + // Disable hardware acceleration + forcedColors: 'none', + isMobile: false, + hasTouch: false, + userAgent: this.getUserAgent(), + }; - this.emitLoadingProgress(40, 0); + if (proxyOptions.server) { + contextOptions.proxy = { + server: proxyOptions.server, + username: proxyOptions.username ? proxyOptions.username : undefined, + password: proxyOptions.password ? proxyOptions.password : undefined, + }; + } - await this.setupPageEventListeners(this.currentPage); - - const viewportSize = await this.currentPage.viewportSize(); - if (viewportSize) { - this.socket.emit('viewportInfo', { - width: viewportSize.width, - height: viewportSize.height, - userId: this.userId - }); - } - - try { - const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']); - await blocker.enableBlockingInPage(this.currentPage); - this.client = await this.currentPage.context().newCDPSession(this.currentPage); - await blocker.disableBlockingInPage(this.currentPage); - console.log('Adblocker initialized'); - } catch (error: any) { - console.warn('Failed to initialize adblocker, continuing without it:', error.message); - // Still need to set up the CDP session even if blocker fails - this.client = await this.currentPage.context().newCDPSession(this.currentPage); - } + await new Promise(resolve => setTimeout(resolve, 500)); - this.emitLoadingProgress(60, 0); + const contextPromise = this.browser.newContext(contextOptions); + this.context = await Promise.race([ + contextPromise, + new Promise((_, reject) => { + setTimeout(() => reject(new Error('Context creation timed out after 15s')), 15000); + }) + ]) as BrowserContext; - success = true; - logger.log('debug', `Browser initialized successfully for user ${userId}`); + await this.applyEnhancedFingerprinting(this.context); + + await this.context.addInitScript( + `const defaultGetter = Object.getOwnPropertyDescriptor( + Navigator.prototype, + "webdriver" + ).get; + defaultGetter.apply(navigator); + defaultGetter.toString(); + Object.defineProperty(Navigator.prototype, "webdriver", { + set: undefined, + enumerable: true, + configurable: true, + get: new Proxy(defaultGetter, { + apply: (target, thisArg, args) => { + Reflect.apply(target, thisArg, args); + return false; + }, + }), + }); + const patchedGetter = Object.getOwnPropertyDescriptor( + Navigator.prototype, + "webdriver" + ).get; + patchedGetter.apply(navigator); + patchedGetter.toString();` + ); + + await this.context.addInitScript({ path: './server/src/browser-management/classes/rrweb-bundle.js' }); + + this.currentPage = await this.context.newPage(); + + this.emitLoadingProgress(40, 0); + + await this.setupPageEventListeners(this.currentPage); + + try { + const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']); + await blocker.enableBlockingInPage(this.currentPage); + this.client = await this.currentPage.context().newCDPSession(this.currentPage); + await blocker.disableBlockingInPage(this.currentPage); + console.log('Adblocker initialized'); + } catch (error: any) { + console.warn('Failed to initialize adblocker, continuing without it:', error.message); + // Still need to set up the CDP session even if blocker fails + this.client = await this.currentPage.context().newCDPSession(this.currentPage); + } + + this.emitLoadingProgress(60, 0); + + success = true; + logger.log('debug', `Browser initialized successfully for user ${userId}`); } catch (error: any) { - retryCount++; - logger.log('error', `Browser initialization failed (attempt ${retryCount}/${MAX_RETRIES}): ${error.message}`); - - if (this.browser) { - try { - await this.browser.close(); - } catch (closeError) { - logger.log('warn', `Failed to close browser during cleanup: ${closeError}`); - } - this.browser = null; - } - - if (retryCount >= MAX_RETRIES) { - throw new Error(`Failed to initialize browser after ${MAX_RETRIES} attempts: ${error.message}`); - } - - await new Promise(resolve => setTimeout(resolve, 1000)); - } - } + retryCount++; + logger.log('error', `Browser initialization failed (attempt ${retryCount}/${MAX_RETRIES}): ${error.message}`); - this.setupMemoryCleanup(); - // this.initializeMemoryManagement(); - }; - - public updateViewportInfo = async (): Promise => { - if (this.currentPage) { - const viewportSize = await this.currentPage.viewportSize(); - if (viewportSize) { - this.socket.emit('viewportInfo', { - width: viewportSize.width, - height: viewportSize.height, - userId: this.userId - }); - } - } - }; - - /** - * Extract data from a list of elements on a page - * @param page - Playwright Page object - * @param listSelector - CSS selector for the list container - * @param fields - Record of field configurations - * @param limit - Maximum number of items to extract (default: 5) - * @returns Promise>> - Array of extracted data objects - */ - private async extractListData( - page: Page, - listSelector: string, - fields: Record, - limit: number = 5 - ): Promise>> { - if (page.isClosed()) { - logger.warn("Page is closed, cannot extract list data"); - return []; - } - - return await page.evaluate( - async ({ listSelector, fields, limit }: { - listSelector: string; - fields: Record; - limit: number; - }) => { - const convertedFields: Record = {}; - - for (const [key, field] of Object.entries(fields)) { - convertedFields[field.label] = { - selector: field.selectorObj.selector, - attribute: field.selectorObj.attribute - }; - } - - const queryElement = (rootElement: Element | Document, selector: string): Element | null => { - if (!selector.includes('>>') && !selector.includes(':>>')) { - return rootElement.querySelector(selector); - } - - const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim()); - let currentElement: Element | Document | null = rootElement; - - for (let i = 0; i < parts.length; i++) { - if (!currentElement) return null; - - if ((currentElement as Element).tagName === 'IFRAME' || (currentElement as Element).tagName === 'FRAME') { - try { - const frameElement = currentElement as HTMLIFrameElement | HTMLFrameElement; - const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document; - if (!frameDoc) return null; - currentElement = frameDoc.querySelector(parts[i]); - continue; - } catch (e) { - console.warn(`Cannot access ${(currentElement as Element).tagName.toLowerCase()} content:`, e); - return null; - } - } - - let nextElement: Element | null = null; - - if ('querySelector' in currentElement) { - nextElement = currentElement.querySelector(parts[i]); - } - - if (!nextElement && 'shadowRoot' in currentElement && (currentElement as Element).shadowRoot) { - nextElement = (currentElement as Element).shadowRoot!.querySelector(parts[i]); - } - - if (!nextElement && 'children' in currentElement) { - const children: any = Array.from((currentElement as Element).children || []); - for (const child of children) { - if (child.shadowRoot) { - nextElement = child.shadowRoot.querySelector(parts[i]); - if (nextElement) break; - } - } - } - - currentElement = nextElement; - } - - return currentElement as Element | null; - }; - - const queryElementAll = (rootElement: Element | Document, selector: string): Element[] => { - if (!selector.includes('>>') && !selector.includes(':>>')) { - return Array.from(rootElement.querySelectorAll(selector)); - } - - const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim()); - let currentElements: (Element | Document)[] = [rootElement]; - - for (const part of parts) { - const nextElements: Element[] = []; - - for (const element of currentElements) { - if ((element as Element).tagName === 'IFRAME' || (element as Element).tagName === 'FRAME') { - try { - const frameElement = element as HTMLIFrameElement | HTMLFrameElement; - const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document; - if (frameDoc) { - nextElements.push(...Array.from(frameDoc.querySelectorAll(part))); - } - } catch (e) { - console.warn(`Cannot access ${(element as Element).tagName.toLowerCase()} content:`, e); - continue; - } - } else { - if ('querySelectorAll' in element) { - nextElements.push(...Array.from(element.querySelectorAll(part))); - } - - if ('shadowRoot' in element && (element as Element).shadowRoot) { - nextElements.push(...Array.from((element as Element).shadowRoot!.querySelectorAll(part))); - } - - if ('children' in element) { - const children = Array.from((element as Element).children || []); - for (const child of children) { - if (child.shadowRoot) { - nextElements.push(...Array.from(child.shadowRoot.querySelectorAll(part))); - } - } - } - } - } - - currentElements = nextElements; - } - - return currentElements as Element[]; - }; - - function extractValue(element: Element, attribute: string): string | null { - if (!element) return null; - - const baseURL = element.ownerDocument?.location?.href || window.location.origin; - - if (element.shadowRoot) { - const shadowContent = element.shadowRoot.textContent; - if (shadowContent?.trim()) { - return shadowContent.trim(); - } - } - - if (attribute === 'innerText') { - return (element as HTMLElement).innerText.trim(); - } else if (attribute === 'innerHTML') { - return element.innerHTML.trim(); - } else if (attribute === 'src' || attribute === 'href') { - if (attribute === 'href' && element.tagName !== 'A') { - const parentElement = element.parentElement; - if (parentElement && parentElement.tagName === 'A') { - const parentHref = parentElement.getAttribute('href'); - if (parentHref) { - try { - return new URL(parentHref, baseURL).href; - } catch (e) { - return parentHref; - } - } - } - } - - const attrValue = element.getAttribute(attribute); - const dataAttr = attrValue || element.getAttribute('data-' + attribute); - - if (!dataAttr || dataAttr.trim() === '') { - if (attribute === 'src') { - const style = window.getComputedStyle(element); - const bgImage = style.backgroundImage; - if (bgImage && bgImage !== 'none') { - const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/); - return matches ? new URL(matches[1], baseURL).href : null; - } - } - return null; - } - + if (this.browser) { try { - return new URL(dataAttr, baseURL).href; - } catch (e) { - console.warn('Error creating URL from', dataAttr, e); - return dataAttr; // Return the original value if URL construction fails + await this.browser.close(); + } catch (closeError) { + logger.log('warn', `Failed to close browser during cleanup: ${closeError}`); } + this.browser = null; } - return element.getAttribute(attribute); + + if (retryCount >= MAX_RETRIES) { + throw new Error(`Failed to initialize browser after ${MAX_RETRIES} attempts: ${error.message}`); + } + + await new Promise(resolve => setTimeout(resolve, 1000)); } - - function findTableAncestor(element: Element): { type: string; element: Element } | null { - let currentElement: Element | null = element; - const MAX_DEPTH = 5; - let depth = 0; - - while (currentElement && depth < MAX_DEPTH) { - if (currentElement.getRootNode() instanceof ShadowRoot) { - currentElement = (currentElement.getRootNode() as ShadowRoot).host; - continue; - } - - if (currentElement.tagName === 'TD') { - return { type: 'TD', element: currentElement }; - } else if (currentElement.tagName === 'TR') { - return { type: 'TR', element: currentElement }; - } - - if (currentElement.tagName === 'IFRAME' || currentElement.tagName === 'FRAME') { - try { - const frameElement = currentElement as HTMLIFrameElement | HTMLFrameElement; - currentElement = frameElement.contentDocument?.body || null; - } catch (e) { - return null; - } - } else { - currentElement = currentElement.parentElement; - } - depth++; - } - return null; - } - - function getCellIndex(td: Element): number { - if (td.getRootNode() instanceof ShadowRoot) { - const shadowRoot = td.getRootNode() as ShadowRoot; - const allCells = Array.from(shadowRoot.querySelectorAll('td')); - return allCells.indexOf(td as HTMLTableCellElement); - } - - let index = 0; - let sibling = td; - while (sibling = sibling.previousElementSibling as Element) { - index++; - } - return index; - } - - function hasThElement(row: Element, tableFields: Record): boolean { - for (const [_, { selector }] of Object.entries(tableFields)) { - const element = queryElement(row, selector); - if (element) { - let current: Element | ShadowRoot | Document | null = element; - while (current && current !== row) { - if (current.getRootNode() instanceof ShadowRoot) { - current = (current.getRootNode() as ShadowRoot).host; - continue; - } - - if ((current as Element).tagName === 'TH') return true; - - if ((current as Element).tagName === 'IFRAME' || (current as Element).tagName === 'FRAME') { - try { - const frameElement = current as HTMLIFrameElement | HTMLFrameElement; - current = frameElement.contentDocument?.body || null; - } catch (e) { - break; - } - } else { - current = (current as Element).parentElement; - } - } - } - } - return false; - } - - function filterRowsBasedOnTag(rows: Element[], tableFields: Record): Element[] { - for (const row of rows) { - if (hasThElement(row, tableFields)) { - return rows; - } - } - return rows.filter(row => { - const directTH = row.getElementsByTagName('TH').length === 0; - const shadowTH = row.shadowRoot ? - row.shadowRoot.querySelector('th') === null : true; - return directTH && shadowTH; - }); - } - - function calculateClassSimilarity(classList1: string[], classList2: string[]): number { - const set1 = new Set(classList1); - const set2 = new Set(classList2); - const intersection = new Set([...set1].filter(x => set2.has(x))); - const union = new Set([...set1, ...set2]); - return intersection.size / union.size; - } - - function findSimilarElements(baseElement: Element, similarityThreshold: number = 0.7): Element[] { - const baseClasses = Array.from(baseElement.classList); - if (baseClasses.length === 0) return []; - - const allElements: Element[] = []; - - allElements.push(...Array.from(document.getElementsByTagName(baseElement.tagName))); - - if (baseElement.getRootNode() instanceof ShadowRoot) { - const shadowHost = (baseElement.getRootNode() as ShadowRoot).host; - allElements.push(...Array.from(shadowHost.getElementsByTagName(baseElement.tagName))); - } - - const frames = [ - ...Array.from(document.getElementsByTagName('iframe')), - ...Array.from(document.getElementsByTagName('frame')) - ]; - - for (const frame of frames) { - try { - const frameElement = frame as HTMLIFrameElement | HTMLFrameElement; - const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document; - if (frameDoc) { - allElements.push(...Array.from(frameDoc.getElementsByTagName(baseElement.tagName))); - } - } catch (e) { - console.warn(`Cannot access ${frame.tagName.toLowerCase()} content:`, e); - } - } - - return allElements.filter(element => { - if (element === baseElement) return false; - const similarity = calculateClassSimilarity( - baseClasses, - Array.from(element.classList) - ); - return similarity >= similarityThreshold; - }); - } - - let containers = queryElementAll(document, listSelector); - - if (containers.length === 0) return []; - - if (limit > 1 && containers.length === 1) { - const baseContainer = containers[0]; - const similarContainers = findSimilarElements(baseContainer); - - if (similarContainers.length > 0) { - const newContainers = similarContainers.filter(container => - !container.matches(listSelector) - ); - containers = [...containers, ...newContainers]; - } - } - - const containerFields = containers.map(() => ({ - tableFields: {} as Record, - nonTableFields: {} as Record - })); - - containers.forEach((container, containerIndex) => { - for (const [label, field] of Object.entries(convertedFields)) { - const sampleElement = queryElement(container, field.selector); - - if (sampleElement) { - const ancestor = findTableAncestor(sampleElement); - if (ancestor) { - containerFields[containerIndex].tableFields[label] = { - ...field, - tableContext: ancestor.type, - cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1 - }; - } else { - containerFields[containerIndex].nonTableFields[label] = field; - } - } else { - containerFields[containerIndex].nonTableFields[label] = field; - } - } - }); - - const tableData: Array> = []; - const nonTableData: Array> = []; - - for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { - const container = containers[containerIndex]; - const { tableFields } = containerFields[containerIndex]; - - if (Object.keys(tableFields).length > 0) { - const firstField = Object.values(tableFields)[0]; - const firstElement = queryElement(container, firstField.selector); - let tableContext: Element | null = firstElement; - - while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) { - if (tableContext.getRootNode() instanceof ShadowRoot) { - tableContext = (tableContext.getRootNode() as ShadowRoot).host; - continue; - } - - if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') { - try { - const frameElement = tableContext as HTMLIFrameElement | HTMLFrameElement; - tableContext = frameElement.contentDocument?.body || null; - } catch (e) { - break; - } - } else { - tableContext = tableContext.parentElement; - } - } - - if (tableContext) { - const rows: Element[] = []; - - rows.push(...Array.from(tableContext.getElementsByTagName('TR'))); - - if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') { - try { - const frameElement = tableContext as HTMLIFrameElement | HTMLFrameElement; - const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document; - if (frameDoc) { - rows.push(...Array.from(frameDoc.getElementsByTagName('TR'))); - } - } catch (e) { - console.warn(`Cannot access ${tableContext.tagName.toLowerCase()} rows:`, e); - } - } - - const processedRows = filterRowsBasedOnTag(rows, tableFields); - - for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) { - const record: Record = {}; - const currentRow = processedRows[rowIndex]; - - for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) { - let element: Element | null = null; - - if (cellIndex !== undefined && cellIndex >= 0) { - let td: Element | null = currentRow.children[cellIndex] || null; - - if (!td && currentRow.shadowRoot) { - const shadowCells = currentRow.shadowRoot.children; - if (shadowCells && shadowCells.length > cellIndex) { - td = shadowCells[cellIndex]; - } - } - - if (td) { - element = queryElement(td, selector); - - if (!element && selector.split(/(?:>>|:>>)/).pop()?.includes('td:nth-child')) { - element = td; - } - - if (!element) { - const tagOnlySelector = selector.split('.')[0]; - element = queryElement(td, tagOnlySelector); - } - - if (!element) { - let currentElement: Element | null = td; - while (currentElement && currentElement.children.length > 0) { - let foundContentChild = false; - for (const child of Array.from(currentElement.children)) { - if (extractValue(child, attribute)) { - currentElement = child; - foundContentChild = true; - break; - } - } - if (!foundContentChild) break; - } - element = currentElement; - } - } - } else { - element = queryElement(currentRow, selector); - } - - if (element) { - const value = extractValue(element, attribute); - if (value !== null) { - record[label] = value; - } - } - } - - if (Object.keys(record).length > 0) { - tableData.push(record); - } - } - } - } - } - - for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { - if (nonTableData.length >= limit) break; - - const container = containers[containerIndex]; - const { nonTableFields } = containerFields[containerIndex]; - - if (Object.keys(nonTableFields).length > 0) { - const record: Record = {}; - - for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) { - const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0]; - const element = queryElement(container, relativeSelector); - - if (element) { - const value = extractValue(element, attribute); - if (value !== null) { - record[label] = value; - } - } - } - - if (Object.keys(record).length > 0) { - nonTableData.push(record); - } - } - } - - const scrapedData = [...tableData, ...nonTableData].slice(0, limit); - return scrapedData; - }, - { listSelector, fields, limit } - ) as Array>; - } + } + })(); + + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => reject(new Error(`Browser initialization timed out after ${OVERALL_INIT_TIMEOUT}ms`)), OVERALL_INIT_TIMEOUT); + }); + + await Promise.race([initializationPromise, timeoutPromise]); + }; /** * Captures a screenshot directly without running the workflow interpreter @@ -1332,133 +669,51 @@ export class RemoteBrowser { } }; + /** + * Removes all socket event listeners + */ + private removeAllSocketListeners(): void { + try { + this.socket.removeAllListeners('captureDirectScreenshot'); + this.socket.removeAllListeners('rerender'); + this.socket.removeAllListeners('settings'); + this.socket.removeAllListeners('changeTab'); + this.socket.removeAllListeners('addTab'); + this.socket.removeAllListeners('closeTab'); + this.socket.removeAllListeners('dom:scroll'); + + logger.debug(`Removed all socket listeners for user ${this.userId}`); + } catch (error: any) { + logger.warn(`Error removing socket listeners: ${error.message}`); + } + } + /** * Registers all event listeners needed for the recording editor session. * Should be called only once after the full initialization of the remote browser. * @returns void */ public registerEditorEvents = (): void => { - // For each event, include userId to make sure events are handled for the correct browser logger.log("debug", `Registering editor events for user: ${this.userId}`); - this.socket.on( - `captureDirectScreenshot:${this.userId}`, - async (settings) => { - logger.debug( - `Direct screenshot capture requested for user ${this.userId}` - ); - await this.captureDirectScreenshot(settings); - } - ); + this.removeAllSocketListeners(); - // For backward compatibility this.socket.on("captureDirectScreenshot", async (settings) => { await this.captureDirectScreenshot(settings); }); - // Listen for specific events for this user - this.socket.on(`rerender:${this.userId}`, async () => { - logger.debug(`Rerender event received for user ${this.userId}`); - if (this.renderingMode === "dom") { - await this.makeAndEmitDOMSnapshot(); - } else { - await this.makeAndEmitScreenshot(); - } - }); - this.socket.on("rerender", async () => { logger.debug( `General rerender event received, checking if for user ${this.userId}` ); - if (this.renderingMode === "dom") { - await this.makeAndEmitDOMSnapshot(); - } else { - await this.makeAndEmitScreenshot(); - } + await this.makeAndEmitDOMSnapshot(); }); - this.socket.on(`settings:${this.userId}`, (settings) => { - this.interpreterSettings = settings; - logger.debug(`Settings updated for user ${this.userId}`); - }); - - this.socket.on(`changeTab:${this.userId}`, async (tabIndex) => { - logger.debug( - `Tab change to ${tabIndex} requested for user ${this.userId}` - ); - await this.changeTab(tabIndex); - }); - - this.socket.on(`addTab:${this.userId}`, async () => { - logger.debug(`New tab requested for user ${this.userId}`); - await this.currentPage?.context().newPage(); - const lastTabIndex = this.currentPage - ? this.currentPage.context().pages().length - 1 - : 0; - await this.changeTab(lastTabIndex); - }); - - this.socket.on(`closeTab:${this.userId}`, async (tabInfo) => { - logger.debug( - `Close tab ${tabInfo.index} requested for user ${this.userId}` - ); - const page = this.currentPage?.context().pages()[tabInfo.index]; - if (page) { - if (tabInfo.isCurrent) { - if (this.currentPage?.context().pages()[tabInfo.index + 1]) { - // next tab - await this.changeTab(tabInfo.index + 1); - } else { - //previous tab - await this.changeTab(tabInfo.index - 1); - } - } - await page.close(); - logger.log( - "debug", - `Tab ${tabInfo.index} was closed for user ${ - this.userId - }, new tab count: ${this.currentPage?.context().pages().length}` - ); - } else { - logger.log( - "error", - `Tab index ${tabInfo.index} out of range for user ${this.userId}` - ); - } - }); - - this.socket.on( - `setViewportSize:${this.userId}`, - async (data: { width: number; height: number }) => { - const { width, height } = data; - logger.log( - "debug", - `Viewport size change to width=${width}, height=${height} requested for user ${this.userId}` - ); - - // Update the browser context's viewport dynamically - if (this.context && this.browser) { - this.context = await this.browser.newContext({ - viewport: { width, height }, - }); - logger.log( - "debug", - `Viewport size updated to width=${width}, height=${height} for user ${this.userId}` - ); - } - } - ); - - // For backward compatibility, also register the standard events - this.socket.on( - "settings", - (settings) => (this.interpreterSettings = settings) - ); this.socket.on( "changeTab", async (tabIndex) => await this.changeTab(tabIndex) ); + this.socket.on("addTab", async () => { await this.currentPage?.context().newPage(); const lastTabIndex = this.currentPage @@ -1466,6 +721,7 @@ export class RemoteBrowser { : 0; await this.changeTab(lastTabIndex); }); + this.socket.on("closeTab", async (tabInfo) => { const page = this.currentPage?.context().pages()[tabInfo.index]; if (page) { @@ -1479,78 +735,6 @@ export class RemoteBrowser { await page.close(); } }); - this.socket.on( - "setViewportSize", - async (data: { width: number; height: number }) => { - const { width, height } = data; - if (this.context && this.browser) { - this.context = await this.browser.newContext({ - viewport: { width, height }, - }); - } - } - ); - - this.socket.on( - "extractListData", - async (data: { - listSelector: string; - fields: Record; - currentListId: number; - pagination: any; - }) => { - if (this.currentPage) { - const extractedData = await this.extractListData( - this.currentPage, - data.listSelector, - data.fields - ); - - this.socket.emit("listDataExtracted", { - currentListId: data.currentListId, - data: extractedData, - }); - } - } - ); - }; - /** - * Subscribes the remote browser for a screencast session - * on [CDP](https://chromedevtools.github.io/devtools-protocol/) level, - * where screenshot is being sent through the socket - * every time the browser's active page updates. - * @returns {Promise} - */ - public subscribeToScreencast = async (): Promise => { - logger.log('debug', `Starting screencast for user: ${this.userId}`); - await this.startScreencast(); - if (!this.client) { - logger.log('warn', 'client is not initialized'); - return; - } - // Set flag to indicate screencast is active - this.isScreencastActive = true; - - await this.updateViewportInfo(); - - this.client.on('Page.screencastFrame', ({ data: base64, sessionId }) => { - // Only process if screencast is still active for this user - if (!this.isScreencastActive) { - return; - } - this.emitScreenshot(Buffer.from(base64, 'base64')) - setTimeout(async () => { - try { - if (!this.client || !this.isScreencastActive) { - logger.log('warn', 'client is not initialized'); - return; - } - await this.client.send('Page.screencastFrameAck', { sessionId: sessionId }); - } catch (e: any) { - logger.log('error', `Screencast error: ${e}`); - } - }, 100); - }); }; /** @@ -1563,10 +747,6 @@ export class RemoteBrowser { } try { - // Enable required CDP domains - await this.client.send("DOM.enable"); - await this.client.send("CSS.enable"); - this.isDOMStreamingActive = true; logger.info("DOM streaming started successfully"); @@ -1663,7 +843,7 @@ export class RemoteBrowser { error.message.includes("Target closed")) ) { logger.debug("DOM snapshot skipped due to page navigation or closure"); - return; // Don't emit error for navigation - this is expected + return; } logger.error("Failed to create rrweb snapshot:", error); @@ -1688,7 +868,7 @@ export class RemoteBrowser { } /** - * Stop DOM streaming - following screencast pattern + * Stop DOM streaming - following dom snapshot pattern */ private async stopDOM(): Promise { this.isDOMStreamingActive = false; @@ -1705,95 +885,148 @@ export class RemoteBrowser { this.pendingNetworkRequests = []; - if (this.client) { - try { - await this.client.send("DOM.disable"); - await this.client.send("CSS.disable"); - } catch (error) { - logger.warn("Error stopping DOM stream:", error); - } - } - logger.info("DOM streaming stopped successfully"); } /**rrweb-bundle - * Terminates the screencast session and closes the remote browser. + * Terminates the dom snapshot session and closes the remote browser. * If an interpretation was running it will be stopped. * @returns {Promise} */ public async switchOff(): Promise { + this.isDOMStreamingActive = false; + + if (this.domUpdateInterval) { + clearInterval(this.domUpdateInterval); + this.domUpdateInterval = null; + } + + if (this.memoryCleanupInterval) { + clearInterval(this.memoryCleanupInterval); + this.memoryCleanupInterval = null; + } + + if (this.memoryManagementInterval) { + clearInterval(this.memoryManagementInterval); + this.memoryManagementInterval = null; + } + + if (this.progressInterval) { + clearInterval(this.progressInterval); + this.progressInterval = null; + } + + if (this.snapshotDebounceTimeout) { + clearTimeout(this.snapshotDebounceTimeout); + this.snapshotDebounceTimeout = null; + } + + if (this.networkRequestTimeout) { + clearTimeout(this.networkRequestTimeout); + this.networkRequestTimeout = null; + } + + this.removeAllSocketListeners(); + try { - this.isScreencastActive = false; - this.isDOMStreamingActive = false; + if (this.currentPage) { + const isClosed = this.currentPage.isClosed(); + if (!isClosed) { + this.currentPage.removeAllListeners(); + logger.debug('Removed all page event listeners'); + } else { + logger.debug('Page already closed, skipping listener removal'); + } + } + } catch (error: any) { + logger.warn(`Error removing page listeners: ${error.message}`); + } + // Clean up Generator listeners to prevent memory leaks + if (this.generator) { + try { + this.generator.cleanup(); + logger.debug('Generator cleanup completed'); + } catch (error: any) { + logger.warn(`Error cleaning up generator: ${error.message}`); + } + } + + // Stop interpretation with individual error handling (also calls clearState which removes pausing listeners) + try { await this.interpreter.stopInterpretation(); - - if (this.screencastInterval) { - clearInterval(this.screencastInterval); - } - - if (this.domUpdateInterval) { - clearInterval(this.domUpdateInterval); - } - - if (this.client) { - await this.stopScreencast(); - await this.stopDOM(); - } - - if (this.browser) { - await this.browser.close(); - } - - this.screenshotQueue = []; - //this.performanceMonitor.reset(); - } catch (error) { - logger.error('Error during browser shutdown:', error); + logger.error("Error stopping interpretation during shutdown:", error); + } + + // Stop DOM streaming with individual error handling + try { + await this.stopDOM(); + } catch (error) { + logger.error("Error stopping DOM during shutdown:", error); + } + + try { + if (this.client && this.currentPage && !this.currentPage.isClosed()) { + const detachPromise = this.client.detach(); + const timeoutPromise = new Promise((_, reject) => + setTimeout(() => reject(new Error('CDP detach timeout')), 5000) + ); + await Promise.race([detachPromise, timeoutPromise]); + logger.debug('CDP session detached successfully'); + } + } catch (error: any) { + logger.warn(`Error detaching CDP session: ${error.message}`); + } finally { + this.client = null; + } + + try { + if (this.currentPage && !this.currentPage.isClosed()) { + const closePromise = this.currentPage.close(); + const timeoutPromise = new Promise((_, reject) => + setTimeout(() => reject(new Error('Page close timeout')), 5000) + ); + await Promise.race([closePromise, timeoutPromise]); + logger.debug('Current page closed successfully'); + } + } catch (error: any) { + logger.warn(`Error closing current page: ${error.message}`); + } finally { + this.currentPage = null; + } + + try { + if (this.context) { + const contextClosePromise = this.context.close(); + const timeoutPromise = new Promise((_, reject) => + setTimeout(() => reject(new Error('Context close timeout')), 5000) + ); + await Promise.race([contextClosePromise, timeoutPromise]); + logger.debug('Browser context closed successfully'); + } + } catch (error: any) { + logger.warn(`Error closing browser context: ${error.message}`); + } finally { + this.context = null; + } + + try { + if (this.browser) { + const browserClosePromise = this.browser.close(); + const timeoutPromise = new Promise((_, reject) => + setTimeout(() => reject(new Error('Browser close timeout')), 5000) + ); + await Promise.race([browserClosePromise, timeoutPromise]); + logger.debug('Browser closed successfully'); + } + } catch (error: any) { + logger.error("Error during browser close:", error); + } finally { + this.browser = null; } } - private async optimizeScreenshot(screenshot: Buffer): Promise { - try { - return await sharp(screenshot) - .png({ - quality: Math.round(SCREENCAST_CONFIG.compressionQuality * 100), - compressionLevel: 6, - adaptiveFiltering: true, - force: true - }) - .resize({ - width: SCREENCAST_CONFIG.maxWidth, - height: SCREENCAST_CONFIG.maxHeight, - fit: 'inside', - withoutEnlargement: true, - kernel: 'lanczos3' - }) - .toBuffer(); - } catch (error) { - logger.error('Screenshot optimization failed:', error); - return screenshot; - } - } - - - /** - * Makes and emits a single screenshot to the client side. - * @returns {Promise} - */ - public makeAndEmitScreenshot = async (): Promise => { - try { - const screenshot = await this.currentPage?.screenshot(); - if (screenshot) { - this.emitScreenshot(screenshot); - } - } catch (e) { - const { message } = e as Error; - logger.log('error', `Screenshot error: ${message}`); - } - }; - /** * Updates the active socket instance. * This will update all registered events for the socket and @@ -1806,6 +1039,10 @@ export class RemoteBrowser { this.registerEditorEvents(); this.generator?.updateSocket(socket); this.interpreter?.updateSocket(socket); + + if (this.isDOMStreamingActive) { + this.setupScrollEventListener(); + } }; /** @@ -1845,15 +1082,6 @@ export class RemoteBrowser { } }; - /** - * Stops the workflow interpretation and initializes a new page. - * @returns {Promise} - */ - public stopCurrentInterpretation = async (): Promise => { - await this.interpreter.stopInterpretation(); - await this.initializeNewPage(); - }; - /** * Returns the current page instance. * @returns {Page | null | undefined} @@ -1872,7 +1100,6 @@ export class RemoteBrowser { private changeTab = async (tabIndex: number): Promise => { const page = this.currentPage?.context().pages()[tabIndex]; if (page) { - await this.stopScreencast(); await this.stopDOM(); this.currentPage = page; @@ -1888,9 +1115,6 @@ export class RemoteBrowser { if (this.isDOMStreamingActive) { await this.makeAndEmitDOMSnapshot(); await this.subscribeToDOM(); - } else { - await this.makeAndEmitScreenshot(); - await this.subscribeToScreencast(); } } else { logger.log('error', `${tabIndex} index out of range of pages`) @@ -1903,7 +1127,6 @@ export class RemoteBrowser { * @returns {Promise} */ private initializeNewPage = async (options?: Object): Promise => { - await this.stopScreencast(); const newPage = options ? await this.browser?.newPage(options) : await this.browser?.newPage(); await newPage?.setExtraHTTPHeaders({ @@ -1915,161 +1138,9 @@ export class RemoteBrowser { if (this.currentPage) { await this.setupPageEventListeners(this.currentPage); - this.client = await this.currentPage.context().newCDPSession(this.currentPage); - if (this.renderingMode === "dom") { - await this.subscribeToDOM(); - } else { - await this.subscribeToScreencast(); - } + await this.subscribeToDOM(); } else { logger.log('error', 'Could not get a new page, returned undefined'); } }; - - /** - * Initiates screencast of the remote browser through socket, - * registers listener for rerender event and emits the loaded event. - * Should be called only once after the browser is fully initialized. - * @returns {Promise} - */ - private async startScreencast(): Promise { - if (!this.client) { - logger.warn('Client is not initialized'); - return; - } - - try { - await this.client.send('Page.startScreencast', { - format: SCREENCAST_CONFIG.format, - quality: Math.round(SCREENCAST_CONFIG.compressionQuality * 100), - maxWidth: SCREENCAST_CONFIG.maxWidth, - maxHeight: SCREENCAST_CONFIG.maxHeight, - everyNthFrame: 1 - }); - - this.isScreencastActive = true; - - this.client.on('Page.screencastFrame', async ({ data, sessionId }) => { - try { - if (this.screenshotQueue.length >= SCREENCAST_CONFIG.maxQueueSize && this.isProcessingScreenshot) { - await this.client?.send('Page.screencastFrameAck', { sessionId }); - return; - } - - const buffer = Buffer.from(data, 'base64'); - this.emitScreenshot(buffer); - - setTimeout(async () => { - try { - if (this.client) { - await this.client.send('Page.screencastFrameAck', { sessionId }); - } - } catch (e) { - logger.error('Error acknowledging screencast frame:', e); - } - }, 10); - } catch (error) { - logger.error('Screencast frame processing failed:', error); - - try { - await this.client?.send('Page.screencastFrameAck', { sessionId }); - } catch (ackError) { - logger.error('Failed to acknowledge screencast frame:', ackError); - } - } - }); - logger.info('Screencast started successfully'); - } catch (error) { - logger.error('Failed to start screencast:', error); - } - } - - private async stopScreencast(): Promise { - if (!this.client) { - logger.error('Client is not initialized'); - return; - } - - try { - // Set flag to indicate screencast is active - this.isScreencastActive = false; - await this.client.send('Page.stopScreencast'); - this.screenshotQueue = []; - this.isProcessingScreenshot = false; - logger.info('Screencast stopped successfully'); - } catch (error) { - logger.error('Failed to stop screencast:', error); - } - } - - - /** - * Helper for emitting the screenshot of browser's active page through websocket. - * @param payload the screenshot binary data - * @returns void - */ - private emitScreenshot = async (payload: Buffer, viewportSize?: { width: number, height: number }): Promise => { - if (this.screenshotQueue.length > SCREENCAST_CONFIG.maxQueueSize) { - this.screenshotQueue = this.screenshotQueue.slice(-1); - } - - if (this.isProcessingScreenshot) { - if (this.screenshotQueue.length < SCREENCAST_CONFIG.maxQueueSize) { - this.screenshotQueue.push(payload); - } - return; - } - - this.isProcessingScreenshot = true; - - try { - const optimizationPromise = this.optimizeScreenshot(payload); - const timeoutPromise = new Promise((resolve) => { - setTimeout(() => resolve(payload), 100); - }); - - const optimizedScreenshot = await Promise.race([optimizationPromise, timeoutPromise]); - const base64Data = optimizedScreenshot.toString('base64'); - const dataWithMimeType = `data:image/${SCREENCAST_CONFIG.format};base64,${base64Data}`; - - payload = null as any; - - setImmediate(async () => { - this.socket.emit('screencast', { - image: dataWithMimeType, - userId: this.userId, - viewport: viewportSize || await this.currentPage?.viewportSize() || null - }); - }); - } catch (error) { - logger.error('Screenshot emission failed:', error); - try { - const base64Data = payload.toString('base64'); - const dataWithMimeType = `data:image/png;base64,${base64Data}`; - - setImmediate(async () => { - this.socket.emit('screencast', { - image: dataWithMimeType, - userId: this.userId, - viewport: viewportSize || await this.currentPage?.viewportSize() || null - }); - }); - } catch (e) { - logger.error('Fallback screenshot emission also failed:', e); - } - } finally { - this.isProcessingScreenshot = false; - - if (this.screenshotQueue.length > 0) { - const nextScreenshot = this.screenshotQueue.shift(); - if (nextScreenshot) { - const delay = this.screenshotQueue.length > 0 ? 16 : 33; - setTimeout(() => { - this.emitScreenshot(nextScreenshot); - }, delay); - } - } - } - }; - } diff --git a/server/src/browser-management/controller.ts b/server/src/browser-management/controller.ts index a6db615e..4058fa56 100644 --- a/server/src/browser-management/controller.ts +++ b/server/src/browser-management/controller.ts @@ -31,22 +31,50 @@ export const initializeRemoteBrowserForRecording = (userId: string, mode: string if (activeId) { const remoteBrowser = browserPool.getRemoteBrowser(activeId); remoteBrowser?.updateSocket(socket); - await remoteBrowser?.makeAndEmitScreenshot(); + + if (remoteBrowser?.isDOMStreamingActive) { + remoteBrowser?.makeAndEmitDOMSnapshot(); + } } else { const browserSession = new RemoteBrowser(socket, userId, id); browserSession.interpreter.subscribeToPausing(); - await browserSession.initialize(userId); - await browserSession.registerEditorEvents(); - - if (mode === "dom") { - await browserSession.subscribeToDOM(); - logger.info('DOM streaming started for scraping browser in recording mode'); - } else { - await browserSession.subscribeToScreencast(); - logger.info('Screenshot streaming started for local browser in recording mode'); - } - browserPool.addRemoteBrowser(id, browserSession, userId, false, "recording"); + try { + await browserSession.initialize(userId); + await browserSession.registerEditorEvents(); + + await browserSession.subscribeToDOM(); + logger.info('DOM streaming started for remote browser in recording mode'); + + browserPool.addRemoteBrowser(id, browserSession, userId, false, "recording"); + } catch (initError: any) { + logger.error(`Failed to initialize browser for recording: ${initError.message}`); + logger.info('Sending browser failure notification to frontend'); + + socket.emit('dom-mode-error', { + userId: userId, + error: 'Failed to start the browser, please try again in some time.' + }); + + socket.emit('error', { + userId: userId, + message: 'Failed to start the browser, please try again in some time.', + details: initError.message + }); + + await new Promise(resolve => setTimeout(resolve, 100)); + + try { + await browserSession.switchOff(); + logger.debug('Cleaned up failed browser session'); + } catch (cleanupError: any) { + logger.warn(`Failed to cleanup browser session: ${cleanupError.message}`); + } + + logger.info('Browser initialization failed, user notified'); + + return id; + } } socket.emit('loaded'); }); @@ -69,7 +97,7 @@ export const createRemoteBrowserForRun = (userId: string): string => { const id = uuid(); - const slotReserved = browserPool.reserveBrowserSlot(id, userId, "run"); + const slotReserved = browserPool.reserveBrowserSlotAtomic(id, userId, "run"); if (!slotReserved) { logger.log('warn', `Cannot create browser for user ${userId}: no available slots`); throw new Error('User has reached maximum browser limit'); @@ -94,45 +122,78 @@ export const createRemoteBrowserForRun = (userId: string): string => { * @category BrowserManagement-Controller */ export const destroyRemoteBrowser = async (id: string, userId: string): Promise => { + const DESTROY_TIMEOUT = 30000; + + const destroyPromise = (async () => { + try { + const browserSession = browserPool.getRemoteBrowser(id); + if (!browserSession) { + logger.log('info', `Browser with id: ${id} not found, may have already been destroyed`); + return true; + } + + logger.log('debug', `Switching off the browser with id: ${id}`); + + try { + await browserSession.switchOff(); + } catch (switchOffError) { + logger.log('warn', `Error switching off browser ${id}: ${switchOffError}`); + } + + try { + const namespace = io.of(id); + + const sockets = await namespace.fetchSockets(); + for (const socket of sockets) { + socket.disconnect(true); + } + + namespace.removeAllListeners(); + + await new Promise(resolve => setTimeout(resolve, 100)); + + const nsps = (io as any)._nsps; + if (nsps && nsps.has(`/${id}`)) { + const ns = nsps.get(`/${id}`); + if (ns && ns.sockets && ns.sockets.size === 0) { + nsps.delete(`/${id}`); + logger.log('debug', `Deleted empty namespace /${id} from io._nsps Map`); + } else { + logger.log('warn', `Namespace /${id} still has ${ns?.sockets?.size || 0} sockets, skipping manual deletion`); + } + } + + logger.log('debug', `Cleaned up socket namespace for browser ${id}`); + } catch (namespaceCleanupError: any) { + logger.log('warn', `Error cleaning up socket namespace for browser ${id}: ${namespaceCleanupError.message}`); + } + + return browserPool.deleteRemoteBrowser(id); + } catch (error) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.log('error', `Failed to destroy browser ${id}: ${errorMessage}`); + + try { + return browserPool.deleteRemoteBrowser(id); + } catch (deleteError) { + logger.log('error', `Failed to delete browser ${id} from pool: ${deleteError}`); + return false; + } + } + })(); + try { - const browserSession = browserPool.getRemoteBrowser(id); - if (!browserSession) { - logger.log('info', `Browser with id: ${id} not found, may have already been destroyed`); - return true; - } - - logger.log('debug', `Switching off the browser with id: ${id}`); - - try { - await browserSession.stopCurrentInterpretation(); - } catch (stopError) { - logger.log('warn', `Error stopping interpretation for browser ${id}: ${stopError}`); - } - - try { - await browserSession.switchOff(); - } catch (switchOffError) { - logger.log('warn', `Error switching off browser ${id}: ${switchOffError}`); - } + const timeoutPromise = new Promise((_, reject) => + setTimeout(() => reject(new Error(`Browser destruction timed out after ${DESTROY_TIMEOUT}ms`)), DESTROY_TIMEOUT) + ); - try { - const namespace = io.of(id); - namespace.removeAllListeners(); - namespace.disconnectSockets(true); - logger.log('debug', `Cleaned up socket namespace for browser ${id}`); - } catch (namespaceCleanupError: any) { - logger.log('warn', `Error cleaning up socket namespace for browser ${id}: ${namespaceCleanupError.message}`); - } - - return browserPool.deleteRemoteBrowser(id); - } catch (error) { - const errorMessage = error instanceof Error ? error.message : String(error); - logger.log('error', `Failed to destroy browser ${id}: ${errorMessage}`); - + return await Promise.race([destroyPromise, timeoutPromise]); + } catch (timeoutError: any) { + logger.log('error', `Browser ${id} destruction timeout: ${timeoutError.message} - force removing from pool`); try { return browserPool.deleteRemoteBrowser(id); } catch (deleteError) { - logger.log('error', `Failed to delete browser ${id} from pool: ${deleteError}`); + logger.log('error', `Failed to force delete browser ${id} after timeout: ${deleteError}`); return false; } } @@ -229,8 +290,8 @@ export const interpretWholeWorkflow = async (userId: string) => { export const stopRunningInterpretation = async (userId: string) => { const id = getActiveBrowserIdByState(userId, "recording"); if (id) { - const browser = browserPool.getRemoteBrowser(id); - await browser?.stopCurrentInterpretation(); + const browserSession = browserPool.getRemoteBrowser(id); + await browserSession?.switchOff(); } else { logger.log('error', 'Cannot stop interpretation: No active browser or generator.'); } @@ -264,7 +325,31 @@ const initializeBrowserAsync = async (id: string, userId: string) => { browserPool.failBrowserSlot(id); }); - const socket = await waitForConnection; + const connectWithRetry = async (maxRetries: number = 3): Promise => { + let retryCount = 0; + + while (retryCount < maxRetries) { + try { + const socket = await waitForConnection; + if (socket || retryCount === maxRetries - 1) { + return socket; + } + } catch (error: any) { + logger.log('warn', `Connection attempt ${retryCount + 1} failed for browser ${id}: ${error.message}`); + } + + retryCount++; + if (retryCount < maxRetries) { + const delay = Math.pow(2, retryCount) * 1000; + logger.log('info', `Retrying connection for browser ${id} in ${delay}ms (attempt ${retryCount + 1}/${maxRetries})`); + await new Promise(resolve => setTimeout(resolve, delay)); + } + } + + return null; + }; + + const socket = await connectWithRetry(3); try { let browserSession: RemoteBrowser; @@ -288,9 +373,17 @@ const initializeBrowserAsync = async (id: string, userId: string) => { logger.log('debug', `Starting browser initialization for ${id}`); try { - await browserSession.initialize(userId); - logger.log('debug', `Browser initialization completed for ${id}`); + const BROWSER_INIT_TIMEOUT = 45000; + logger.log('info', `Browser initialization starting with ${BROWSER_INIT_TIMEOUT/1000}s timeout`); + + const initPromise = browserSession.initialize(userId); + const timeoutPromise = new Promise((_, reject) => { + setTimeout(() => reject(new Error('Browser initialization timeout')), BROWSER_INIT_TIMEOUT); + }); + + await Promise.race([initPromise, timeoutPromise]); } catch (initError: any) { + logger.log('error', `Browser initialization failed for ${id}: ${initError.message}`); try { await browserSession.switchOff(); logger.log('info', `Cleaned up failed browser initialization for ${id}`); diff --git a/server/src/browser-management/inputHandlers.ts b/server/src/browser-management/inputHandlers.ts index 3e58664e..ee31ba06 100644 --- a/server/src/browser-management/inputHandlers.ts +++ b/server/src/browser-management/inputHandlers.ts @@ -7,9 +7,7 @@ import { Socket } from 'socket.io'; import logger from "../logger"; import { Coordinates, ScrollDeltas, KeyboardInput, DatePickerEventData } from '../types'; import { browserPool } from "../server"; -import { WorkflowGenerator } from "../workflow-management/classes/Generator"; import { Page } from "playwright"; -import { throttle } from "../../../src/helpers/inputHelpers"; import { CustomActions } from "../../../src/shared/types"; import { WhereWhatPair } from "maxun-core"; import { RemoteBrowser } from './classes/RemoteBrowser'; @@ -899,4 +897,37 @@ const registerInputHandlers = (socket: Socket, userId: string) => { socket.on("dom:addpair", (data) => onDOMWorkflowPair(data, userId)); }; -export default registerInputHandlers; +/** + * Removes all input handler socket listeners to prevent memory leaks + * Must be called when socket disconnects or browser session ends + * @param socket websocket with established connection + * @returns void + * @category BrowserManagement + */ +const removeInputHandlers = (socket: Socket) => { + try { + socket.removeAllListeners("input:mousedown"); + socket.removeAllListeners("input:wheel"); + socket.removeAllListeners("input:mousemove"); + socket.removeAllListeners("input:keydown"); + socket.removeAllListeners("input:keyup"); + socket.removeAllListeners("input:url"); + socket.removeAllListeners("input:refresh"); + socket.removeAllListeners("input:back"); + socket.removeAllListeners("input:forward"); + socket.removeAllListeners("input:date"); + socket.removeAllListeners("input:dropdown"); + socket.removeAllListeners("input:time"); + socket.removeAllListeners("input:datetime-local"); + socket.removeAllListeners("action"); + socket.removeAllListeners("dom:input"); + socket.removeAllListeners("dom:click"); + socket.removeAllListeners("dom:keypress"); + socket.removeAllListeners("dom:addpair"); + socket.removeAllListeners("removeAction"); + } catch (error: any) { + console.warn(`Error removing input handlers: ${error.message}`); + } +}; + +export { registerInputHandlers, removeInputHandlers }; diff --git a/server/src/socket-connection/connection.ts b/server/src/socket-connection/connection.ts index 109e50cb..88ad2c13 100644 --- a/server/src/socket-connection/connection.ts +++ b/server/src/socket-connection/connection.ts @@ -1,6 +1,6 @@ import { Namespace, Socket } from 'socket.io'; import logger from "../logger"; -import registerInputHandlers from '../browser-management/inputHandlers'; +import { registerInputHandlers, removeInputHandlers } from '../browser-management/inputHandlers'; /** * Opens a websocket canal for duplex data transfer and registers all handlers for this data for the recording session. @@ -17,7 +17,11 @@ export const createSocketConnection = ( const onConnection = async (socket: Socket) => { logger.log('info', "Client connected " + socket.id); registerInputHandlers(socket, userId); - socket.on('disconnect', () => logger.log('info', "Client disconnected " + socket.id)); + socket.on('disconnect', () => { + logger.log('info', "Client disconnected " + socket.id); + removeInputHandlers(socket); + logger.log('debug', "Input handlers cleaned up for socket " + socket.id); + }); callback(socket); } diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 57a30863..bb19b465 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -71,6 +71,8 @@ export class WorkflowGenerator { private poolId: string | null = null; + private pageCloseListeners: Map void> = new Map(); + /** * The public constructor of the WorkflowGenerator. * Takes socket for communication as a parameter and registers some important events on it. @@ -884,6 +886,29 @@ export class WorkflowGenerator { } }; + /** + * Removes all socket listeners to prevent memory leaks + * Must be called before re-registering listeners or during cleanup + * @private + */ + private removeSocketListeners(): void { + try { + this.socket.removeAllListeners('setGetList'); + this.socket.removeAllListeners('listSelector'); + this.socket.removeAllListeners('setPaginationMode'); + this.socket.removeAllListeners('dom-mode-enabled'); + this.socket.removeAllListeners('screenshot-mode-enabled'); + this.socket.removeAllListeners('save'); + this.socket.removeAllListeners('new-recording'); + this.socket.removeAllListeners('activeIndex'); + this.socket.removeAllListeners('decision'); + this.socket.removeAllListeners('updatePair'); + logger.log('debug', 'Removed all Generator socket listeners'); + } catch (error: any) { + logger.warn(`Error removing Generator socket listeners: ${error.message}`); + } + } + /** * Removes an action with the given actionId from the workflow. * Only removes the specific action from the what array, not the entire pair. @@ -938,6 +963,39 @@ export class WorkflowGenerator { this.initializeDOMListeners(); }; + /** + * Cleanup method to release resources and prevent memory leaks + * Must be called when the generator is no longer needed + */ + public cleanup(): void { + try { + this.removeSocketListeners(); + + for (const [page, listener] of this.pageCloseListeners.entries()) { + try { + if (!page.isClosed()) { + page.removeListener('close', listener); + } + } catch (error: any) { + logger.warn(`Error removing page close listener: ${error.message}`); + } + } + this.pageCloseListeners.clear(); + + this.workflowRecord = { workflow: [] }; + this.generatedData = { + lastUsedSelector: '', + lastIndex: null, + lastAction: '', + lastUsedSelectorTagName: '', + lastUsedSelectorInnerText: '', + }; + logger.log('debug', 'Generator cleanup completed'); + } catch (error: any) { + logger.error(`Error during Generator cleanup: ${error.message}`); + } + } + /** * Returns the currently generated workflow without all the generated flag actions. * @param workflow The workflow for removing the generated flag actions from.