Files
parcer/server/src/browser-management/classes/RemoteBrowser.ts

1128 lines
39 KiB
TypeScript
Raw Normal View History

2024-06-01 10:55:04 +05:30
import {
Page,
Browser,
CDPSession,
2025-11-30 17:41:44 +05:30
BrowserContext
} from 'playwright-core';
2024-06-01 10:55:04 +05:30
import { Socket } from "socket.io";
2024-09-21 18:51:11 +05:30
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
2024-07-20 05:57:15 +05:30
import fetch from 'cross-fetch';
2024-06-01 10:55:04 +05:30
import logger from '../../logger';
2025-04-01 23:25:47 +05:30
import { InterpreterSettings } from "../../types";
2024-06-01 10:55:04 +05:30
import { WorkflowGenerator } from "../../workflow-management/classes/Generator";
import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter";
import { getDecryptedProxyConfig } from '../../routes/proxy';
2024-12-09 17:57:28 +05:30
import { getInjectableScript } from 'idcac-playwright';
import { FingerprintInjector } from "fingerprint-injector";
import { FingerprintGenerator } from "fingerprint-generator";
2025-11-30 17:41:44 +05:30
import { connectToRemoteBrowser } from '../browserConnection';
2025-06-23 13:21:46 +05:30
/**
 * Augments the page-side `Window` type with the `rrwebSnapshot` global that
 * is read inside `page.evaluate` when a DOM snapshot is captured.
 */
declare global {
  interface Window {
    rrwebSnapshot?: any;
  }
}

/**
 * A node of the serialized DOM tree returned by `rrwebSnapshot.snapshot`.
 * Only `type` and `id` are required; any additional keys are permitted.
 */
interface RRWebSnapshot {
  id: number;
  type: number;
  tagName?: string;
  textContent?: string;
  attributes?: Record<string, string>;
  childNodes?: RRWebSnapshot[];
  [key: string]: any;
}

/**
 * A snapshot paired with the URL of the page it was captured from.
 */
interface ProcessedSnapshot {
  baseUrl: string;
  snapshot: RRWebSnapshot;
}
2025-01-06 11:47:56 +05:30
/**
 * Tuning values for the periodic heap-usage check.
 * Declared `as const` so the thresholds cannot be mutated at runtime.
 */
const MEMORY_CONFIG = {
  /** Milliseconds between heap-usage checks (20s instead of 60s). */
  gcInterval: 20000,
  /** Heap budget in bytes (1.5GB) that `heapUsed` is measured against. */
  maxHeapSize: 1536 * 1024 * 1024,
  /** Fraction of `maxHeapSize` (70%) above which memory pressure is reported. */
  heapUsageThreshold: 0.7,
} as const;
2024-06-01 10:55:04 +05:30
/**
* This class represents a remote browser instance.
* It is used to allow a variety of interaction with the Playwright's browser instance.
* Every remote browser holds an instance of a generator and interpreter classes with
* the purpose of generating and interpreting workflows.
* @category BrowserManagement
*/
export class RemoteBrowser {
/**
 * Playwright's [browser](https://playwright.dev/docs/api/class-browser) instance.
 * @private
 */
private browser: Browser | null = null;

/**
 * Browser context created during initialization; the current page is opened in it.
 * @private
 */
private context: BrowserContext | null = null;

/**
 * The Playwright's [CDPSession](https://playwright.dev/docs/api/class-cdpsession) instance,
 * used to talk raw Chrome Devtools Protocol.
 * @private
 */
private client: CDPSession | null | undefined = null;

/**
 * Socket.io socket instance enabling communication with the client (frontend) side.
 * @private
 */
private socket: Socket;

/**
 * The Playwright's [Page](https://playwright.dev/docs/api/class-page) instance
 * as current interactive remote browser's page.
 * @private
 */
private currentPage: Page | null | undefined = null;

/**
 * Interpreter settings for any started interpretation.
 * @private
 */
private interpreterSettings: InterpreterSettings = {
  debug: false,
  maxConcurrency: 1,
  maxRepeats: 1,
};

/**
 * The user ID that owns this browser instance
 * @private
 */
private userId: string;

/** Last URL sent to the client through a `urlChanged` event, if any. */
private lastEmittedUrl: string | null = null;

/**
 * {@link WorkflowGenerator} instance specific to the remote browser.
 */
public generator: WorkflowGenerator;

/**
 * {@link WorkflowInterpreter} instance specific to the remote browser.
 */
public interpreter: WorkflowInterpreter;

/** True while DOM snapshots are being streamed to the client. */
public isDOMStreamingActive = false;

/** Cleared on stop/shutdown; no code in this class assigns it. */
private domUpdateInterval: NodeJS.Timeout | null = null;

/** Scroll offsets recorded at the time of the most recent snapshot or scroll update. */
private lastScrollPosition = { x: 0, y: 0 };

/** Combined x+y scroll distance (pixels) that must be exceeded to schedule a new snapshot. */
private scrollThreshold = 200;

/** Pending debounced snapshot scheduled by the scroll handler. */
private snapshotDebounceTimeout: NodeJS.Timeout | null = null;

/** Timer that ends the initial-load quiet period. */
private networkRequestTimeout: NodeJS.Timeout | null = null;

/** URLs recorded while the initial load is in progress. */
private pendingNetworkRequests: string[] = [];

/** Length of the initial-load quiet period in milliseconds. */
private readonly INITIAL_LOAD_QUIET_PERIOD = 3000;

/** `Date.now()` value captured when the quiet period started. */
private networkWaitStartTime = 0;

/** Interval that emits loading progress during the quiet period. */
private progressInterval: NodeJS.Timeout | null = null;

/** Set once the first non-blank document response has been seen. */
private hasShownInitialLoader = false;

/** True between the first document response and the end of the quiet period. */
private isInitialLoadInProgress = false;

/** Cleared on shutdown; no code in this class assigns it. */
private memoryCleanupInterval: NodeJS.Timeout | null = null;

/** Interval driving the periodic heap-usage check. */
private memoryManagementInterval: NodeJS.Timeout | null = null;
2024-06-01 10:55:04 +05:30
/**
 * Stores the socket and owner, then creates the {@link WorkflowInterpreter} and
 * {@link WorkflowGenerator} that belong to this remote browser, handing both the same socket.
 * @param socket socket.io socket instance used to communicate with the client side
 * @param userId ID of the user that owns this browser instance
 * @param poolId identifier passed through to the {@link WorkflowGenerator}
 * @constructor
 */
public constructor(socket: Socket, userId: string, poolId: string) {
  // Plain state first.
  this.socket = socket;
  this.userId = userId;

  // Collaborators, created in the same order as before: interpreter, then generator.
  this.interpreter = new WorkflowInterpreter(socket);
  this.generator = new WorkflowGenerator(socket, poolId);
}
2025-06-23 13:21:46 +05:30
/**
 * Wraps a raw rrweb snapshot together with the current page URL.
 * @param snapshot serialized DOM tree captured in the page
 * @returns the snapshot plus `baseUrl` (empty string when there is no page or URL)
 */
private async processRRWebSnapshot(
  snapshot: RRWebSnapshot
): Promise<ProcessedSnapshot> {
  const pageUrl = this.currentPage?.url();
  const baseUrl = pageUrl ? pageUrl : "";
  return { snapshot, baseUrl };
}
/**
 * Starts the periodic heap-usage check.
 *
 * Every `MEMORY_CONFIG.gcInterval` ms the ratio `heapUsed / maxHeapSize` is compared
 * against `heapUsageThreshold`:
 * - above 120% of the threshold: run {@link performMemoryCleanup};
 * - above the threshold: log a warning and, above 110% of it, call `global.gc`
 *   when Node exposes it.
 *
 * Calling this again replaces the previous interval instead of stacking a second one.
 */
private initializeMemoryManagement(): void {
  // Guard against duplicate timers if this is invoked more than once.
  if (this.memoryManagementInterval) {
    clearInterval(this.memoryManagementInterval);
    this.memoryManagementInterval = null;
  }

  const { gcInterval, maxHeapSize, heapUsageThreshold } = MEMORY_CONFIG;

  this.memoryManagementInterval = setInterval(() => {
    const heapUsageRatio = process.memoryUsage().heapUsed / maxHeapSize;

    if (heapUsageRatio > heapUsageThreshold * 1.2) {
      logger.warn(
        "Critical memory pressure detected, triggering emergency cleanup"
      );
      // The cleanup is async; attach a handler so a failure cannot surface
      // as an unhandled promise rejection from inside the timer callback.
      this.performMemoryCleanup().catch((error) => {
        logger.error("Error during emergency memory cleanup:", error);
      });
      return;
    }

    if (heapUsageRatio > heapUsageThreshold) {
      logger.warn("High memory usage detected, triggering cleanup");
      if (global.gc && heapUsageRatio > heapUsageThreshold * 1.1) {
        global.gc();
      }
    }
  }, gcInterval);
}
2025-01-06 11:47:38 +05:30
/**
 * Emergency cleanup: requests garbage collection when `global.gc` is exposed,
 * pauses briefly when a page exists, then notifies the client with a
 * `memory-cleanup` event.
 */
private async performMemoryCleanup(): Promise<void> {
  const gcAvailable = Boolean(global.gc);
  if (gcAvailable) {
    try {
      global.gc();
      logger.info("Garbage collection requested");
    } catch (error) {
      logger.error("Error during garbage collection:", error);
    }
  }

  if (this.currentPage) {
    // NOTE(review): this branch only waits 500ms before logging; no CDP call is
    // made here, so the "reset completed" message may be stale — confirm intent.
    try {
      await new Promise((resolve) => {
        setTimeout(resolve, 500);
      });
      logger.info("CDP session reset completed");
    } catch (error) {
      logger.error("Error resetting CDP session:", error);
    }
  }

  const notification = {
    userId: this.userId,
    timestamp: Date.now(),
  };
  this.socket.emit("memory-cleanup", notification);
}
/**
 * Produces a canonical form of a URL so that equivalent URLs compare equal:
 * trailing slashes are stripped from the path (an empty path becomes `/`)
 * and the protocol is lower-cased.
 * @param url URL to normalize
 * @returns the normalized URL, or the input unchanged when it cannot be parsed
 */
private normalizeUrl(url: string): string {
  try {
    const target = new URL(url);

    // Drop trailing slashes, but never leave the path empty.
    const trimmedPath = target.pathname.replace(/\/+$/, '');
    target.pathname = trimmedPath === '' ? '/' : trimmedPath;

    // Keep protocol casing consistent.
    target.protocol = target.protocol.toLowerCase();

    return target.toString();
  } catch {
    return url;
  }
}
/**
 * Decides whether a URL differs from the last emitted one once both are normalized.
 * @param newUrl candidate URL
 * @returns `true` when nothing has been emitted yet or the normalized URLs differ
 */
private shouldEmitUrlChange(newUrl: string): boolean {
  const previousUrl = this.lastEmittedUrl;
  if (!previousUrl) return true;

  return this.normalizeUrl(newUrl) !== this.normalizeUrl(previousUrl);
}
2025-06-23 13:21:46 +05:30
/**
 * (Re)registers the `dom:scroll` socket handler.
 *
 * Each event scrolls the page with the mouse wheel, reads the resulting scroll
 * offsets, and — when the combined x+y movement since the last recorded position
 * exceeds `scrollThreshold` — schedules a debounced (300ms) DOM snapshot.
 */
private setupScrollEventListener(): void {
  try {
    this.socket.removeAllListeners('dom:scroll');
  } catch (error: any) {
    logger.warn(`Error removing old scroll listener: ${error.message}`);
  }

  const onScroll = async (data: { deltaX: number; deltaY: number }) => {
    if (!this.isDOMStreamingActive || !this.currentPage) return;

    try {
      await this.currentPage.mouse.wheel(data.deltaX, data.deltaY);

      const position = await this.currentPage.evaluate(() => {
        const root = document.documentElement;
        return {
          x: window.scrollX,
          y: window.scrollY,
          maxX: Math.max(0, root.scrollWidth - window.innerWidth),
          maxY: Math.max(0, root.scrollHeight - window.innerHeight),
          documentHeight: root.scrollHeight,
          viewportHeight: window.innerHeight,
        };
      });

      const movedY = Math.abs(position.y - this.lastScrollPosition.y);
      const movedX = Math.abs(position.x - this.lastScrollPosition.x);
      if (movedY + movedX <= this.scrollThreshold) return;

      this.lastScrollPosition = { x: position.x, y: position.y };

      // Restart the debounce window so only the last scroll triggers a snapshot.
      if (this.snapshotDebounceTimeout) {
        clearTimeout(this.snapshotDebounceTimeout);
      }
      this.snapshotDebounceTimeout = setTimeout(async () => {
        await this.makeAndEmitDOMSnapshot();
      }, 300);
    } catch (error) {
      logger.error("Error handling scroll event:", error);
    }
  };

  this.socket.on("dom:scroll", onScroll);
}
/**
 * Attaches `domcontentloaded` and `response` listeners to the current page.
 *
 * - `domcontentloaded` triggers a snapshot unless the initial load is still in progress.
 * - The first non-`about:blank` document response starts the initial-load phase:
 *   progress is emitted every 500ms, and after `INITIAL_LOAD_QUIET_PERIOD` ms
 *   progress 100 is emitted and a snapshot is taken.
 */
private setupPageChangeListeners(): void {
  if (!this.currentPage) return;

  try {
    if (!this.currentPage.isClosed()) {
      this.currentPage.removeAllListeners("domcontentloaded");
      this.currentPage.removeAllListeners("response");
    }
  } catch (error: any) {
    logger.warn(`Error removing page change listeners: ${error.message}`);
  }

  // Stops the periodic progress emitter, if one is running.
  const stopProgressTicker = (): void => {
    if (this.progressInterval) {
      clearInterval(this.progressInterval);
      this.progressInterval = null;
    }
  };

  // Runs once the quiet period has elapsed.
  const finishInitialLoad = async (): Promise<void> => {
    logger.info(
      `Initial load network quiet period reached (${this.INITIAL_LOAD_QUIET_PERIOD}ms)`
    );
    stopProgressTicker();
    this.emitLoadingProgress(100, this.pendingNetworkRequests.length);
    this.pendingNetworkRequests = [];
    this.networkRequestTimeout = null;
    this.isInitialLoadInProgress = false;
    await this.makeAndEmitDOMSnapshot();
  };

  this.currentPage.on("domcontentloaded", async () => {
    if (this.isInitialLoadInProgress) return;
    logger.info("DOM content loaded - triggering snapshot");
    await this.makeAndEmitDOMSnapshot();
  });

  this.currentPage.on("response", async (response) => {
    const url = response.url();
    const isDocumentRequest = response.request().resourceType() === "document";
    const startsInitialLoad =
      !this.hasShownInitialLoader && isDocumentRequest && !url.includes("about:blank");
    if (!startsInitialLoad) return;

    this.hasShownInitialLoader = true;
    this.isInitialLoadInProgress = true;
    this.pendingNetworkRequests.push(url);

    if (this.networkRequestTimeout) {
      clearTimeout(this.networkRequestTimeout);
      this.networkRequestTimeout = null;
    }
    stopProgressTicker();

    this.networkWaitStartTime = Date.now();
    this.progressInterval = setInterval(() => {
      const elapsed = Date.now() - this.networkWaitStartTime;
      // Progress runs from 60 upward and is capped at 95 until the quiet period ends.
      const navigationProgress = Math.min(
        (elapsed / this.INITIAL_LOAD_QUIET_PERIOD) * 40,
        35
      );
      this.emitLoadingProgress(60 + navigationProgress, this.pendingNetworkRequests.length);
    }, 500);

    logger.debug(
      `Initial load network request received: ${url}. Using ${this.INITIAL_LOAD_QUIET_PERIOD}ms quiet period`
    );

    this.networkRequestTimeout = setTimeout(finishInitialLoad, this.INITIAL_LOAD_QUIET_PERIOD);
  });
}
2025-11-03 22:55:14 +05:30
/**
 * Sends a `domLoadingProgress` event to the client.
 * @param progress progress value; rounded to the nearest integer before sending
 * @param pendingRequests number of requests currently tracked as pending
 */
private emitLoadingProgress(progress: number, pendingRequests: number): void {
  const payload = {
    progress: Math.round(progress),
    pendingRequests,
    userId: this.userId,
    timestamp: Date.now(),
  };
  this.socket.emit("domLoadingProgress", payload);
}
/**
 * Replaces the `framenavigated` and `load` listeners on a page.
 *
 * - Main-frame navigations emit `urlChanged` when the URL differs from the last emitted one.
 * - On `load`, waits up to 5s for network idle and then evaluates the script
 *   returned by `getInjectableScript()` in the page (single attempt).
 * @param page page to attach the listeners to
 */
private async setupPageEventListeners(page: Page) {
  try {
    page.removeAllListeners('framenavigated');
    page.removeAllListeners('load');
    logger.debug('Removed existing page event listeners before re-registering');
  } catch (error: any) {
    logger.warn(`Error removing existing page listeners: ${error.message}`);
  }

  page.on('framenavigated', async (frame) => {
    if (frame !== page.mainFrame()) return;

    const currentUrl = page.url();
    if (!this.shouldEmitUrlChange(currentUrl)) return;

    this.lastEmittedUrl = currentUrl;
    this.socket.emit('urlChanged', {url: currentUrl, userId: this.userId});
  });

  // Inject the helper script once the page has loaded and the network is idle.
  page.on('load', async () => {
    let injected = false;
    try {
      await page.waitForLoadState('networkidle', { timeout: 5000 });
      if (page.isClosed()) {
        logger.debug('Page is closed, cannot inject script');
      } else {
        await page.evaluate(getInjectableScript());
        injected = true;
      }
    } catch (error: any) {
      logger.log('warn', `Script injection attempt failed: ${error.message}`);
    }
    console.log("Script injection result:", injected);
  });
}
2024-12-14 22:30:50 +05:30
/**
 * Picks one desktop Windows user-agent string at random.
 *
 * Every entry reports `Windows NT 10.0`: Windows 11 browsers keep that token in
 * the User-Agent header, so an `NT 11.0` value would not match any real browser.
 * @returns a user-agent string for the new browser context
 */
private getUserAgent() {
  const userAgents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.140 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.1938.81 Safari/537.36 Edg/116.0.1938.81',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.96 Safari/537.36 OPR/101.0.4843.25',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.62 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:118.0) Gecko/20100101 Firefox/118.0',
  ];

  return userAgents[Math.floor(Math.random() * userAgents.length)];
}
/**
 * Generates a fingerprint with `fingerprint-generator` and attaches it to the
 * given context with `fingerprint-injector`.
 *
 * Best-effort: any failure is logged as a warning and never rethrown, so context
 * setup continues without the injected fingerprint.
 * @param context browser context to attach the fingerprint to
 */
private async applyEnhancedFingerprinting(context: BrowserContext): Promise<void> {
  try {
    const fingerprint = new FingerprintGenerator().getFingerprint();
    const injector = new FingerprintInjector();
    await injector.attachFingerprintToPlaywright(context as any, fingerprint);
    logger.info("Enhanced fingerprinting applied successfully");
  } catch (error: unknown) {
    // Thrown values are not guaranteed to be Error instances.
    const reason = error instanceof Error ? error.message : String(error);
    logger.warn(`Modern fingerprint injection failed: ${reason}. Using existing protection.`);
  }
}
/**
 * An asynchronous constructor for asynchronously initialized properties.
 * Must be called right after creating an instance of RemoteBrowser class.
 *
 * Connects to the remote browser, creates a context (optionally proxied, with
 * fingerprinting and init scripts), opens the first page, and creates a CDP
 * session. Each attempt is retried up to 3 times; the whole procedure is bounded
 * by a 120s timeout, after which no further retries are started.
 * @param userId ID of the user whose proxy configuration is loaded
 * @returns {Promise<void>}
 * @throws Error when all attempts fail or the overall timeout elapses
 */
public initialize = async (userId: string): Promise<void> => {
  const MAX_RETRIES = 3;
  const OVERALL_INIT_TIMEOUT = 120000;
  const CONTEXT_CREATION_TIMEOUT = 15000;
  let retryCount = 0;
  let success = false;
  // Set when the overall timeout fires so the retry loop stops working in the background.
  let timedOut = false;

  this.socket.emit("dom-snapshot-loading", {
    userId: this.userId,
    timestamp: Date.now(),
  });
  this.emitLoadingProgress(0, 0);

  const initializationPromise = (async () => {
    while (!success && !timedOut && retryCount < MAX_RETRIES) {
      try {
        this.browser = await connectToRemoteBrowser();
        if (!this.browser || this.browser.isConnected() === false) {
          throw new Error('Browser failed to launch or is not connected');
        }
        this.emitLoadingProgress(20, 0);

        const proxyConfig = await getDecryptedProxyConfig(userId);
        let proxyOptions: { server: string, username?: string, password?: string } = { server: '' };
        if (proxyConfig.proxy_url) {
          proxyOptions = {
            server: proxyConfig.proxy_url,
            // Credentials are only attached when both parts are present.
            ...(proxyConfig.proxy_username && proxyConfig.proxy_password && {
              username: proxyConfig.proxy_username,
              password: proxyConfig.proxy_password,
            }),
          };
        }

        const contextOptions: any = {
          // viewport: { height: 400, width: 900 },
          // recordVideo: { dir: 'videos/' }
          // Force reduced motion to prevent animation issues
          reducedMotion: 'reduce',
          // Force JavaScript to be enabled
          javaScriptEnabled: true,
          // NOTE(review): `timeout` does not appear among Playwright's documented
          // newContext options — confirm whether it has any effect.
          timeout: 50000,
          // Do not emulate the forced-colors media feature
          forcedColors: 'none',
          isMobile: false,
          hasTouch: false,
          userAgent: this.getUserAgent(),
        };
        if (proxyOptions.server) {
          contextOptions.proxy = {
            server: proxyOptions.server,
            username: proxyOptions.username ? proxyOptions.username : undefined,
            password: proxyOptions.password ? proxyOptions.password : undefined,
          };
        }

        await new Promise(resolve => setTimeout(resolve, 500));

        // Bound context creation, and always clear the timer so it does not
        // linger for 15s after the context is ready.
        let contextTimer: NodeJS.Timeout | undefined;
        try {
          this.context = await Promise.race([
            this.browser.newContext(contextOptions),
            new Promise<never>((_, reject) => {
              contextTimer = setTimeout(
                () => reject(new Error('Context creation timed out after 15s')),
                CONTEXT_CREATION_TIMEOUT
              );
            }),
          ]);
        } finally {
          clearTimeout(contextTimer);
        }

        await this.applyEnhancedFingerprinting(this.context);

        // Wrap the navigator.webdriver getter so it always reports false.
        await this.context.addInitScript(
          `const defaultGetter = Object.getOwnPropertyDescriptor(
Navigator.prototype,
"webdriver"
).get;
defaultGetter.apply(navigator);
defaultGetter.toString();
Object.defineProperty(Navigator.prototype, "webdriver", {
set: undefined,
enumerable: true,
configurable: true,
get: new Proxy(defaultGetter, {
apply: (target, thisArg, args) => {
Reflect.apply(target, thisArg, args);
return false;
},
}),
});
const patchedGetter = Object.getOwnPropertyDescriptor(
Navigator.prototype,
"webdriver"
).get;
patchedGetter.apply(navigator);
patchedGetter.toString();`
        );
        await this.context.addInitScript({ path: './server/src/browser-management/classes/rrweb-bundle.js' });

        this.currentPage = await this.context.newPage();
        this.emitLoadingProgress(40, 0);
        await this.setupPageEventListeners(this.currentPage);

        try {
          const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']);
          await blocker.enableBlockingInPage(this.currentPage);
          this.client = await this.currentPage.context().newCDPSession(this.currentPage);
          await blocker.disableBlockingInPage(this.currentPage);
          console.log('Adblocker initialized');
        } catch (error: any) {
          console.warn('Failed to initialize adblocker, continuing without it:', error.message);
          // Still need to set up the CDP session even if blocker fails
          this.client = await this.currentPage.context().newCDPSession(this.currentPage);
        }

        this.emitLoadingProgress(60, 0);
        success = true;
        logger.log('debug', `Browser initialized successfully for user ${userId}`);
      } catch (error: any) {
        retryCount++;
        logger.log('error', `Browser initialization failed (attempt ${retryCount}/${MAX_RETRIES}): ${error.message}`);

        if (this.browser) {
          try {
            await this.browser.close();
          } catch (closeError) {
            logger.log('warn', `Failed to close browser during cleanup: ${closeError}`);
          }
          this.browser = null;
        }
        // Handles created by the failed attempt are no longer usable.
        this.context = null;
        this.currentPage = null;
        this.client = null;

        if (retryCount >= MAX_RETRIES) {
          throw new Error(`Failed to initialize browser after ${MAX_RETRIES} attempts: ${error.message}`);
        }
        await new Promise(resolve => setTimeout(resolve, 1000));
      }
    }
  })();

  let overallTimer: NodeJS.Timeout | undefined;
  const timeoutPromise = new Promise<never>((_, reject) => {
    overallTimer = setTimeout(() => {
      timedOut = true;
      reject(new Error(`Browser initialization timed out after ${OVERALL_INIT_TIMEOUT}ms`));
    }, OVERALL_INIT_TIMEOUT);
  });

  try {
    await Promise.race([initializationPromise, timeoutPromise]);
  } finally {
    // Without this the timer stays pending for the full 120s after success.
    clearTimeout(overallTimer);
  }
};
/**
 * Captures a screenshot of the current page directly, without running the
 * workflow interpreter, and sends it to the client as a base64 data URL.
 *
 * Emits `screenshotCaptureStarted`, then `directScreenshotCaptured` on success
 * or `screenshotError` on failure (including when no page is available).
 * @param settings Screenshot settings containing fullPage, type, etc. A `timeout`
 *   of `0` is passed through unchanged (Playwright treats it as "no timeout");
 *   only a missing timeout falls back to 30000ms.
 * @returns Promise<void>
 */
public captureDirectScreenshot = async (settings: {
  fullPage: boolean;
  type: 'png' | 'jpeg';
  timeout?: number;
  animations?: 'disabled' | 'allow';
  caret?: 'hide' | 'initial';
  scale?: 'css' | 'device';
}): Promise<void> => {
  if (!this.currentPage) {
    logger.error("No current page available for screenshot");
    this.socket.emit('screenshotError', {
      userId: this.userId,
      error: 'No active page available'
    });
    return;
  }

  try {
    this.socket.emit('screenshotCaptureStarted', {
      userId: this.userId,
      fullPage: settings.fullPage
    });

    const imageType = settings.type || 'png';
    const screenshotBuffer = await this.currentPage.screenshot({
      fullPage: settings.fullPage,
      type: imageType,
      // `??` rather than `||` so an explicit 0 is not replaced by the default.
      timeout: settings.timeout ?? 30000,
      animations: settings.animations || 'allow',
      caret: settings.caret || 'hide',
      scale: settings.scale || 'device'
    });

    const mimeType = `image/${imageType}`;
    const dataUrl = `data:${mimeType};base64,${screenshotBuffer.toString('base64')}`;

    this.socket.emit('directScreenshotCaptured', {
      userId: this.userId,
      screenshot: dataUrl,
      mimeType: mimeType,
      fullPage: settings.fullPage,
      timestamp: Date.now()
    });
  } catch (error) {
    logger.error('Failed to capture direct screenshot:', error);
    this.socket.emit('screenshotError', {
      userId: this.userId,
      error: error instanceof Error ? error.message : 'Unknown error occurred'
    });
  }
};
/**
 * Detaches every listener for the editor-related events from the current socket.
 * Failures are logged as warnings and not rethrown.
 */
private removeAllSocketListeners(): void {
  const editorEvents = [
    'captureDirectScreenshot',
    'rerender',
    'settings',
    'changeTab',
    'addTab',
    'closeTab',
    'dom:scroll',
  ];

  try {
    for (const eventName of editorEvents) {
      this.socket.removeAllListeners(eventName);
    }
    logger.debug(`Removed all socket listeners for user ${this.userId}`);
  } catch (error: any) {
    logger.warn(`Error removing socket listeners: ${error.message}`);
  }
}
2024-06-01 10:57:32 +05:30
/**
 * Registers all event listeners needed for the recording editor session.
 * Existing listeners for the same events are removed first, so calling this
 * again (e.g. after the socket is replaced) does not register duplicates.
 *
 * Handlers that drive Playwright (`changeTab`, `addTab`, `closeTab`) catch and
 * log their own errors so a failed page operation cannot become an unhandled
 * promise rejection.
 * @returns void
 */
public registerEditorEvents = (): void => {
  logger.log("debug", `Registering editor events for user: ${this.userId}`);
  this.removeAllSocketListeners();

  this.socket.on("captureDirectScreenshot", async (settings) => {
    await this.captureDirectScreenshot(settings);
  });

  this.socket.on("rerender", async () => {
    logger.debug(
      `General rerender event received, checking if for user ${this.userId}`
    );
    await this.makeAndEmitDOMSnapshot();
  });

  this.socket.on("changeTab", async (tabIndex) => {
    try {
      await this.changeTab(tabIndex);
    } catch (error) {
      logger.error("Error handling changeTab event:", error);
    }
  });

  this.socket.on("addTab", async () => {
    try {
      await this.currentPage?.context().newPage();
      // The newly opened page is the last one in the context's page list.
      const lastTabIndex = this.currentPage
        ? this.currentPage.context().pages().length - 1
        : 0;
      await this.changeTab(lastTabIndex);
    } catch (error) {
      logger.error("Error handling addTab event:", error);
    }
  });

  this.socket.on("closeTab", async (tabInfo) => {
    try {
      const pages = this.currentPage?.context().pages() ?? [];
      const page = pages[tabInfo.index];
      if (!page) return;

      if (tabInfo.isCurrent) {
        // Move to a neighbouring tab before closing the active one:
        // prefer the next tab, otherwise the previous one.
        if (pages[tabInfo.index + 1]) {
          await this.changeTab(tabInfo.index + 1);
        } else if (tabInfo.index - 1 >= 0) {
          await this.changeTab(tabInfo.index - 1);
        } else {
          logger.warn("Closing the only open tab; no other tab to switch to");
        }
      }
      await page.close();
    } catch (error) {
      logger.error("Error handling closeTab event:", error);
    }
  });
};
2024-06-01 10:59:15 +05:30
2025-06-23 13:21:46 +05:30
/**
 * Turns DOM streaming on: marks streaming active and installs the scroll and
 * page-change listeners. Does nothing (beyond a warning) when no CDP client exists.
 */
public async subscribeToDOM(): Promise<void> {
  const hasClient = Boolean(this.client);
  if (!hasClient) {
    logger.warn("DOM streaming requires scraping browser with CDP client");
    return;
  }

  try {
    this.isDOMStreamingActive = true;
    logger.info("DOM streaming started successfully");

    this.setupScrollEventListener();
    this.setupPageChangeListeners();
  } catch (error) {
    logger.error("Failed to start DOM streaming:", error);
    this.isDOMStreamingActive = false;
  }
}
/**
 * Captures an rrweb snapshot of the current page and emits it to the client.
 *
 * The snapshot is produced inside the page by `window.rrwebSnapshot.snapshot`,
 * combined with the current scroll position and capture time, and sent as a
 * `domcast` event. Nothing happens when streaming is inactive or the page is
 * missing or closed. Errors caused by navigation or page closure are logged at
 * debug level; any other error is logged and reported via `dom-mode-error`.
 */
public async makeAndEmitDOMSnapshot(): Promise<void> {
  if (!this.currentPage || !this.isDOMStreamingActive) {
    return;
  }

  try {
    // Check if page is still valid and not closed
    if (this.currentPage.isClosed()) {
      logger.debug("Skipping DOM snapshot - page is closed");
      return;
    }

    // Get current scroll position
    const currentScrollInfo = await this.currentPage.evaluate(() => ({
      x: window.scrollX,
      y: window.scrollY,
      maxX: Math.max(
        0,
        document.documentElement.scrollWidth - window.innerWidth
      ),
      maxY: Math.max(
        0,
        document.documentElement.scrollHeight - window.innerHeight
      ),
      documentHeight: document.documentElement.scrollHeight,
    }));

    logger.info(
      `Creating rrweb snapshot at scroll position: ${currentScrollInfo.y}/${currentScrollInfo.maxY}`
    );

    // Update our tracked scroll position
    this.lastScrollPosition = {
      x: currentScrollInfo.x,
      y: currentScrollInfo.y,
    };

    // The page may have closed while the scroll position was being read.
    if (this.currentPage.isClosed()) {
      logger.debug("Skipping DOM snapshot - page closed before snapshot");
      return;
    }

    // Capture snapshot using rrweb
    const rawSnapshot = await this.currentPage.evaluate(() => {
      if (typeof window.rrwebSnapshot === "undefined") {
        throw new Error("rrweb-snapshot library not available");
      }
      return window.rrwebSnapshot.snapshot(document, {
        inlineImages: false,
        collectFonts: true,
      });
    });

    // Attach the page URL to the snapshot
    const processedSnapshot = await this.processRRWebSnapshot(rawSnapshot);

    // Add scroll position information
    const enhancedSnapshot = {
      ...processedSnapshot,
      scrollPosition: currentScrollInfo,
      captureTime: Date.now(),
    };

    // Emit the processed snapshot
    this.emitRRWebSnapshot(enhancedSnapshot);
  } catch (error) {
    // Handle navigation context destruction gracefully. Both the older
    // "Target closed" wording and the newer "... has been closed" wording of
    // Playwright's closure error are treated as expected.
    const expectedMessages = [
      "Execution context was destroyed",
      "most likely because of a navigation",
      "Target closed",
      "has been closed",
    ];
    if (
      error instanceof Error &&
      expectedMessages.some((fragment) => error.message.includes(fragment))
    ) {
      logger.debug("DOM snapshot skipped due to page navigation or closure");
      return;
    }

    logger.error("Failed to create rrweb snapshot:", error);
    this.socket.emit("dom-mode-error", {
      userId: this.userId,
      message: "Failed to create rrweb snapshot",
      error: error instanceof Error ? error.message : String(error),
      timestamp: Date.now(),
    });
  }
}
/**
 * Sends a processed snapshot to the client as a `domcast` event, tagged with
 * the owning user ID and the emit time.
 * @param processedSnapshot snapshot payload to send
 */
private emitRRWebSnapshot(processedSnapshot: ProcessedSnapshot): void {
  const message = {
    snapshotData: processedSnapshot,
    userId: this.userId,
    timestamp: Date.now(),
  };
  this.socket.emit("domcast", message);
}
/**
 * Stops DOM streaming and cancels every timer that belongs to it.
 *
 * Besides the update interval and the quiet-period timer, this also stops the
 * progress emitter and the pending debounced snapshot, and resets the
 * initial-load flag. The quiet-period callback is the only other place that
 * clears that flag; cancelling the timer without resetting it would leave
 * `domcontentloaded` snapshots suppressed indefinitely.
 */
private async stopDOM(): Promise<void> {
  this.isDOMStreamingActive = false;

  if (this.domUpdateInterval) {
    clearInterval(this.domUpdateInterval);
    this.domUpdateInterval = null;
  }
  if (this.progressInterval) {
    clearInterval(this.progressInterval);
    this.progressInterval = null;
  }
  if (this.snapshotDebounceTimeout) {
    clearTimeout(this.snapshotDebounceTimeout);
    this.snapshotDebounceTimeout = null;
  }
  if (this.networkRequestTimeout) {
    clearTimeout(this.networkRequestTimeout);
    this.networkRequestTimeout = null;
  }

  this.pendingNetworkRequests = [];
  this.isInitialLoadInProgress = false;

  logger.info("DOM streaming stopped successfully");
}
2025-06-30 16:29:07 +05:30
/**
 * Terminates the DOM snapshot session and closes the remote browser.
 * If an interpretation was running it will be stopped.
 *
 * Shutdown order: timers, socket listeners, page listeners, generator,
 * interpreter, DOM streaming, CDP session, page, context, browser. Every step
 * handles its own errors so a failure in one does not prevent the next, and
 * each close/detach call is bounded by a 5s timeout.
 * @returns {Promise<void>}
 */
public async switchOff(): Promise<void> {
  const CLOSE_TIMEOUT_MS = 5000;

  this.isDOMStreamingActive = false;

  if (this.domUpdateInterval) {
    clearInterval(this.domUpdateInterval);
    this.domUpdateInterval = null;
  }
  if (this.memoryCleanupInterval) {
    clearInterval(this.memoryCleanupInterval);
    this.memoryCleanupInterval = null;
  }
  if (this.memoryManagementInterval) {
    clearInterval(this.memoryManagementInterval);
    this.memoryManagementInterval = null;
  }
  if (this.progressInterval) {
    clearInterval(this.progressInterval);
    this.progressInterval = null;
  }
  if (this.snapshotDebounceTimeout) {
    clearTimeout(this.snapshotDebounceTimeout);
    this.snapshotDebounceTimeout = null;
  }
  if (this.networkRequestTimeout) {
    clearTimeout(this.networkRequestTimeout);
    this.networkRequestTimeout = null;
  }

  this.removeAllSocketListeners();

  try {
    if (this.currentPage) {
      if (!this.currentPage.isClosed()) {
        this.currentPage.removeAllListeners();
        logger.debug('Removed all page event listeners');
      } else {
        logger.debug('Page already closed, skipping listener removal');
      }
    }
  } catch (error: any) {
    logger.warn(`Error removing page listeners: ${error.message}`);
  }

  // Clean up Generator listeners to prevent memory leaks
  if (this.generator) {
    try {
      this.generator.cleanup();
      logger.debug('Generator cleanup completed');
    } catch (error: any) {
      logger.warn(`Error cleaning up generator: ${error.message}`);
    }
  }

  // Stop interpretation with individual error handling
  try {
    await this.interpreter.stopInterpretation();
  } catch (error) {
    logger.error("Error stopping interpretation during shutdown:", error);
  }

  // Stop DOM streaming with individual error handling
  try {
    await this.stopDOM();
  } catch (error) {
    logger.error("Error stopping DOM during shutdown:", error);
  }

  try {
    if (this.client && this.currentPage && !this.currentPage.isClosed()) {
      await this.settleWithin(this.client.detach(), CLOSE_TIMEOUT_MS, 'CDP detach timeout');
      logger.debug('CDP session detached successfully');
    }
  } catch (error: any) {
    logger.warn(`Error detaching CDP session: ${error.message}`);
  } finally {
    this.client = null;
  }

  try {
    if (this.currentPage && !this.currentPage.isClosed()) {
      await this.settleWithin(this.currentPage.close(), CLOSE_TIMEOUT_MS, 'Page close timeout');
      logger.debug('Current page closed successfully');
    }
  } catch (error: any) {
    logger.warn(`Error closing current page: ${error.message}`);
  } finally {
    this.currentPage = null;
  }

  try {
    if (this.context) {
      await this.settleWithin(this.context.close(), CLOSE_TIMEOUT_MS, 'Context close timeout');
      logger.debug('Browser context closed successfully');
    }
  } catch (error: any) {
    logger.warn(`Error closing browser context: ${error.message}`);
  } finally {
    this.context = null;
  }

  try {
    if (this.browser) {
      await this.settleWithin(this.browser.close(), CLOSE_TIMEOUT_MS, 'Browser close timeout');
      logger.debug('Browser closed successfully');
    }
  } catch (error: any) {
    logger.error("Error during browser close:", error);
  } finally {
    this.browser = null;
  }
}

/**
 * Awaits `operation`, rejecting with `timeoutMessage` if it has not settled
 * within `ms` milliseconds. The timer is always cleared, so no pending timeout
 * outlives the call.
 */
private async settleWithin<T>(operation: Promise<T>, ms: number, timeoutMessage: string): Promise<T> {
  let timer: NodeJS.Timeout | undefined;
  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(timeoutMessage)), ms);
  });
  try {
    return await Promise.race([operation, timeout]);
  } finally {
    clearTimeout(timer);
  }
}
2024-06-01 10:59:49 +05:30
2024-06-01 11:00:28 +05:30
/**
 * Updates the active socket instance.
 * This will update all registered events for the socket and
 * all the properties using the socket.
 *
 * Editor listeners are removed from the previous socket before the swap, so
 * events arriving on the old connection no longer drive this browser.
 * @param socket socket.io socket instance used to communicate with the client side
 * @returns void
 */
public updateSocket = (socket: Socket): void => {
  if (this.socket !== socket) {
    // Still points at the old socket here; detach our handlers from it.
    this.removeAllSocketListeners();
  }

  this.socket = socket;
  this.registerEditorEvents();
  this.generator?.updateSocket(socket);
  this.interpreter?.updateSocket(socket);

  // registerEditorEvents clears `dom:scroll`; restore it while streaming is active.
  if (this.isDOMStreamingActive) {
    this.setupScrollEventListener();
  }
};
2024-06-01 11:03:01 +05:30
/**
 * Starts the interpretation of the currently generated workflow.
 *
 * Opens a fresh page, rebuilds `interpreterSettings.params` from the generator's
 * parameter names (keeping values already present, defaulting the rest to an
 * empty string), runs the interpreter, and finally clears the generator's
 * last index.
 * @returns {Promise<void>}
 */
public interpretCurrentRecording = async (): Promise<void> => {
  logger.log('debug', 'Starting interpretation in the editor');

  if (!this.generator) {
    logger.log('error', 'Generator is not initialized');
    return;
  }

  const workflow = this.generator.AddGeneratedFlags(this.generator.getWorkflowFile());
  await this.initializeNewPage();

  if (!this.currentPage) {
    logger.log('error', 'Could not get a new page, returned undefined');
    return;
  }

  // this.currentPage.setViewportSize({ height: 400, width: 900 });
  const params = this.generator.getParams();
  if (params) {
    this.interpreterSettings.params = params.reduce((acc, param) => {
      const existing = this.interpreterSettings.params;
      if (existing && Object.keys(existing).includes(param)) {
        return { ...acc, [param]: existing[param] };
      }
      return { ...acc, [param]: '' };
    }, {});
  }

  logger.log('debug', `Starting interpretation with settings: ${JSON.stringify(this.interpreterSettings, null, 2)}`);
  await this.interpreter.interpretRecordingInEditor(
    workflow,
    this.currentPage,
    (newPage: Page) => this.currentPage = newPage,
    this.interpreterSettings
  );

  // clear the active index from generator
  this.generator.clearLastIndex();
};
/**
 * Accessor for the page this remote browser is currently working with.
 * @returns {Page | null | undefined} the current page, or a nullish value when none is set
 */
public getCurrentPage = (): Page | null | undefined => this.currentPage;
/**
 * Changes the active page to the page instance on the given index
 * available in pages array on the {@link BrowserContext}.
 *
 * Stops DOM streaming on the previous page, switches listeners and the CDP
 * session to the new page, emits `urlChanged`, and — if streaming was active
 * before the switch — re-subscribes and emits a fresh snapshot.
 * @param tabIndex index of the page in the pages array on the {@link BrowserContext}
 * @returns {Promise<void>}
 */
private changeTab = async (tabIndex: number): Promise<void> => {
  // The index comes from the client: accept only non-negative integers so a
  // value such as "length" cannot resolve to a non-page property of the array.
  const pages = this.currentPage?.context().pages() ?? [];
  const page =
    Number.isInteger(tabIndex) && tabIndex >= 0 ? pages[tabIndex] : undefined;

  if (!page) {
    logger.log('error', `${tabIndex} index out of range of pages`)
    return;
  }

  // stopDOM() resets the streaming flag, so remember it before stopping.
  const wasStreaming = this.isDOMStreamingActive;
  await this.stopDOM();

  const previousClient = this.client;
  this.currentPage = page;
  await this.setupPageEventListeners(this.currentPage);

  //await this.currentPage.setViewportSize({ height: 400, width: 900 })
  this.client = await this.currentPage.context().newCDPSession(this.currentPage);

  // Release the session bound to the previous page. Not awaited, so a slow or
  // failing detach cannot delay the tab switch.
  if (previousClient) {
    previousClient.detach().catch((error: any) => {
      logger.debug(`Previous CDP session detach failed: ${error?.message ?? error}`);
    });
  }

  // Include userId in the URL change event
  this.socket.emit('urlChanged', {
    url: this.currentPage.url(),
    userId: this.userId
  });

  if (wasStreaming) {
    // Subscribe first: the snapshot call returns early while streaming is inactive.
    await this.subscribeToDOM();
    await this.makeAndEmitDOMSnapshot();
  }
}
2024-06-01 11:05:13 +05:30
/**
 * Internal method for a new page initialization. Opens a page on the browser,
 * sets a fixed `User-Agent` header on it, closes the previous page, and
 * subscribes the new page to DOM streaming.
 *
 * NOTE(review): the page is created with `browser.newPage()` rather than on the
 * stored context, so context-level init scripts may not apply to it — confirm.
 * NOTE(review): `this.client` is not refreshed here and presumably still refers
 * to the session of the page being closed — verify against `subscribeToDOM`.
 * @param options optional page options to be used when creating a new page
 * @returns {Promise<void>}
 */
private initializeNewPage = async (options?: Object): Promise<void> => {
  let newPage: Page | undefined;
  if (options) {
    newPage = await this.browser?.newPage(options);
  } else {
    newPage = await this.browser?.newPage();
  }

  await newPage?.setExtraHTTPHeaders({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
  });

  // The previous page is closed even when no new page could be created.
  await this.currentPage?.close();
  this.currentPage = newPage;

  if (!this.currentPage) {
    logger.log('error', 'Could not get a new page, returned undefined');
    return;
  }

  await this.setupPageEventListeners(this.currentPage);
  await this.subscribeToDOM();
};
2024-06-01 10:55:04 +05:30
}