Files
parcer/server/src/browser-management/classes/RemoteBrowser.ts

2057 lines
76 KiB
TypeScript
Raw Normal View History

2024-06-01 10:55:04 +05:30
import {
Page,
Browser,
CDPSession,
2025-11-30 17:41:44 +05:30
BrowserContext
} from 'playwright-core';
2024-06-01 10:55:04 +05:30
import { Socket } from "socket.io";
2024-09-21 18:51:11 +05:30
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
2024-07-20 05:57:15 +05:30
import fetch from 'cross-fetch';
2025-01-06 11:43:02 +05:30
import sharp from 'sharp';
2024-06-01 10:55:04 +05:30
import logger from '../../logger';
2025-04-01 23:25:47 +05:30
import { InterpreterSettings } from "../../types";
2024-06-01 10:55:04 +05:30
import { WorkflowGenerator } from "../../workflow-management/classes/Generator";
import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter";
import { getDecryptedProxyConfig } from '../../routes/proxy';
2024-12-09 17:57:28 +05:30
import { getInjectableScript } from 'idcac-playwright';
2025-11-30 17:41:44 +05:30
import { connectToRemoteBrowser } from '../browserConnection';
2025-06-23 13:21:46 +05:30
// Augments the global `Window` type with an optional, untyped `rrwebSnapshot` property.
// NOTE(review): presumably this global is provided by the rrweb bundle that is injected
// into pages as an init script — confirm against the code that reads it.
declare global {
interface Window {
rrwebSnapshot?: any;
}
}
/**
 * A serialized DOM snapshot node as handled by this file.
 * Nodes nest through the optional `childNodes` array; the index signature
 * admits any additional, untyped properties.
 * NOTE(review): the meaning of individual fields (e.g. the numeric `type` codes)
 * presumably follows the rrweb snapshot format — confirm.
 */
interface RRWebSnapshot {
type: number;
childNodes?: RRWebSnapshot[];
tagName?: string;
attributes?: Record<string, string>;
textContent?: string;
id: number;
[key: string]: any;
}
/**
 * A snapshot bundled with the page context it was taken in.
 * This is the return shape of `processRRWebSnapshot`.
 */
interface ProcessedSnapshot {
// The snapshot tree itself, passed through unchanged by `processRRWebSnapshot`.
snapshot: RRWebSnapshot;
// Resource collections grouped by kind.
resources: {
stylesheets: Array<{
href: string;
content: string;
media?: string;
}>;
images: Array<{
src: string;
dataUrl: string;
alt?: string;
}>;
fonts: Array<{
url: string;
dataUrl: string;
format?: string;
}>;
scripts: Array<{
src: string;
content: string;
type?: string;
}>;
media: Array<{
src: string;
dataUrl: string;
type: string;
}>;
};
// URL of the page at snapshot time (empty string when no page is available).
baseUrl: string;
// Viewport dimensions of the page at snapshot time.
viewport: { width: number; height: number };
// Creation time of this object, from `Date.now()`.
timestamp: number;
// Per-kind resource counts; `processRRWebSnapshot` fills both groups from the
// lengths of the corresponding `resources` arrays.
processingStats: {
discoveredResources: {
images: number;
stylesheets: number;
scripts: number;
fonts: number;
media: number;
};
cachedResources: {
stylesheets: number;
images: number;
fonts: number;
scripts: number;
media: number;
};
};
}
2025-01-06 11:47:56 +05:30
/**
 * Thresholds for the periodic heap check performed by `initializeMemoryManagement`.
 */
const MEMORY_CONFIG = {
  /** Milliseconds between heap checks (20s instead of 60s, to check more frequently). */
  gcInterval: 20000,
  /** Heap budget in bytes (1.5GB) that `heapUsed` is measured against. */
  maxHeapSize: 1536 * 1024 * 1024,
  /** Fraction of the budget (70%) above which cleanup starts; reduced to react earlier. */
  heapUsageThreshold: 0.7,
};
2025-06-12 14:30:43 +05:30
/** Default viewport geometry; its width and height also bound the screencast frame size. */
const DEFAULT_VIEWPORT = { width: 1280, height: 720, deviceScaleFactor: 1, mobile: false };
/**
 * Screencast tuning values. Frame dimensions default to {@link DEFAULT_VIEWPORT};
 * `maxQueueSize` is the cap applied to the screenshot queue by the heap check.
 */
const SCREENCAST_CONFIG: {
  format: "jpeg" | "png";
  maxWidth: number;
  maxHeight: number;
  targetFPS: number;
  compressionQuality: number;
  maxQueueSize: number;
  skipFrameThreshold: number;
  enableAdaptiveQuality: boolean;
} = {
  format: 'jpeg',
  maxWidth: DEFAULT_VIEWPORT.width,
  maxHeight: DEFAULT_VIEWPORT.height,
  targetFPS: 30,
  compressionQuality: 0.8,
  maxQueueSize: 2,
  skipFrameThreshold: 100,
  enableAdaptiveQuality: true,
};
2025-01-06 11:47:56 +05:30
2024-06-01 10:55:04 +05:30
/**
* This class represents a remote browser instance.
* It enables a variety of interactions with Playwright's browser instance.
* Every remote browser holds an instance of a generator and interpreter classes with
* the purpose of generating and interpreting workflows.
* @category BrowserManagement
*/
export class RemoteBrowser {
/**
 * Playwright's [browser](https://playwright.dev/docs/api/class-browser) instance.
 * @private
 */
private browser: Browser | null = null;

/**
 * Browser context created during {@link initialize}; the current page is opened from it.
 * @private
 */
private context: BrowserContext | null = null;

/**
 * The Playwright's [CDPSession](https://playwright.dev/docs/api/class-cdpsession) instance,
 * used to talk raw Chrome Devtools Protocol.
 * @private
 */
private client: CDPSession | null | undefined = null;

/**
 * Socket.io socket instance enabling communication with the client (frontend) side.
 * @private
 */
private socket: Socket;

/**
 * The Playwright's [Page](https://playwright.dev/docs/api/class-page) instance
 * as current interactive remote browser's page.
 * @private
 */
private currentPage: Page | null | undefined = null;

/**
 * Interpreter settings for any started interpretation.
 * @private
 */
private interpreterSettings: InterpreterSettings = {
  debug: false,
  maxConcurrency: 1,
  maxRepeats: 1,
};

/**
 * The user ID that owns this browser instance
 * @private
 */
private userId: string;

/** URL most recently sent to the client through the `urlChanged` event. */
private lastEmittedUrl: string | null = null;

/**
 * {@link WorkflowGenerator} instance specific to the remote browser.
 */
public generator: WorkflowGenerator;

/**
 * {@link WorkflowInterpreter} instance specific to the remote browser.
 */
public interpreter: WorkflowInterpreter;

/** Backlog of screenshot buffers; trimmed or cleared by the memory cleanup routines. */
private screenshotQueue: Buffer[] = [];
/** Screenshot-processing flag; forcibly reset to `false` by `performMemoryCleanup`. */
private isProcessingScreenshot = false;
// NOTE(review): screencast timer/flag — presumably managed by the screencast start/stop methods; confirm.
private screencastInterval: NodeJS.Timeout | null = null;
private isScreencastActive: boolean = false;

/** When `false`, incoming `dom:scroll` events are ignored. */
private isDOMStreamingActive: boolean = false;
// NOTE(review): presumably the handle of a periodic DOM update timer — confirm where it is set.
private domUpdateInterval: NodeJS.Timeout | null = null;
/** Which rendering mode is in effect: screenshots or DOM snapshots. */
private renderingMode: "screenshot" | "dom" = "screenshot";

/** Scroll position recorded the last time a scroll-triggered snapshot was scheduled. */
private lastScrollPosition = { x: 0, y: 0 };
/** Combined |dx| + |dy| scroll distance, in pixels, that must be exceeded to schedule a snapshot. */
private scrollThreshold = 200; // pixels
/** Pending debounce timer for a scroll-triggered snapshot. */
private snapshotDebounceTimeout: NodeJS.Timeout | null = null;
/** Set to `true` when a snapshot is scheduled because of scrolling. */
private isScrollTriggeredSnapshot = false;

/** Timer that fires once the initial-load quiet period has elapsed. */
private networkRequestTimeout: NodeJS.Timeout | null = null;
/** URLs of document responses recorded while waiting for the initial load to settle. */
private pendingNetworkRequests: string[] = [];
// NOTE(review): presumably a general quiet period in milliseconds — confirm where it is consumed.
private readonly NETWORK_QUIET_PERIOD = 8000;
/** Milliseconds to wait after the first document response before emitting the initial snapshot. */
private readonly INITIAL_LOAD_QUIET_PERIOD = 3000;
/** `Date.now()` value captured when the initial-load wait started; drives the progress estimate. */
private networkWaitStartTime: number = 0;
/** Interval that periodically emits loading progress during the initial-load wait. */
private progressInterval: NodeJS.Timeout | null = null;
/** Becomes `true` once the first non-blank document response has been seen. */
private hasShownInitialLoader: boolean = false;
/** `true` between the first document response and the end of the quiet period. */
private isInitialLoadInProgress: boolean = false;
2025-06-23 13:21:46 +05:30
2024-06-01 10:55:04 +05:30
/**
 * Initializes new instances of the {@link WorkflowGenerator} and {@link WorkflowInterpreter}
 * classes and assigns the socket instance everywhere.
 * @param socket socket.io socket instance used to communicate with the client side
 * @param userId ID of the user that owns this browser instance
 * @param poolId identifier forwarded to the {@link WorkflowGenerator} constructor
 * @constructor
 */
public constructor(socket: Socket, userId: string, poolId: string) {
  this.socket = socket;
  this.userId = userId;
  this.interpreter = new WorkflowInterpreter(socket);
  this.generator = new WorkflowGenerator(socket, poolId);
}
2025-06-12 14:30:43 +05:30
/**
 * Trims the screenshot backlog: once more than 10 buffers are queued,
 * only the 3 most recent ones are kept.
 */
private cleanupMemory(): void {
  const backlog = this.screenshotQueue;
  if (backlog.length <= 10) {
    return;
  }
  this.screenshotQueue = backlog.slice(-3); // Keep only last 3
}
/** Handle of the periodic queue-trimming timer, kept so it is started only once and can be cleared. */
private memoryCleanupTimer: NodeJS.Timeout | null = null;

/**
 * Starts a timer that runs {@link cleanupMemory} every 30 seconds.
 * Calling it again while the timer is running is a no-op, so repeated
 * initialization cannot stack several timers on the same instance.
 */
private setupMemoryCleanup(): void {
  if (this.memoryCleanupTimer) {
    return;
  }
  this.memoryCleanupTimer = setInterval(() => {
    this.cleanupMemory();
  }, 30000); // Every 30 seconds
}
2025-06-23 13:21:46 +05:30
/**
 * Wraps a snapshot with the context of the current page: its URL, viewport size,
 * a creation timestamp and per-kind resource counts.
 * The resource collections are created empty here, so every count is derived from
 * an empty array.
 * @param snapshot snapshot tree to wrap; returned unchanged inside the result
 * @returns the snapshot together with page context and processing statistics
 */
private async processRRWebSnapshot(
  snapshot: RRWebSnapshot
): Promise<ProcessedSnapshot> {
  const page = this.currentPage;
  const baseUrl = page?.url() || "";
  const resources: ProcessedSnapshot["resources"] = {
    stylesheets: [],
    images: [],
    fonts: [],
    scripts: [],
    media: [],
  };
  // viewportSize() is synchronous and returns null when no viewport is set;
  // fall back to the shared default instead of repeating the numbers here.
  const viewport = page?.viewportSize() ?? {
    width: DEFAULT_VIEWPORT.width,
    height: DEFAULT_VIEWPORT.height,
  };
  const counts = {
    stylesheets: resources.stylesheets.length,
    images: resources.images.length,
    fonts: resources.fonts.length,
    scripts: resources.scripts.length,
    media: resources.media.length,
  };
  return {
    snapshot,
    resources,
    baseUrl,
    viewport,
    timestamp: Date.now(),
    processingStats: {
      discoveredResources: { ...counts },
      cachedResources: { ...counts },
    },
  };
}
/**
 * Starts a periodic heap check (every `MEMORY_CONFIG.gcInterval` ms).
 * - Above 120% of the usage threshold: runs the full {@link performMemoryCleanup}.
 * - Above the threshold: clears the screenshot queue and, above 110% of the
 *   threshold, requests a garbage collection when `global.gc` is exposed.
 * In every case the screenshot queue is capped at `SCREENCAST_CONFIG.maxQueueSize`.
 */
private initializeMemoryManagement(): void {
  setInterval(() => {
    const { heapUsed } = process.memoryUsage();
    const heapUsageRatio = heapUsed / MEMORY_CONFIG.maxHeapSize;
    const threshold = MEMORY_CONFIG.heapUsageThreshold;

    if (heapUsageRatio > threshold * 1.2) {
      logger.warn('Critical memory pressure detected, triggering emergency cleanup');
      // The cleanup is async; catch here so a failure cannot surface as an unhandled rejection.
      this.performMemoryCleanup().catch((error) => {
        logger.error('Error during emergency memory cleanup:', error);
      });
    } else if (heapUsageRatio > threshold) {
      logger.warn('High memory usage detected, triggering cleanup');
      if (this.screenshotQueue.length > 0) {
        this.screenshotQueue = [];
        logger.info('Screenshot queue cleared due to memory pressure');
      }
      if (global.gc && heapUsageRatio > threshold * 1.1) {
        global.gc();
      }
    }

    if (this.screenshotQueue.length > SCREENCAST_CONFIG.maxQueueSize) {
      this.screenshotQueue = this.screenshotQueue.slice(-SCREENCAST_CONFIG.maxQueueSize);
    }
  }, MEMORY_CONFIG.gcInterval);
}
2025-01-06 11:47:38 +05:30
/**
 * Emergency cleanup: drops all queued screenshots, requests a garbage collection
 * when `global.gc` is exposed, and replaces the CDP session with a fresh one
 * (stopping and restarting the screencast around the swap).
 * Emits `memory-cleanup` to the client when done. Errors are logged, not thrown.
 */
private async performMemoryCleanup(): Promise<void> {
  this.screenshotQueue = [];
  this.isProcessingScreenshot = false;

  if (global.gc) {
    try {
      global.gc();
      logger.info('Garbage collection requested');
    } catch (error) {
      logger.error('Error during garbage collection:', error);
    }
  }

  const staleClient = this.client;
  if (staleClient) {
    try {
      await this.stopScreencast();
      await new Promise(resolve => setTimeout(resolve, 500));
      this.client = null;

      // Detach the old session explicitly; merely dropping the reference leaves it
      // attached to the page, so every reset would leak one session.
      try {
        await staleClient.detach();
      } catch (detachError) {
        // The session may already be gone (e.g. page closed); that is fine.
        logger.debug(`Old CDP session could not be detached: ${detachError}`);
      }

      if (this.currentPage) {
        this.client = await this.currentPage.context().newCDPSession(this.currentPage);
        await this.startScreencast();
        logger.info('CDP session reset completed');
      }
    } catch (error) {
      logger.error('Error resetting CDP session:', error);
    }
  }

  this.socket.emit('memory-cleanup', {
    userId: this.userId,
    timestamp: Date.now()
  });
}
/**
 * Normalizes URLs to prevent navigation loops while maintaining consistent format.
 * Trailing slashes are removed from the path (the root path stays "/").
 * The scheme needs no extra handling: the `URL` parser already lower-cases it.
 * @param url URL to normalize
 * @returns the normalized URL, or the input unchanged when it cannot be parsed
 */
private normalizeUrl(url: string): string {
  try {
    const parsedUrl = new URL(url);
    // Remove trailing slashes except for root path
    parsedUrl.pathname = parsedUrl.pathname.replace(/\/+$/, '') || '/';
    return parsedUrl.toString();
  } catch {
    return url;
  }
}
/**
 * Decides whether a URL differs enough from the last emitted one to be reported.
 * Always true when nothing has been emitted yet; otherwise the two URLs are
 * compared after normalization.
 */
private shouldEmitUrlChange(newUrl: string): boolean {
  const previousUrl = this.lastEmittedUrl;
  return !previousUrl || this.normalizeUrl(newUrl) !== this.normalizeUrl(previousUrl);
}
2025-06-23 13:21:46 +05:30
/**
 * Setup scroll event listener to track user scrolling.
 * Each `dom:scroll` event is replayed on the page as a mouse wheel action. When the
 * resulting scroll position moved more than `scrollThreshold` pixels (|dx| + |dy|)
 * since the last scheduled snapshot, a DOM snapshot is scheduled after a 300ms debounce.
 * Events are ignored while DOM streaming is inactive or no page exists.
 */
private setupScrollEventListener(): void {
  this.socket.on(
    "dom:scroll",
    async (data: { deltaX: number; deltaY: number }) => {
      const page = this.currentPage;
      if (!this.isDOMStreamingActive || !page) return;

      try {
        logger.debug(
          `Received scroll event: deltaX=${data.deltaX}, deltaY=${data.deltaY}`
        );
        await page.mouse.wheel(data.deltaX, data.deltaY);

        // Waiting for network idle is best effort: on pages with constant background
        // traffic it times out, which must not abort the scroll handling below.
        try {
          await page.waitForLoadState("networkidle", { timeout: 5000 });
        } catch (waitError) {
          logger.debug(`Network did not become idle after scroll, continuing: ${waitError}`);
        }

        const scrollInfo = await page.evaluate(() => ({
          x: window.scrollX,
          y: window.scrollY,
          maxX: Math.max(
            0,
            document.documentElement.scrollWidth - window.innerWidth
          ),
          maxY: Math.max(
            0,
            document.documentElement.scrollHeight - window.innerHeight
          ),
          documentHeight: document.documentElement.scrollHeight,
          viewportHeight: window.innerHeight,
        }));

        const scrollDelta =
          Math.abs(scrollInfo.y - this.lastScrollPosition.y) +
          Math.abs(scrollInfo.x - this.lastScrollPosition.x);
        logger.debug(
          `Scroll delta: ${scrollDelta}, threshold: ${this.scrollThreshold}`
        );

        if (scrollDelta > this.scrollThreshold) {
          this.lastScrollPosition = { x: scrollInfo.x, y: scrollInfo.y };
          this.isScrollTriggeredSnapshot = true;

          // Restart the debounce window so a burst of scroll events yields one snapshot.
          if (this.snapshotDebounceTimeout) {
            clearTimeout(this.snapshotDebounceTimeout);
          }
          this.snapshotDebounceTimeout = setTimeout(async () => {
            logger.info(
              `Triggering snapshot due to scroll. Position: ${scrollInfo.y}/${scrollInfo.maxY}`
            );
            try {
              await this.makeAndEmitDOMSnapshot();
            } catch (snapshotError) {
              // Runs outside the handler's try block, so it needs its own guard.
              logger.error("Error creating scroll-triggered snapshot:", snapshotError);
            }
          }, 300);
        }
      } catch (error) {
        logger.error("Error handling scroll event:", error);
      }
    }
  );
}
/**
 * Registers listeners on the current page that drive DOM snapshot emission.
 * - `domcontentloaded`: emits a snapshot, unless the initial load is still in progress.
 * - `response`: the first document response that is not `about:blank` starts the
 *   initial-load phase. Progress is reported every 500ms (from 60 up to 95), and after
 *   `INITIAL_LOAD_QUIET_PERIOD` ms progress 100 is reported and a snapshot is emitted.
 * Does nothing when there is no current page.
 */
private setupPageChangeListeners(): void {
  if (!this.currentPage) return;

  this.currentPage.on("domcontentloaded", async () => {
    if (this.isInitialLoadInProgress) return;
    logger.info("DOM content loaded - triggering snapshot");
    try {
      await this.makeAndEmitDOMSnapshot();
    } catch (error) {
      // Event handlers have no caller to propagate to; log instead of rejecting unhandled.
      logger.error("Error creating snapshot after DOM content loaded:", error);
    }
  });

  this.currentPage.on("response", async (response) => {
    const url = response.url();
    const isDocumentRequest = response.request().resourceType() === "document";
    if (this.hasShownInitialLoader || !isDocumentRequest || url.includes("about:blank")) {
      return;
    }

    this.hasShownInitialLoader = true;
    this.isInitialLoadInProgress = true;
    this.pendingNetworkRequests.push(url);

    if (this.networkRequestTimeout) {
      clearTimeout(this.networkRequestTimeout);
      this.networkRequestTimeout = null;
    }
    if (this.progressInterval) {
      clearInterval(this.progressInterval);
      this.progressInterval = null;
    }

    this.networkWaitStartTime = Date.now();
    this.progressInterval = setInterval(() => {
      const elapsed = Date.now() - this.networkWaitStartTime;
      // Grows with elapsed time but is capped at 35, so the total stays below 100
      // until the quiet period actually ends.
      const navigationProgress = Math.min((elapsed / this.INITIAL_LOAD_QUIET_PERIOD) * 40, 35);
      const totalProgress = 60 + navigationProgress;
      this.emitLoadingProgress(totalProgress, this.pendingNetworkRequests.length);
    }, 500);

    logger.debug(
      `Initial load network request received: ${url}. Using ${this.INITIAL_LOAD_QUIET_PERIOD}ms quiet period`
    );

    this.networkRequestTimeout = setTimeout(async () => {
      logger.info(
        `Initial load network quiet period reached (${this.INITIAL_LOAD_QUIET_PERIOD}ms)`
      );
      if (this.progressInterval) {
        clearInterval(this.progressInterval);
        this.progressInterval = null;
      }
      this.emitLoadingProgress(100, this.pendingNetworkRequests.length);
      this.pendingNetworkRequests = [];
      this.networkRequestTimeout = null;
      this.isInitialLoadInProgress = false;
      try {
        await this.makeAndEmitDOMSnapshot();
      } catch (error) {
        logger.error("Error creating snapshot after initial load:", error);
      }
    }, this.INITIAL_LOAD_QUIET_PERIOD);
  });
}
2025-11-03 22:55:14 +05:30
/**
 * Sends a `domLoadingProgress` event to the client.
 * @param progress progress value; rounded to the nearest integer before sending
 * @param pendingRequests number of pending requests to report alongside
 */
private emitLoadingProgress(progress: number, pendingRequests: number): void {
  const payload = {
    progress: Math.round(progress),
    pendingRequests,
    userId: this.userId,
    timestamp: Date.now(),
  };
  this.socket.emit("domLoadingProgress", payload);
}
/**
 * Registers navigation and load listeners on the given page.
 * - `framenavigated` (main frame only): emits `urlChanged` when the URL differs from
 *   the last emitted one after normalization.
 * - `load`: waits up to 5s for network idle, then evaluates the script returned by
 *   `getInjectableScript()` in the page. This is a single attempt; a failure
 *   (including the network-idle timeout) is logged and not retried.
 * @param page page to attach the listeners to
 */
private async setupPageEventListeners(page: Page): Promise<void> {
  page.on('framenavigated', (frame) => {
    if (frame !== page.mainFrame()) return;
    const currentUrl = page.url();
    if (this.shouldEmitUrlChange(currentUrl)) {
      this.lastEmittedUrl = currentUrl;
      this.socket.emit('urlChanged', { url: currentUrl, userId: this.userId });
    }
  });

  page.on('load', async () => {
    let injected = false;
    try {
      await page.waitForLoadState('networkidle', { timeout: 5000 });
      await page.evaluate(getInjectableScript());
      injected = true;
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      logger.log('warn', `Script injection attempt failed: ${message}`);
    }
    logger.log('debug', `Script injection result: ${injected}`);
  });
}
2024-12-14 22:30:50 +05:30
/**
 * Picks one user agent string at random from a fixed list of desktop Windows browsers.
 * Every entry uses the `Windows NT 10.0` platform token: browsers on Windows 11 still
 * report 10.0, so a `Windows NT 11.0` token would not match any real browser.
 * @returns a randomly selected user agent string
 */
private getUserAgent() {
  const userAgents = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.140 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.1938.81 Safari/537.36 Edg/116.0.1938.81',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.96 Safari/537.36 OPR/101.0.4843.25',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.62 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:118.0) Gecko/20100101 Firefox/118.0',
  ];

  return userAgents[Math.floor(Math.random() * userAgents.length)];
}
/**
 * An asynchronous constructor for asynchronously initialized properties.
 * Must be called right after creating an instance of RemoteBrowser class.
 *
 * Connects to the remote browser, creates a context (with the user's proxy, if one is
 * configured), installs the init scripts, opens the page and a CDP session, and reports
 * loading progress (0, 20, 40, 60) to the client along the way. The whole sequence is
 * attempted up to 3 times with a 1s pause between attempts.
 * @param userId ID of the user whose stored proxy configuration is applied
 * @returns {Promise<void>}
 * @throws Error when initialization still fails after the last attempt
 */
public initialize = async (userId: string): Promise<void> => {
  const MAX_RETRIES = 3;
  const CONTEXT_CREATION_TIMEOUT_MS = 15000;
  let retryCount = 0;
  let success = false;

  this.socket.emit("dom-snapshot-loading", {
    userId: this.userId,
    timestamp: Date.now(),
  });
  this.emitLoadingProgress(0, 0);

  while (!success && retryCount < MAX_RETRIES) {
    try {
      this.browser = await connectToRemoteBrowser();
      if (!this.browser || this.browser.isConnected() === false) {
        throw new Error('Browser failed to launch or is not connected');
      }
      this.emitLoadingProgress(20, 0);

      const proxyConfig = await getDecryptedProxyConfig(userId);

      const contextOptions: any = {
        // viewport: { height: 400, width: 900 },
        // recordVideo: { dir: 'videos/' }
        // Force reduced motion to prevent animation issues
        reducedMotion: 'reduce',
        // Force JavaScript to be enabled
        javaScriptEnabled: true,
        // NOTE(review): `timeout` is not a documented `browser.newContext` option — confirm it has any effect
        timeout: 50000,
        // Do not emulate the forced-colors media feature
        forcedColors: 'none',
        isMobile: false,
        hasTouch: false,
        userAgent: this.getUserAgent(),
      };

      if (proxyConfig.proxy_url) {
        // Credentials are only forwarded when both username and password are present.
        const hasCredentials = Boolean(proxyConfig.proxy_username && proxyConfig.proxy_password);
        contextOptions.proxy = {
          server: proxyConfig.proxy_url,
          username: hasCredentials ? proxyConfig.proxy_username : undefined,
          password: hasCredentials ? proxyConfig.proxy_password : undefined,
        };
      }

      await new Promise(resolve => setTimeout(resolve, 500));

      // Bound context creation to 15s. The timer is always cleared afterwards so it
      // does not linger once the race has been decided.
      let contextTimer: NodeJS.Timeout | undefined;
      try {
        this.context = await Promise.race([
          this.browser.newContext(contextOptions),
          new Promise<never>((_, reject) => {
            contextTimer = setTimeout(
              () => reject(new Error('Context creation timed out after 15s')),
              CONTEXT_CREATION_TIMEOUT_MS
            );
          }),
        ]);
      } finally {
        if (contextTimer) {
          clearTimeout(contextTimer);
        }
      }

      // Make `navigator.webdriver` report false while keeping a native-looking getter.
      await this.context.addInitScript(
        `const defaultGetter = Object.getOwnPropertyDescriptor(
          Navigator.prototype,
          "webdriver"
        ).get;
        defaultGetter.apply(navigator);
        defaultGetter.toString();
        Object.defineProperty(Navigator.prototype, "webdriver", {
          set: undefined,
          enumerable: true,
          configurable: true,
          get: new Proxy(defaultGetter, {
            apply: (target, thisArg, args) => {
              Reflect.apply(target, thisArg, args);
              return false;
            },
          }),
        });
        const patchedGetter = Object.getOwnPropertyDescriptor(
          Navigator.prototype,
          "webdriver"
        ).get;
        patchedGetter.apply(navigator);
        patchedGetter.toString();`
      );

      // NOTE(review): this path is resolved relative to the process working directory — confirm deployments start from the repo root
      await this.context.addInitScript({ path: './server/src/browser-management/classes/rrweb-bundle.js' });

      this.currentPage = await this.context.newPage();
      this.emitLoadingProgress(40, 0);

      await this.setupPageEventListeners(this.currentPage);

      const viewportSize = this.currentPage.viewportSize();
      if (viewportSize) {
        this.socket.emit('viewportInfo', {
          width: viewportSize.width,
          height: viewportSize.height,
          userId: this.userId
        });
      }

      try {
        const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']);
        await blocker.enableBlockingInPage(this.currentPage as any);
        this.client = await this.currentPage.context().newCDPSession(this.currentPage);
        await blocker.disableBlockingInPage(this.currentPage as any);
        logger.log('info', 'Adblocker initialized');
      } catch (error: unknown) {
        const message = error instanceof Error ? error.message : String(error);
        logger.log('warn', `Failed to initialize adblocker, continuing without it: ${message}`);
        // Still need to set up the CDP session even if blocker fails
        this.client = await this.currentPage.context().newCDPSession(this.currentPage);
      }

      this.emitLoadingProgress(60, 0);
      success = true;
      logger.log('debug', `Browser initialized successfully for user ${userId}`);
    } catch (error: unknown) {
      const message = error instanceof Error ? error.message : String(error);
      retryCount++;
      logger.log('error', `Browser initialization failed (attempt ${retryCount}/${MAX_RETRIES}): ${message}`);

      if (this.browser) {
        try {
          await this.browser.close();
        } catch (closeError) {
          logger.log('warn', `Failed to close browser during cleanup: ${closeError}`);
        }
        this.browser = null;
      }
      // Everything created during the failed attempt belonged to the browser that was
      // just closed; drop the references so no stale handle is used afterwards.
      this.context = null;
      this.currentPage = null;
      this.client = null;

      if (retryCount >= MAX_RETRIES) {
        throw new Error(`Failed to initialize browser after ${MAX_RETRIES} attempts: ${message}`);
      }
      await new Promise(resolve => setTimeout(resolve, 1000));
    }
  }

  this.setupMemoryCleanup();
  // this.initializeMemoryManagement();
};
2024-06-01 10:57:32 +05:30
2025-03-14 12:40:28 +05:30
/**
 * Sends the current page's viewport size to the client as a `viewportInfo` event.
 * Nothing is sent when there is no page or the page reports no viewport size.
 */
public updateViewportInfo = async (): Promise<void> => {
  const page = this.currentPage;
  if (!page) {
    return;
  }
  const size = await page.viewportSize();
  if (!size) {
    return;
  }
  const { width, height } = size;
  this.socket.emit('viewportInfo', { width, height, userId: this.userId });
};
/**
* Extract data from a list of elements on a page
* @param page - Playwright Page object
* @param listSelector - CSS selector for the list container
* @param fields - Record of field configurations
* @param limit - Maximum number of items to extract (default: 5)
* @returns Promise<Array<Record<string, string>>> - Array of extracted data objects
*/
private async extractListData(
page: Page,
listSelector: string,
fields: Record<string, {
label: string;
selectorObj: {
selector: string;
attribute: string;
};
}>,
limit: number = 5
): Promise<Array<Record<string, string>>> {
if (page.isClosed()) {
logger.warn("Page is closed, cannot extract list data");
return [];
}
return await page.evaluate(
async ({ listSelector, fields, limit }: {
listSelector: string;
fields: Record<string, {
label: string;
selectorObj: {
selector: string;
attribute: string;
};
}>;
limit: number;
}) => {
const convertedFields: Record<string, {
selector: string;
attribute: string;
}> = {};
for (const [key, field] of Object.entries(fields)) {
convertedFields[field.label] = {
selector: field.selectorObj.selector,
attribute: field.selectorObj.attribute
};
}
const queryElement = (rootElement: Element | Document, selector: string): Element | null => {
if (!selector.includes('>>') && !selector.includes(':>>')) {
return rootElement.querySelector(selector);
}
const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim());
let currentElement: Element | Document | null = rootElement;
for (let i = 0; i < parts.length; i++) {
if (!currentElement) return null;
if ((currentElement as Element).tagName === 'IFRAME' || (currentElement as Element).tagName === 'FRAME') {
try {
const frameElement = currentElement as HTMLIFrameElement | HTMLFrameElement;
const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document;
if (!frameDoc) return null;
currentElement = frameDoc.querySelector(parts[i]);
continue;
} catch (e) {
console.warn(`Cannot access ${(currentElement as Element).tagName.toLowerCase()} content:`, e);
return null;
}
}
let nextElement: Element | null = null;
if ('querySelector' in currentElement) {
nextElement = currentElement.querySelector(parts[i]);
}
if (!nextElement && 'shadowRoot' in currentElement && (currentElement as Element).shadowRoot) {
nextElement = (currentElement as Element).shadowRoot!.querySelector(parts[i]);
}
if (!nextElement && 'children' in currentElement) {
const children: any = Array.from((currentElement as Element).children || []);
for (const child of children) {
if (child.shadowRoot) {
nextElement = child.shadowRoot.querySelector(parts[i]);
if (nextElement) break;
}
}
}
currentElement = nextElement;
}
return currentElement as Element | null;
};
const queryElementAll = (rootElement: Element | Document, selector: string): Element[] => {
if (!selector.includes('>>') && !selector.includes(':>>')) {
return Array.from(rootElement.querySelectorAll(selector));
}
const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim());
let currentElements: (Element | Document)[] = [rootElement];
for (const part of parts) {
const nextElements: Element[] = [];
for (const element of currentElements) {
if ((element as Element).tagName === 'IFRAME' || (element as Element).tagName === 'FRAME') {
try {
const frameElement = element as HTMLIFrameElement | HTMLFrameElement;
const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document;
if (frameDoc) {
nextElements.push(...Array.from(frameDoc.querySelectorAll(part)));
}
} catch (e) {
console.warn(`Cannot access ${(element as Element).tagName.toLowerCase()} content:`, e);
continue;
}
} else {
if ('querySelectorAll' in element) {
nextElements.push(...Array.from(element.querySelectorAll(part)));
}
if ('shadowRoot' in element && (element as Element).shadowRoot) {
nextElements.push(...Array.from((element as Element).shadowRoot!.querySelectorAll(part)));
}
if ('children' in element) {
const children = Array.from((element as Element).children || []);
for (const child of children) {
if (child.shadowRoot) {
nextElements.push(...Array.from(child.shadowRoot.querySelectorAll(part)));
}
}
}
}
}
currentElements = nextElements;
}
return currentElements as Element[];
};
function extractValue(element: Element, attribute: string): string | null {
if (!element) return null;
const baseURL = element.ownerDocument?.location?.href || window.location.origin;
if (element.shadowRoot) {
const shadowContent = element.shadowRoot.textContent;
if (shadowContent?.trim()) {
return shadowContent.trim();
}
}
if (attribute === 'innerText') {
return (element as HTMLElement).innerText.trim();
} else if (attribute === 'innerHTML') {
return element.innerHTML.trim();
} else if (attribute === 'src' || attribute === 'href') {
if (attribute === 'href' && element.tagName !== 'A') {
const parentElement = element.parentElement;
if (parentElement && parentElement.tagName === 'A') {
const parentHref = parentElement.getAttribute('href');
if (parentHref) {
try {
return new URL(parentHref, baseURL).href;
} catch (e) {
return parentHref;
}
}
}
}
const attrValue = element.getAttribute(attribute);
const dataAttr = attrValue || element.getAttribute('data-' + attribute);
if (!dataAttr || dataAttr.trim() === '') {
if (attribute === 'src') {
const style = window.getComputedStyle(element);
const bgImage = style.backgroundImage;
if (bgImage && bgImage !== 'none') {
const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
return matches ? new URL(matches[1], baseURL).href : null;
}
}
return null;
}
try {
return new URL(dataAttr, baseURL).href;
} catch (e) {
console.warn('Error creating URL from', dataAttr, e);
return dataAttr; // Return the original value if URL construction fails
}
}
return element.getAttribute(attribute);
}
function findTableAncestor(element: Element): { type: string; element: Element } | null {
let currentElement: Element | null = element;
const MAX_DEPTH = 5;
let depth = 0;
while (currentElement && depth < MAX_DEPTH) {
if (currentElement.getRootNode() instanceof ShadowRoot) {
currentElement = (currentElement.getRootNode() as ShadowRoot).host;
continue;
}
if (currentElement.tagName === 'TD') {
return { type: 'TD', element: currentElement };
} else if (currentElement.tagName === 'TR') {
return { type: 'TR', element: currentElement };
}
if (currentElement.tagName === 'IFRAME' || currentElement.tagName === 'FRAME') {
try {
const frameElement = currentElement as HTMLIFrameElement | HTMLFrameElement;
currentElement = frameElement.contentDocument?.body || null;
} catch (e) {
return null;
}
} else {
currentElement = currentElement.parentElement;
}
depth++;
}
return null;
}
function getCellIndex(td: Element): number {
if (td.getRootNode() instanceof ShadowRoot) {
const shadowRoot = td.getRootNode() as ShadowRoot;
const allCells = Array.from(shadowRoot.querySelectorAll('td'));
return allCells.indexOf(td as HTMLTableCellElement);
}
let index = 0;
let sibling = td;
while (sibling = sibling.previousElementSibling as Element) {
index++;
}
return index;
}
function hasThElement(row: Element, tableFields: Record<string, { selector: string; attribute: string }>): boolean {
for (const [_, { selector }] of Object.entries(tableFields)) {
const element = queryElement(row, selector);
if (element) {
let current: Element | ShadowRoot | Document | null = element;
while (current && current !== row) {
if (current.getRootNode() instanceof ShadowRoot) {
current = (current.getRootNode() as ShadowRoot).host;
continue;
}
if ((current as Element).tagName === 'TH') return true;
if ((current as Element).tagName === 'IFRAME' || (current as Element).tagName === 'FRAME') {
try {
const frameElement = current as HTMLIFrameElement | HTMLFrameElement;
current = frameElement.contentDocument?.body || null;
} catch (e) {
break;
}
} else {
current = (current as Element).parentElement;
}
}
}
}
return false;
}
function filterRowsBasedOnTag(rows: Element[], tableFields: Record<string, { selector: string; attribute: string }>): Element[] {
for (const row of rows) {
if (hasThElement(row, tableFields)) {
return rows;
}
}
return rows.filter(row => {
const directTH = row.getElementsByTagName('TH').length === 0;
const shadowTH = row.shadowRoot ?
row.shadowRoot.querySelector('th') === null : true;
return directTH && shadowTH;
});
}
function calculateClassSimilarity(classList1: string[], classList2: string[]): number {
const set1 = new Set(classList1);
const set2 = new Set(classList2);
const intersection = new Set([...set1].filter(x => set2.has(x)));
const union = new Set([...set1, ...set2]);
return intersection.size / union.size;
}
function findSimilarElements(baseElement: Element, similarityThreshold: number = 0.7): Element[] {
const baseClasses = Array.from(baseElement.classList);
if (baseClasses.length === 0) return [];
const allElements: Element[] = [];
allElements.push(...Array.from(document.getElementsByTagName(baseElement.tagName)));
if (baseElement.getRootNode() instanceof ShadowRoot) {
const shadowHost = (baseElement.getRootNode() as ShadowRoot).host;
allElements.push(...Array.from(shadowHost.getElementsByTagName(baseElement.tagName)));
}
const frames = [
...Array.from(document.getElementsByTagName('iframe')),
...Array.from(document.getElementsByTagName('frame'))
];
for (const frame of frames) {
try {
const frameElement = frame as HTMLIFrameElement | HTMLFrameElement;
const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document;
if (frameDoc) {
allElements.push(...Array.from(frameDoc.getElementsByTagName(baseElement.tagName)));
}
} catch (e) {
console.warn(`Cannot access ${frame.tagName.toLowerCase()} content:`, e);
}
}
return allElements.filter(element => {
if (element === baseElement) return false;
const similarity = calculateClassSimilarity(
baseClasses,
Array.from(element.classList)
);
return similarity >= similarityThreshold;
});
}
let containers = queryElementAll(document, listSelector);
if (containers.length === 0) return [];
if (limit > 1 && containers.length === 1) {
const baseContainer = containers[0];
const similarContainers = findSimilarElements(baseContainer);
if (similarContainers.length > 0) {
const newContainers = similarContainers.filter(container =>
!container.matches(listSelector)
);
containers = [...containers, ...newContainers];
}
}
const containerFields = containers.map(() => ({
tableFields: {} as Record<string, {
selector: string;
attribute: string;
tableContext?: string;
cellIndex?: number;
}>,
nonTableFields: {} as Record<string, {
selector: string;
attribute: string;
}>
}));
containers.forEach((container, containerIndex) => {
for (const [label, field] of Object.entries(convertedFields)) {
const sampleElement = queryElement(container, field.selector);
if (sampleElement) {
const ancestor = findTableAncestor(sampleElement);
if (ancestor) {
containerFields[containerIndex].tableFields[label] = {
...field,
tableContext: ancestor.type,
cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1
};
} else {
containerFields[containerIndex].nonTableFields[label] = field;
}
} else {
containerFields[containerIndex].nonTableFields[label] = field;
}
}
});
const tableData: Array<Record<string, string>> = [];
const nonTableData: Array<Record<string, string>> = [];
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
const container = containers[containerIndex];
const { tableFields } = containerFields[containerIndex];
if (Object.keys(tableFields).length > 0) {
const firstField = Object.values(tableFields)[0];
const firstElement = queryElement(container, firstField.selector);
let tableContext: Element | null = firstElement;
while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) {
if (tableContext.getRootNode() instanceof ShadowRoot) {
tableContext = (tableContext.getRootNode() as ShadowRoot).host;
continue;
}
if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') {
try {
const frameElement = tableContext as HTMLIFrameElement | HTMLFrameElement;
tableContext = frameElement.contentDocument?.body || null;
} catch (e) {
break;
}
} else {
tableContext = tableContext.parentElement;
}
}
if (tableContext) {
const rows: Element[] = [];
rows.push(...Array.from(tableContext.getElementsByTagName('TR')));
if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') {
try {
const frameElement = tableContext as HTMLIFrameElement | HTMLFrameElement;
const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document;
if (frameDoc) {
rows.push(...Array.from(frameDoc.getElementsByTagName('TR')));
}
} catch (e) {
console.warn(`Cannot access ${tableContext.tagName.toLowerCase()} rows:`, e);
}
}
const processedRows = filterRowsBasedOnTag(rows, tableFields);
for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) {
const record: Record<string, string> = {};
const currentRow = processedRows[rowIndex];
for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) {
let element: Element | null = null;
if (cellIndex !== undefined && cellIndex >= 0) {
let td: Element | null = currentRow.children[cellIndex] || null;
if (!td && currentRow.shadowRoot) {
const shadowCells = currentRow.shadowRoot.children;
if (shadowCells && shadowCells.length > cellIndex) {
td = shadowCells[cellIndex];
}
}
if (td) {
element = queryElement(td, selector);
if (!element && selector.split(/(?:>>|:>>)/).pop()?.includes('td:nth-child')) {
element = td;
}
if (!element) {
const tagOnlySelector = selector.split('.')[0];
element = queryElement(td, tagOnlySelector);
}
if (!element) {
let currentElement: Element | null = td;
while (currentElement && currentElement.children.length > 0) {
let foundContentChild = false;
for (const child of Array.from(currentElement.children)) {
if (extractValue(child, attribute)) {
currentElement = child;
foundContentChild = true;
break;
}
}
if (!foundContentChild) break;
}
element = currentElement;
}
}
} else {
element = queryElement(currentRow, selector);
}
if (element) {
const value = extractValue(element, attribute);
if (value !== null) {
record[label] = value;
}
}
}
if (Object.keys(record).length > 0) {
tableData.push(record);
}
}
}
}
}
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
if (nonTableData.length >= limit) break;
const container = containers[containerIndex];
const { nonTableFields } = containerFields[containerIndex];
if (Object.keys(nonTableFields).length > 0) {
const record: Record<string, string> = {};
for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) {
const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
const element = queryElement(container, relativeSelector);
if (element) {
const value = extractValue(element, attribute);
if (value !== null) {
record[label] = value;
}
}
}
if (Object.keys(record).length > 0) {
nonTableData.push(record);
}
}
}
const scrapedData = [...tableData, ...nonTableData].slice(0, limit);
return scrapedData;
},
{ listSelector, fields, limit }
) as Array<Record<string, string>>;
}
/**
 * Takes a screenshot of the current page without running the workflow interpreter and
 * sends it to the client as a base64 data URL.
 *
 * Emits `screenshotCaptureStarted` followed by `directScreenshotCaptured` on success, and
 * `screenshotError` when there is no current page or the capture throws.
 * @param settings screenshot options; falsy `type`, `timeout`, `animations`, `caret` and
 *   `scale` fall back to 'png', 30000, 'allow', 'hide' and 'device' respectively
 * @returns Promise<void>
 */
public captureDirectScreenshot = async (settings: {
  fullPage: boolean;
  type: 'png' | 'jpeg';
  timeout?: number;
  animations?: 'disabled' | 'allow';
  caret?: 'hide' | 'initial';
  scale?: 'css' | 'device';
}): Promise<void> => {
  // Single place that tells the client a capture could not be produced.
  const reportFailure = (reason: string): void => {
    this.socket.emit('screenshotError', {
      userId: this.userId,
      error: reason
    });
  };

  const page = this.currentPage;
  if (!page) {
    logger.error("No current page available for screenshot");
    reportFailure('No active page available');
    return;
  }

  try {
    this.socket.emit('screenshotCaptureStarted', {
      userId: this.userId,
      fullPage: settings.fullPage
    });

    const imageType = settings.type || 'png';
    const screenshotBuffer = await page.screenshot({
      fullPage: settings.fullPage,
      type: imageType,
      timeout: settings.timeout || 30000,
      animations: settings.animations || 'allow',
      caret: settings.caret || 'hide',
      scale: settings.scale || 'device'
    });

    const mimeType = `image/${imageType}`;
    const encoded = screenshotBuffer.toString('base64');
    this.socket.emit('directScreenshotCaptured', {
      userId: this.userId,
      screenshot: `data:${mimeType};base64,${encoded}`,
      mimeType: mimeType,
      fullPage: settings.fullPage,
      timestamp: Date.now()
    });
  } catch (error) {
    logger.error('Failed to capture direct screenshot:', error);
    reportFailure(error instanceof Error ? error.message : 'Unknown error occurred');
  }
};
2024-06-01 10:57:32 +05:30
/**
 * Registers all socket listeners needed for the recording editor session on the current
 * socket. Should be called only once per socket, after the remote browser is fully
 * initialized.
 *
 * Most events are registered under two names: `<event>:<userId>` and, for backward
 * compatibility, the bare `<event>`. Both names run the same action; the extra debug/error
 * logging is only done where the original listeners did it. `extractListData` exists only
 * under its bare name.
 *
 * Every listener is wrapped so that a failing action is logged instead of escaping as an
 * unhandled promise rejection.
 * @returns void
 */
public registerEditorEvents = (): void => {
  logger.log("debug", `Registering editor events for user: ${this.userId}`);

  // socket.io hands listeners an untyped argument list, hence the `any[]` here.
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  type EditorListener = (...args: any[]) => unknown;

  // Registers `listener` for `eventName`; anything it throws or rejects with is logged.
  const listen = (eventName: string, listener: EditorListener): void => {
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    this.socket.on(eventName, async (...args: any[]): Promise<void> => {
      try {
        await listener(...args);
      } catch (error) {
        logger.error(`Failed to handle "${eventName}" event for user ${this.userId}:`, error);
      }
    });
  };

  // ---- shared actions (used by both the user-scoped and the legacy listeners) ----

  // Re-sends the current view in whichever rendering mode is active.
  const rerender = async (): Promise<void> => {
    if (this.renderingMode === "dom") {
      await this.makeAndEmitDOMSnapshot();
    } else {
      await this.makeAndEmitScreenshot();
    }
  };

  // Opens a new page in the current page's context and switches to the last tab.
  const addTab = async (): Promise<void> => {
    await this.currentPage?.context().newPage();
    const lastTabIndex = this.currentPage
      ? this.currentPage.context().pages().length - 1
      : 0;
    await this.changeTab(lastTabIndex);
  };

  // Closes the tab at `tabInfo.index`; resolves to false when no such tab exists.
  const closeTab = async (tabInfo: { index: number; isCurrent: boolean }): Promise<boolean> => {
    const pages = this.currentPage?.context().pages() ?? [];
    const page = pages[tabInfo.index];
    if (!page) {
      return false;
    }
    if (tabInfo.isCurrent) {
      // Move away from the tab before closing it: prefer the next tab, else the previous one.
      const neighbourIndex = pages[tabInfo.index + 1] ? tabInfo.index + 1 : tabInfo.index - 1;
      await this.changeTab(neighbourIndex);
    }
    await page.close();
    return true;
  };

  // Replaces this.context with a new context using the requested viewport;
  // resolves to false when no context/browser is available.
  // NOTE(review): this creates a new BrowserContext on every request, does not close the
  // previous one here, and the current page stays in its old context — presumably resizing
  // the current page was intended. Behaviour is kept as-is; confirm before changing.
  const replaceContextViewport = async (
    { width, height }: { width: number; height: number }
  ): Promise<boolean> => {
    if (!this.context || !this.browser) {
      return false;
    }
    this.context = await this.browser.newContext({
      viewport: { width, height },
    });
    return true;
  };

  // ---- user-scoped listeners: the event name carries the userId ----

  listen(`captureDirectScreenshot:${this.userId}`, async (settings) => {
    logger.debug(`Direct screenshot capture requested for user ${this.userId}`);
    await this.captureDirectScreenshot(settings);
  });

  listen(`rerender:${this.userId}`, async () => {
    logger.debug(`Rerender event received for user ${this.userId}`);
    await rerender();
  });

  listen(`settings:${this.userId}`, (settings) => {
    this.interpreterSettings = settings;
    logger.debug(`Settings updated for user ${this.userId}`);
  });

  listen(`changeTab:${this.userId}`, async (tabIndex: number) => {
    logger.debug(`Tab change to ${tabIndex} requested for user ${this.userId}`);
    await this.changeTab(tabIndex);
  });

  listen(`addTab:${this.userId}`, async () => {
    logger.debug(`New tab requested for user ${this.userId}`);
    await addTab();
  });

  listen(`closeTab:${this.userId}`, async (tabInfo: { index: number; isCurrent: boolean }) => {
    logger.debug(`Close tab ${tabInfo.index} requested for user ${this.userId}`);
    if (await closeTab(tabInfo)) {
      logger.log(
        "debug",
        `Tab ${tabInfo.index} was closed for user ${this.userId}, new tab count: ${this.currentPage?.context().pages().length}`
      );
    } else {
      logger.log(
        "error",
        `Tab index ${tabInfo.index} out of range for user ${this.userId}`
      );
    }
  });

  listen(`setViewportSize:${this.userId}`, async (data: { width: number; height: number }) => {
    const { width, height } = data;
    logger.log(
      "debug",
      `Viewport size change to width=${width}, height=${height} requested for user ${this.userId}`
    );
    if (await replaceContextViewport(data)) {
      logger.log(
        "debug",
        `Viewport size updated to width=${width}, height=${height} for user ${this.userId}`
      );
    }
  });

  // ---- legacy listeners: bare event names kept for backward compatibility ----

  listen("captureDirectScreenshot", (settings) => this.captureDirectScreenshot(settings));

  listen("rerender", async () => {
    logger.debug(
      `General rerender event received, checking if for user ${this.userId}`
    );
    await rerender();
  });

  listen("settings", (settings) => {
    this.interpreterSettings = settings;
  });

  listen("changeTab", (tabIndex: number) => this.changeTab(tabIndex));

  listen("addTab", addTab);

  listen("closeTab", closeTab);

  listen("setViewportSize", replaceContextViewport);

  // Extracts list data from the current page and sends it back tagged with the list id.
  // Nothing is emitted when there is no current page; `pagination` is accepted but unused.
  listen(
    "extractListData",
    async (data: {
      listSelector: string;
      fields: Record<string, any>;
      currentListId: number;
      pagination: any;
    }) => {
      if (!this.currentPage) {
        return;
      }
      const extractedData = await this.extractListData(
        this.currentPage,
        data.listSelector,
        data.fields
      );
      this.socket.emit("listDataExtracted", {
        currentListId: data.currentListId,
        data: extractedData,
      });
    }
  );
};
2024-06-01 10:58:23 +05:30
/**
 * Subscribes the remote browser to a screencast session
 * on [CDP](https://chromedevtools.github.io/devtools-protocol/) level,
 * where a screenshot is sent through the socket
 * every time the browser's active page updates.
 *
 * Frame delivery and acknowledgement are done by the `Page.screencastFrame` listener that
 * startScreencast registers on the CDP session. This method used to attach a second
 * listener for the same event, so every frame was emitted and acknowledged twice; that
 * duplicate listener is gone.
 * @returns {Promise<void>}
 */
public subscribeToScreencast = async (): Promise<void> => {
  logger.log('debug', `Starting screencast for user: ${this.userId}`);
  await this.startScreencast();
  if (!this.client) {
    logger.log('warn', 'client is not initialized');
    return;
  }
  // Set flag to indicate screencast is active
  this.isScreencastActive = true;
  await this.updateViewportInfo();
};
2024-06-01 10:59:15 +05:30
2025-06-23 13:21:46 +05:30
/**
 * Starts DOM streaming on the current CDP session.
 *
 * Enables the CDP `DOM` and `CSS` domains, marks DOM streaming as active and installs the
 * scroll and page-change listeners. Without a CDP client only a warning is logged. Any
 * failure along the way is logged and leaves DOM streaming marked as inactive.
 */
public async subscribeToDOM(): Promise<void> {
  if (!this.client) {
    logger.warn("DOM streaming requires scraping browser with CDP client");
    return;
  }

  try {
    // CDP domains that must be enabled before streaming starts, in this order.
    for (const enableCommand of ["DOM.enable", "CSS.enable"] as const) {
      await this.client.send(enableCommand);
    }

    this.isDOMStreamingActive = true;
    logger.info("DOM streaming started successfully");

    this.setupScrollEventListener();
    this.setupPageChangeListeners();
  } catch (failure) {
    logger.error("Failed to start DOM streaming:", failure);
    this.isDOMStreamingActive = false;
  }
}
/**
 * Creates an rrweb snapshot of the current page and emits it to the client.
 *
 * Does nothing when there is no current page, DOM streaming is inactive, or the page is
 * closed. Otherwise it reads the scroll position, stores it in `lastScrollPosition`,
 * captures the document with `window.rrwebSnapshot`, runs the result through
 * processRRWebSnapshot and emits it together with the scroll information and a capture
 * timestamp.
 *
 * Errors that indicate the page navigated or closed mid-capture are only logged at debug
 * level; every other error is logged and reported to the client as `dom-mode-error`.
 */
public async makeAndEmitDOMSnapshot(): Promise<void> {
  if (!this.currentPage || !this.isDOMStreamingActive) {
    return;
  }

  try {
    // Check if page is still valid and not closed
    if (this.currentPage.isClosed()) {
      logger.debug("Skipping DOM snapshot - page is closed");
      return;
    }

    // Get current scroll position
    const currentScrollInfo = await this.currentPage.evaluate(() => ({
      x: window.scrollX,
      y: window.scrollY,
      maxX: Math.max(
        0,
        document.documentElement.scrollWidth - window.innerWidth
      ),
      maxY: Math.max(
        0,
        document.documentElement.scrollHeight - window.innerHeight
      ),
      documentHeight: document.documentElement.scrollHeight,
    }));

    logger.info(
      `Creating rrweb snapshot at scroll position: ${currentScrollInfo.y}/${currentScrollInfo.maxY}`
    );

    // Update our tracked scroll position
    this.lastScrollPosition = {
      x: currentScrollInfo.x,
      y: currentScrollInfo.y,
    };

    // The page may have closed while the scroll position was being read.
    if (this.currentPage.isClosed()) {
      logger.debug("Skipping DOM snapshot - page closed before snapshot");
      return;
    }

    // Capture snapshot using rrweb (runs inside the page)
    const rawSnapshot = await this.currentPage.evaluate(() => {
      if (typeof window.rrwebSnapshot === "undefined") {
        throw new Error("rrweb-snapshot library not available");
      }
      return window.rrwebSnapshot.snapshot(document, {
        inlineImages: false,
        collectFonts: true,
      });
    });

    // Process the snapshot to proxy resources
    const processedSnapshot = await this.processRRWebSnapshot(rawSnapshot);

    // Emit the processed snapshot enriched with scroll position and capture time
    this.emitRRWebSnapshot({
      ...processedSnapshot,
      scrollPosition: currentScrollInfo,
      captureTime: Date.now(),
    });
  } catch (error) {
    // Message fragments of errors that are expected when the page navigates or closes.
    const expectedFragments = [
      "Execution context was destroyed",
      "most likely because of a navigation",
      "Target closed",
    ];
    const message = error instanceof Error ? error.message : null;
    if (message !== null && expectedFragments.some((fragment) => message.includes(fragment))) {
      logger.debug("DOM snapshot skipped due to page navigation or closure");
      return; // Don't emit error for navigation - this is expected
    }

    logger.error("Failed to create rrweb snapshot:", error);
    this.socket.emit("dom-mode-error", {
      userId: this.userId,
      message: "Failed to create rrweb snapshot",
      error: error instanceof Error ? error.message : String(error),
      timestamp: Date.now(),
    });
  }
}
/**
 * Sends a processed DOM snapshot to the client as a `domcast` event, tagged with the
 * user id and the emission time.
 * @param processedSnapshot snapshot data to send
 */
private emitRRWebSnapshot(processedSnapshot: ProcessedSnapshot): void {
  const domcastPayload = {
    snapshotData: processedSnapshot,
    userId: this.userId,
    timestamp: Date.now(),
  };
  this.socket.emit("domcast", domcastPayload);
}
/**
 * Stops DOM streaming.
 *
 * Marks streaming as inactive, cancels the DOM update interval and the pending
 * network-request timeout, drops the pending network requests and, when a CDP client
 * exists, disables the CDP `DOM` and `CSS` domains. A failure while disabling is logged
 * as a warning.
 */
private async stopDOM(): Promise<void> {
  this.isDOMStreamingActive = false;

  if (this.domUpdateInterval) {
    clearInterval(this.domUpdateInterval);
    this.domUpdateInterval = null;
  }

  if (this.networkRequestTimeout) {
    clearTimeout(this.networkRequestTimeout);
    this.networkRequestTimeout = null;
  }

  this.pendingNetworkRequests = [];

  if (this.client) {
    try {
      // Disabled in this order; a failure on the first skips the second.
      for (const disableCommand of ["DOM.disable", "CSS.disable"] as const) {
        await this.client.send(disableCommand);
      }
    } catch (failure) {
      logger.warn("Error stopping DOM stream:", failure);
    }
  }

  logger.info("DOM streaming stopped successfully");
}
2025-06-30 16:29:07 +05:30
/**
 * Terminates the screencast / DOM streaming session and closes the remote browser.
 * If an interpretation was running it will be stopped.
 *
 * Each shutdown step is attempted independently: a step that throws is logged and the
 * remaining steps still run, so the browser is closed even when stopping the interpreter
 * or the streams fails. The method itself never rejects.
 * @returns {Promise<void>}
 */
public async switchOff(): Promise<void> {
  this.isScreencastActive = false;
  this.isDOMStreamingActive = false;

  // Runs one shutdown step; a failure is logged and must not block the steps after it.
  const attempt = async (step: () => unknown): Promise<void> => {
    try {
      await step();
    } catch (error) {
      logger.error('Error during browser shutdown:', error);
    }
  };

  await attempt(() => this.interpreter.stopInterpretation());

  if (this.screencastInterval) {
    clearInterval(this.screencastInterval);
  }

  if (this.domUpdateInterval) {
    clearInterval(this.domUpdateInterval);
  }

  if (this.client) {
    await attempt(() => this.stopScreencast());
    await attempt(() => this.stopDOM());
  }

  const browser = this.browser;
  if (browser) {
    await attempt(() => browser.close());
  }

  this.screenshotQueue = [];
  //this.performanceMonitor.reset();
}
2024-06-01 10:59:15 +05:30
2025-01-06 11:54:48 +05:30
/**
 * Re-encodes a screenshot as PNG with sharp and scales it to fit inside
 * SCREENCAST_CONFIG.maxWidth x SCREENCAST_CONFIG.maxHeight without enlarging it.
 * @param screenshot encoded image to optimize
 * @returns the optimized buffer, or the untouched input when sharp fails (the failure is logged)
 */
private async optimizeScreenshot(screenshot: Buffer): Promise<Buffer> {
  try {
    const pngOptions = {
      quality: Math.round(SCREENCAST_CONFIG.compressionQuality * 100),
      compressionLevel: 6,
      adaptiveFiltering: true,
      force: true
    };
    const resizeOptions = {
      width: SCREENCAST_CONFIG.maxWidth,
      height: SCREENCAST_CONFIG.maxHeight,
      fit: 'inside' as const,
      withoutEnlargement: true,
      kernel: 'lanczos3' as const
    };

    const pipeline = sharp(screenshot).png(pngOptions).resize(resizeOptions);
    return await pipeline.toBuffer();
  } catch (failure) {
    logger.error('Screenshot optimization failed:', failure);
    return screenshot;
  }
}
2025-03-24 01:10:30 +05:30
2025-01-06 11:54:48 +05:30
2024-07-09 22:58:18 +05:30
/**
 * Takes one screenshot of the current page and hands it to emitScreenshot.
 * Nothing is emitted when there is no current page; a failing capture is logged.
 * @returns {Promise<void>}
 */
public makeAndEmitScreenshot = async (): Promise<void> => {
  try {
    const capturedImage = await this.currentPage?.screenshot();
    if (!capturedImage) {
      return;
    }
    // Not awaited: emission runs on its own.
    this.emitScreenshot(capturedImage);
  } catch (e) {
    logger.log('error', `Screenshot error: ${(e as Error).message}`);
  }
};
2024-06-01 10:59:49 +05:30
2024-06-01 11:00:28 +05:30
/**
 * Swaps the active socket instance.
 * The editor events are registered on the new socket and the generator and interpreter,
 * when present, are handed the new socket as well.
 * @param socket socket.io socket instance used to communicate with the client side
 * @returns void
 */
public updateSocket = (socket: Socket): void => {
  this.socket = socket;
  // Must run after the assignment above: the listeners are attached to this.socket.
  this.registerEditorEvents();

  if (this.generator) {
    this.generator.updateSocket(socket);
  }
  if (this.interpreter) {
    this.interpreter.updateSocket(socket);
  }
};
2024-06-01 11:03:01 +05:30
/**
 * Runs the workflow currently held by the generator inside the editor.
 *
 * A new page is initialized first. The interpreter settings' `params` are rebuilt from
 * the generator's parameter names: a name that already has a value keeps it, every other
 * name gets an empty string. After the run the generator's last index is cleared.
 * Logs an error and returns when the generator is missing or no page could be created.
 * @returns {Promise<void>}
 */
public interpretCurrentRecording = async (): Promise<void> => {
  logger.log('debug', 'Starting interpretation in the editor');

  if (!this.generator) {
    logger.log('error', 'Generator is not initialized');
    return;
  }

  const workflow = this.generator.AddGeneratedFlags(this.generator.getWorkflowFile());
  await this.initializeNewPage();

  if (!this.currentPage) {
    logger.log('error', 'Could not get a new page, returned undefined');
    return;
  }

  const paramNames = this.generator.getParams();
  if (paramNames) {
    // Values known before the rebuild; read-only while the new object is assembled.
    const knownValues = this.interpreterSettings.params;
    this.interpreterSettings.params = paramNames.reduce(
      (collected, name) => ({
        ...collected,
        [name]: knownValues && Object.keys(knownValues).includes(name) ? knownValues[name] : '',
      }),
      {}
    );
  }

  logger.log('debug', `Starting interpretation with settings: ${JSON.stringify(this.interpreterSettings, null, 2)}`);

  await this.interpreter.interpretRecordingInEditor(
    workflow,
    this.currentPage,
    (newPage: Page) => this.currentPage = newPage,
    this.interpreterSettings
  );

  // clear the active index from generator
  this.generator.clearLastIndex();
};
/**
 * Stops the running workflow interpretation, then initializes a new page.
 * @returns {Promise<void>}
 */
public stopCurrentInterpretation = async (): Promise<void> => {
  const { interpreter } = this;
  await interpreter.stopInterpretation();
  // Only reached when stopping succeeded.
  await this.initializeNewPage();
};
/**
 * Gives access to the page this remote browser currently treats as active.
 * @returns {Page | null | undefined} the current page, if any
 */
public getCurrentPage = (): Page | null | undefined => this.currentPage;
/**
 * Changes the active page to the page instance on the given index
 * available in pages array on the {@link BrowserContext}.
 * Stops the screencast and DOM streaming of the previous page, attaches a new CDP session
 * to the new page, emits `urlChanged`, and restarts streaming in the current rendering
 * mode.
 *
 * The mode is taken from `renderingMode`. The previous check on `isDOMStreamingActive`
 * could not work because stopDOM() resets that flag just before, so a tab change in DOM
 * mode always fell back to the screencast. In DOM mode the subscription is started before
 * the snapshot, since makeAndEmitDOMSnapshot() returns early while DOM streaming is
 * inactive.
 * @param tabIndex index of the page in the pages array on the {@link BrowserContext}
 * @returns {Promise<void>}
 */
private changeTab = async (tabIndex: number): Promise<void> => {
  const page = this.currentPage?.context().pages()[tabIndex];
  if (!page) {
    logger.log('error', `${tabIndex} index out of range of pages`);
    return;
  }

  // Both calls still act on the previous page's CDP client.
  await this.stopScreencast();
  await this.stopDOM();

  this.currentPage = page;
  await this.setupPageEventListeners(this.currentPage);

  //await this.currentPage.setViewportSize({ height: 400, width: 900 })
  this.client = await this.currentPage.context().newCDPSession(this.currentPage);

  // Include userId in the URL change event
  this.socket.emit('urlChanged', {
    url: this.currentPage.url(),
    userId: this.userId
  });

  if (this.renderingMode === "dom") {
    await this.subscribeToDOM();
    await this.makeAndEmitDOMSnapshot();
  } else {
    await this.makeAndEmitScreenshot();
    await this.subscribeToScreencast();
  }
};
2024-06-01 11:05:13 +05:30
/**
 * Internal method for a new page initialization.
 * Stops the running screencast, opens a new page, sets a fixed `User-Agent` header on it,
 * closes the previous page and makes the new one current. The new page then gets its
 * event listeners, a CDP session, and a DOM or screencast subscription depending on the
 * rendering mode. Logs an error when no page could be created.
 * @param options optional page options to be used when creating a new page
 * @returns {Promise<void>}
 */
private initializeNewPage = async (options?: Object): Promise<void> => {
  await this.stopScreencast();

  let freshPage: Page | undefined;
  if (options) {
    freshPage = await this.browser?.newPage(options);
  } else {
    freshPage = await this.browser?.newPage();
  }

  await freshPage?.setExtraHTTPHeaders({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
  });

  // The previous page is closed only after the replacement has been requested.
  await this.currentPage?.close();
  this.currentPage = freshPage;

  if (!this.currentPage) {
    logger.log('error', 'Could not get a new page, returned undefined');
    return;
  }

  await this.setupPageEventListeners(this.currentPage);
  this.client = await this.currentPage.context().newCDPSession(this.currentPage);

  if (this.renderingMode === "dom") {
    await this.subscribeToDOM();
  } else {
    await this.subscribeToScreencast();
  }
};
2024-06-01 11:02:21 +05:30
/**
 * Starts a CDP screencast on the current client and forwards its frames to the client
 * side.
 *
 * Sends `Page.startScreencast` with the SCREENCAST_CONFIG settings, marks the screencast
 * as active and registers a `Page.screencastFrame` listener. The listener emits each frame
 * through emitScreenshot and acknowledges it 10 ms later. While the screenshot queue is
 * full and a frame is still being processed, frames are acknowledged without being
 * emitted. Without a client only a warning is logged; start-up failures are logged.
 * @returns {Promise<void>}
 */
private async startScreencast(): Promise<void> {
  if (!this.client) {
    logger.warn('Client is not initialized');
    return;
  }

  try {
    await this.client.send('Page.startScreencast', {
      format: SCREENCAST_CONFIG.format,
      quality: Math.round(SCREENCAST_CONFIG.compressionQuality * 100),
      maxWidth: SCREENCAST_CONFIG.maxWidth,
      maxHeight: SCREENCAST_CONFIG.maxHeight,
      everyNthFrame: 1
    });
    this.isScreencastActive = true;

    this.client.on('Page.screencastFrame', async ({ data, sessionId }) => {
      // Acknowledges this frame; a no-op when the client is gone by then.
      const acknowledge = () => this.client?.send('Page.screencastFrameAck', { sessionId });

      try {
        const queueIsSaturated =
          this.screenshotQueue.length >= SCREENCAST_CONFIG.maxQueueSize && this.isProcessingScreenshot;
        if (queueIsSaturated) {
          // Drop the frame but still acknowledge it.
          await acknowledge();
          return;
        }

        this.emitScreenshot(Buffer.from(data, 'base64'));

        setTimeout(async () => {
          try {
            await acknowledge();
          } catch (e) {
            logger.error('Error acknowledging screencast frame:', e);
          }
        }, 10);
      } catch (error) {
        logger.error('Screencast frame processing failed:', error);
        try {
          await acknowledge();
        } catch (ackError) {
          logger.error('Failed to acknowledge screencast frame:', ackError);
        }
      }
    });

    logger.info('Screencast started successfully');
  } catch (error) {
    logger.error('Failed to start screencast:', error);
  }
}
/**
 * Stops the CDP screencast on the current client.
 * Marks the screencast as inactive, sends `Page.stopScreencast`, then empties the
 * screenshot queue and resets the processing flag. Without a client an error is logged
 * and nothing else happens; a failing stop is logged.
 */
private async stopScreencast(): Promise<void> {
  const cdpClient = this.client;
  if (!cdpClient) {
    logger.error('Client is not initialized');
    return;
  }

  try {
    // Flag the screencast as no longer active before asking the browser to stop.
    this.isScreencastActive = false;
    await cdpClient.send('Page.stopScreencast');

    this.screenshotQueue = [];
    this.isProcessingScreenshot = false;
    logger.info('Screencast stopped successfully');
  } catch (failure) {
    logger.error('Failed to stop screencast:', failure);
  }
}
2024-06-01 11:02:21 +05:30
/**
 * Helper for emitting a screenshot of the browser's active page through the websocket.
 *
 * Only one screenshot is processed at a time. While one is in flight, new payloads are
 * queued as long as the queue holds fewer than SCREENCAST_CONFIG.maxQueueSize entries,
 * and the call returns. A queue that has grown past that size is first cut down to its
 * newest entry. The payload is optimized, but if that takes longer than 100 ms the raw
 * payload is sent instead. When processing finishes, the next queued payload is scheduled
 * (after 16 ms when more are waiting, otherwise 33 ms).
 * @param payload the screenshot binary data
 * @param viewportSize viewport to report with the frame; when omitted the current page's
 *   viewport size (or null) is used
 * @returns void
 */
private emitScreenshot = async (payload: Buffer, viewportSize?: { width: number, height: number }): Promise<void> => {
  if (this.screenshotQueue.length > SCREENCAST_CONFIG.maxQueueSize) {
    this.screenshotQueue = this.screenshotQueue.slice(-1);
  }

  if (this.isProcessingScreenshot) {
    const hasRoom = this.screenshotQueue.length < SCREENCAST_CONFIG.maxQueueSize;
    if (hasRoom) {
      this.screenshotQueue.push(payload);
    }
    return;
  }

  // Defers the actual socket emit to the next iteration of the event loop.
  const deliver = (image: string): void => {
    setImmediate(async () => {
      this.socket.emit('screencast', {
        image,
        userId: this.userId,
        viewport: viewportSize || await this.currentPage?.viewportSize() || null
      });
    });
  };

  this.isProcessingScreenshot = true;

  try {
    // Whichever settles first wins: the optimized image or, after 100 ms, the raw payload.
    const rawAfterTimeout = new Promise<Buffer>((resolve) => {
      setTimeout(() => resolve(payload), 100);
    });
    const winner = await Promise.race([this.optimizeScreenshot(payload), rawAfterTimeout]);

    const encodedImage = `data:image/${SCREENCAST_CONFIG.format};base64,${winner.toString('base64')}`;

    // Drop the local reference to the raw buffer once it has been encoded.
    payload = null as any;

    deliver(encodedImage);
  } catch (error) {
    logger.error('Screenshot emission failed:', error);
    try {
      // Fallback: send the raw payload.
      deliver(`data:image/png;base64,${payload.toString('base64')}`);
    } catch (e) {
      logger.error('Fallback screenshot emission also failed:', e);
    }
  } finally {
    this.isProcessingScreenshot = false;

    const queued = this.screenshotQueue.shift();
    if (queued) {
      const delay = this.screenshotQueue.length > 0 ? 16 : 33;
      setTimeout(() => {
        this.emitScreenshot(queued);
      }, delay);
    }
  }
};
2024-06-01 10:55:04 +05:30
}