2024-06-01 10:55:04 +05:30
|
|
|
import {
|
|
|
|
|
Page,
|
|
|
|
|
Browser,
|
|
|
|
|
CDPSession,
|
|
|
|
|
BrowserContext,
|
|
|
|
|
} from 'playwright';
|
|
|
|
|
import { Socket } from "socket.io";
|
2024-11-22 23:04:03 +05:30
|
|
|
import { chromium } from 'playwright-extra';
|
|
|
|
|
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
2024-09-21 18:51:11 +05:30
|
|
|
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
|
2024-07-20 05:57:15 +05:30
|
|
|
import fetch from 'cross-fetch';
|
2025-01-06 11:43:02 +05:30
|
|
|
import sharp from 'sharp';
|
2024-06-01 10:55:04 +05:30
|
|
|
import logger from '../../logger';
|
2025-04-01 23:25:47 +05:30
|
|
|
import { InterpreterSettings } from "../../types";
|
2024-06-01 10:55:04 +05:30
|
|
|
import { WorkflowGenerator } from "../../workflow-management/classes/Generator";
|
|
|
|
|
import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter";
|
2024-10-27 17:39:58 +05:30
|
|
|
import { getDecryptedProxyConfig } from '../../routes/proxy';
|
2024-12-09 17:57:28 +05:30
|
|
|
import { getInjectableScript } from 'idcac-playwright';
|
2025-01-06 21:10:39 +05:30
|
|
|
|
2025-06-23 13:21:46 +05:30
|
|
|
/**
 * Ambient augmentation of the global `Window` type so page-side code can
 * reference `window.rrwebSnapshot` without a type error.
 */
declare global {
  interface Window {
    /**
     * Optional, loosely typed global.
     * NOTE(review): presumably provided by the injected rrweb bundle — confirm.
     */
    rrwebSnapshot?: any;
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * One node of a serialized snapshot tree.
 *
 * Only `id` and `type` are required. The index signature admits any other
 * property, so unknown fields on a node are accepted as `any`.
 */
interface RRWebSnapshot {
  /** Numeric node identifier. */
  id: number;
  /** Numeric node kind. NOTE(review): presumably rrweb's NodeType enum — confirm. */
  type: number;
  /** Tag name, when present on the node. */
  tagName?: string;
  /** Attribute name → value map, when present on the node. */
  attributes?: Record<string, string>;
  /** Text payload, when present on the node. */
  textContent?: string;
  /** Nested nodes of the same shape. */
  childNodes?: RRWebSnapshot[];
  /** Any additional, untyped properties. */
  [key: string]: any;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Envelope built around a snapshot tree: the tree itself, per-category
 * resource lists, page metadata and resource counters.
 */
interface ProcessedSnapshot {
  /** Root of the snapshot tree. */
  snapshot: RRWebSnapshot;
  /** Resource lists grouped by category. */
  resources: {
    stylesheets: { href: string; content: string; media?: string }[];
    images: { src: string; dataUrl: string; alt?: string }[];
    fonts: { url: string; dataUrl: string; format?: string }[];
    scripts: { src: string; content: string; type?: string }[];
    media: { src: string; dataUrl: string; type: string }[];
  };
  /** URL of the page the snapshot was taken from. */
  baseUrl: string;
  /** Viewport dimensions at snapshot time. */
  viewport: { width: number; height: number };
  /** Creation time as a numeric timestamp. */
  timestamp: number;
  /** Per-category resource counters. */
  processingStats: {
    discoveredResources: Record<
      "images" | "stylesheets" | "scripts" | "fonts" | "media",
      number
    >;
    cachedResources: Record<
      "stylesheets" | "images" | "fonts" | "scripts" | "media",
      number
    >;
  };
}
|
|
|
|
|
|
2024-11-22 23:04:03 +05:30
|
|
|
// Registers the stealth plugin on the playwright-extra chromium launcher at module load.
chromium.use(stealthPlugin());
|
2024-07-20 06:01:35 +05:30
|
|
|
|
2025-01-06 11:47:56 +05:30
|
|
|
/**
 * Tunables for heap monitoring.
 */
const MEMORY_CONFIG = {
  /** Polling period in milliseconds (20 s). */
  gcInterval: 20 * 1000,
  /** Heap budget in bytes: 1536 MiB (1.5 GiB). */
  maxHeapSize: 1536 * 1024 ** 2,
  /** Fraction of `maxHeapSize` (70%) above which usage is treated as high. */
  heapUsageThreshold: 0.7,
};
|
|
|
|
|
|
2025-06-12 14:30:43 +05:30
|
|
|
/**
 * Default viewport description: 1280×720, scale factor 1, desktop (non-mobile).
 */
const DEFAULT_VIEWPORT = {
  width: 1280, // pixels
  height: 720, // pixels
  deviceScaleFactor: 1,
  mobile: false,
};
|
|
|
|
|
|
2025-01-06 11:56:10 +05:30
|
|
|
/**
 * Screencast tunables. Frame dimensions default to {@link DEFAULT_VIEWPORT}.
 *
 * NOTE(review): the units of `compressionQuality` and `skipFrameThreshold`
 * are not established by this declaration — confirm against their consumers.
 */
const SCREENCAST_CONFIG: {
  format: "jpeg" | "png";
  maxWidth: number;
  maxHeight: number;
  targetFPS: number;
  compressionQuality: number;
  maxQueueSize: number;
  skipFrameThreshold: number;
  enableAdaptiveQuality: boolean;
} = {
  format: "jpeg",
  maxWidth: DEFAULT_VIEWPORT.width,
  maxHeight: DEFAULT_VIEWPORT.height,
  targetFPS: 30,
  compressionQuality: 0.8,
  // Upper bound used when trimming the pending screenshot queue.
  maxQueueSize: 2,
  skipFrameThreshold: 100,
  enableAdaptiveQuality: true,
};
|
2025-01-06 11:47:56 +05:30
|
|
|
|
2024-06-01 10:55:04 +05:30
|
|
|
/**
|
|
|
|
|
* This class represents a remote browser instance.
|
|
|
|
|
* It is used to allow a variety of interaction with the Playwright's browser instance.
|
|
|
|
|
* Every remote browser holds an instance of a generator and interpreter classes with
|
|
|
|
|
* the purpose of generating and interpreting workflows.
|
|
|
|
|
* @category BrowserManagement
|
|
|
|
|
*/
|
|
|
|
|
export class RemoteBrowser {
|
|
|
|
|
|
|
|
|
|
/**
 * Playwright's [browser](https://playwright.dev/docs/api/class-browser) instance.
 * `null` until a browser has been launched.
 * @private
 */
private browser: Browser | null = null;

/**
 * Playwright's [BrowserContext](https://playwright.dev/docs/api/class-browsercontext)
 * created from the launched browser. `null` until one has been created.
 * @private
 */
private context: BrowserContext | null = null;

/**
 * The Playwright's [CDPSession](https://playwright.dev/docs/api/class-cdpsession) instance,
 * used to talk raw Chrome Devtools Protocol.
 * @private
 */
private client: CDPSession | null | undefined = null;

/**
 * Socket.io socket instance enabling communication with the client (frontend) side.
 * @private
 */
private socket: Socket;

/**
 * The Playwright's [Page](https://playwright.dev/docs/api/class-page) instance
 * as current interactive remote browser's page.
 * @private
 */
private currentPage: Page | null | undefined = null;

/**
 * Interpreter settings for any started interpretation.
 * @private
 */
private interpreterSettings: InterpreterSettings = {
  debug: false,
  maxConcurrency: 1,
  maxRepeats: 1,
};

/**
 * The user ID that owns this browser instance
 * @private
 */
private userId: string;

/**
 * Last URL announced to the client through the `urlChanged` event;
 * `null` before the first announcement. Used to suppress duplicate emits.
 * @private
 */
private lastEmittedUrl: string | null = null;

/**
 * {@link WorkflowGenerator} instance specific to the remote browser.
 */
public generator: WorkflowGenerator;

/**
 * {@link WorkflowInterpreter} instance specific to the remote browser.
 */
public interpreter: WorkflowInterpreter;

/** Backlog of screenshot buffers; trimmed or cleared by the memory-cleanup routines. */
private screenshotQueue: Buffer[] = [];

/** Screenshot-processing flag; reset to `false` by the emergency memory cleanup. */
private isProcessingScreenshot = false;

/** Timer handle. NOTE(review): presumably drives periodic screencast work — confirm. */
private screencastInterval: NodeJS.Timeout | null = null;

/** NOTE(review): presumably true while a screencast is running — confirm. */
private isScreencastActive: boolean = false;

/** While `false`, incoming `dom:scroll` events are ignored. */
private isDOMStreamingActive: boolean = false;

/** Timer handle. NOTE(review): presumably drives periodic DOM updates — confirm. */
private domUpdateInterval: NodeJS.Timeout | null = null;

/** Current rendering mode; starts as `"screenshot"`. */
private renderingMode: "screenshot" | "dom" = "screenshot";

/** Scroll offsets recorded the last time a scroll scheduled a snapshot. */
private lastScrollPosition = { x: 0, y: 0 };

/** Combined |dx| + |dy| movement a scroll must exceed to schedule a snapshot. */
private scrollThreshold = 200; // pixels

/** Pending debounce timer for scroll-triggered snapshots, if any. */
private snapshotDebounceTimeout: NodeJS.Timeout | null = null;

/** Set to `true` when a scroll schedules a snapshot. */
private isScrollTriggeredSnapshot = false;

/** Pending quiet-period timer for response-triggered snapshots, if any. */
private networkRequestTimeout: NodeJS.Timeout | null = null;

/** URLs of matching responses seen since the last quiet period elapsed. */
private pendingNetworkRequests: string[] = [];

/** Milliseconds without a matching response before a snapshot is taken. */
private readonly NETWORK_QUIET_PERIOD = 8000;
|
|
|
|
|
|
2024-06-01 10:55:04 +05:30
|
|
|
/**
 * Creates the {@link WorkflowInterpreter} and {@link WorkflowGenerator} for this
 * remote browser and stores the socket and owner on the instance.
 * No browser is launched here.
 * @param socket socket.io socket instance used to communicate with the client side
 * @param userId ID of the user that owns this browser instance
 * @param poolId identifier forwarded to the {@link WorkflowGenerator} constructor
 * @constructor
 */
public constructor(socket: Socket, userId: string, poolId: string) {
  this.userId = userId;
  this.socket = socket;

  // Interpreter is constructed before the generator, as in the original ordering.
  this.interpreter = new WorkflowInterpreter(socket);
  this.generator = new WorkflowGenerator(socket, poolId);
}
|
2024-06-01 10:56:25 +05:30
|
|
|
|
2025-06-12 14:30:43 +05:30
|
|
|
/**
 * Trims the screenshot backlog: once it holds more than 10 buffers,
 * only the 3 most recent ones are kept.
 */
private cleanupMemory(): void {
  const backlog = this.screenshotQueue;
  if (backlog.length <= 10) {
    return;
  }

  // Negative start index keeps the tail, i.e. the newest entries.
  this.screenshotQueue = backlog.slice(-3);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Starts a 30-second timer that trims the screenshot backlog via
 * {@link cleanupMemory}.
 *
 * The timer cancels itself on the first tick after the browser is gone
 * (`null` or disconnected). Without this, the interval would fire for the
 * lifetime of the process and keep this instance reachable after shutdown.
 */
private setupMemoryCleanup(): void {
  const cleanupTimer = setInterval(() => {
    // Nothing left to clean once the browser has been closed or lost.
    if (!this.browser || !this.browser.isConnected()) {
      clearInterval(cleanupTimer);
      return;
    }

    this.cleanupMemory();
  }, 30000); // Every 30 seconds
}
|
|
|
|
|
|
2025-06-23 13:21:46 +05:30
|
|
|
/**
 * Wraps a raw snapshot tree into a {@link ProcessedSnapshot}.
 *
 * The snapshot is passed through unchanged. All resource lists are created
 * empty, so every counter in `processingStats` is 0. The base URL and
 * viewport come from the current page, falling back to `""` and 1280×720
 * when they are unavailable.
 *
 * @param snapshot snapshot tree to wrap
 * @returns the snapshot together with page metadata and empty resource lists
 */
private async processRRWebSnapshot(
  snapshot: RRWebSnapshot
): Promise<ProcessedSnapshot> {
  const page = this.currentPage;
  const baseUrl = page?.url() || "";

  const resources: ProcessedSnapshot["resources"] = {
    stylesheets: [],
    images: [],
    fonts: [],
    scripts: [],
    media: [],
  };

  const fallbackViewport = { width: 1280, height: 720 };
  const viewport = (await page?.viewportSize()) || fallbackViewport;

  const { stylesheets, images, fonts, scripts, media } = resources;

  const processed: ProcessedSnapshot = {
    snapshot,
    resources,
    baseUrl,
    viewport,
    timestamp: Date.now(),
    processingStats: {
      discoveredResources: {
        images: images.length,
        stylesheets: stylesheets.length,
        scripts: scripts.length,
        fonts: fonts.length,
        media: media.length,
      },
      cachedResources: {
        stylesheets: stylesheets.length,
        images: images.length,
        fonts: fonts.length,
        scripts: scripts.length,
        media: media.length,
      },
    },
  };

  return processed;
}
|
|
|
|
|
|
2025-01-06 11:46:06 +05:30
|
|
|
/**
 * Starts a periodic heap check (every `MEMORY_CONFIG.gcInterval` ms).
 *
 * On each tick the used heap is compared against `MEMORY_CONFIG.maxHeapSize`:
 * - above 1.2× the threshold: emergency cleanup via performMemoryCleanup()
 *   (started without awaiting it);
 * - above the threshold: the screenshot queue is cleared, and a garbage
 *   collection is requested when `global.gc` is exposed and usage exceeds
 *   1.1× the threshold.
 *
 * Regardless of memory pressure, the queue is capped at
 * `SCREENCAST_CONFIG.maxQueueSize` entries.
 *
 * NOTE(review): the interval handle is not retained, so this timer cannot be
 * cancelled by other code.
 */
private initializeMemoryManagement(): void {
  const { heapUsageThreshold, maxHeapSize, gcInterval } = MEMORY_CONFIG;

  const checkHeap = (): void => {
    const { heapUsed } = process.memoryUsage();
    const usage = heapUsed / maxHeapSize;

    const isCritical = usage > heapUsageThreshold * 1.2;
    const isHigh = usage > heapUsageThreshold;

    if (isCritical) {
      logger.warn('Critical memory pressure detected, triggering emergency cleanup');
      this.performMemoryCleanup();
    } else if (isHigh) {
      logger.warn('High memory usage detected, triggering cleanup');

      if (this.screenshotQueue.length > 0) {
        this.screenshotQueue = [];
        logger.info('Screenshot queue cleared due to memory pressure');
      }

      if (global.gc && usage > heapUsageThreshold * 1.1) {
        global.gc();
      }
    }

    const cap = SCREENCAST_CONFIG.maxQueueSize;
    if (this.screenshotQueue.length > cap) {
      this.screenshotQueue = this.screenshotQueue.slice(-cap);
    }
  };

  setInterval(checkHeap, gcInterval);
}
|
|
|
|
|
|
2025-01-06 11:47:38 +05:30
|
|
|
/**
 * Emergency memory cleanup.
 *
 * Steps, in order:
 * 1. Drop all queued screenshots and reset the processing flag.
 * 2. Request a garbage collection when `global.gc` is exposed.
 * 3. If a CDP session exists: stop the screencast, wait 500 ms, detach the
 *    old session, then open a fresh session on the current page and restart
 *    the screencast.
 * 4. Notify the client with a `memory-cleanup` event.
 *
 * Failures in steps 2 and 3 are logged and never thrown.
 *
 * @returns resolves once the cleanup and the notification have been issued
 */
private async performMemoryCleanup(): Promise<void> {
  this.screenshotQueue = [];
  this.isProcessingScreenshot = false;

  if (global.gc) {
    try {
      global.gc();
      logger.info('Garbage collection requested');
    } catch (error) {
      logger.error('Error during garbage collection:', error);
    }
  }

  if (this.client) {
    try {
      await this.stopScreencast();

      // Give in-flight screencast traffic a moment to settle before swapping sessions.
      await new Promise(resolve => setTimeout(resolve, 500));

      // Detach the old session instead of merely dropping the reference;
      // otherwise every cleanup leaves one more CDP session attached to the page.
      const staleClient = this.client;
      this.client = null;
      if (staleClient) {
        try {
          await staleClient.detach();
        } catch (detachError) {
          // Best effort: the session may already be gone (e.g. page closed).
          logger.debug(`Ignoring error while detaching stale CDP session: ${detachError}`);
        }
      }

      if (this.currentPage) {
        this.client = await this.currentPage.context().newCDPSession(this.currentPage);
        await this.startScreencast();
        logger.info('CDP session reset completed');
      }
    } catch (error) {
      logger.error('Error resetting CDP session:', error);
    }
  }

  this.socket.emit('memory-cleanup', {
    userId: this.userId,
    timestamp: Date.now()
  });
}
|
|
|
|
|
|
2024-12-10 20:32:01 +05:30
|
|
|
/**
 * Produces a canonical form of a URL for comparison purposes.
 *
 * Trailing slashes are stripped from the path (an empty path becomes `/`)
 * and the protocol is lower-cased. Input that cannot be parsed as a URL is
 * returned unchanged.
 *
 * @param url URL to normalize
 * @returns the normalized URL string, or `url` itself when parsing fails
 */
private normalizeUrl(url: string): string {
  let parsed: URL;
  try {
    parsed = new URL(url);
  } catch {
    return url;
  }

  try {
    const trimmedPath = parsed.pathname.replace(/\/+$/, '');
    parsed.pathname = trimmedPath || '/';
    parsed.protocol = parsed.protocol.toLowerCase();
    return parsed.toString();
  } catch {
    return url;
  }
}
|
2024-12-10 20:32:01 +05:30
|
|
|
|
|
|
|
|
/**
 * Decides whether a URL differs enough from the last emitted one to be
 * announced again.
 *
 * @param newUrl candidate URL
 * @returns `true` when nothing has been emitted yet, or when the normalized
 *          candidate differs from the normalized last emitted URL
 */
private shouldEmitUrlChange(newUrl: string): boolean {
  const previousUrl = this.lastEmittedUrl;
  if (!previousUrl) {
    return true;
  }

  const candidate = this.normalizeUrl(newUrl);
  const previous = this.normalizeUrl(previousUrl);
  return candidate !== previous;
}
|
|
|
|
|
|
2025-06-23 13:21:46 +05:30
|
|
|
/**
 * Registers the `dom:scroll` socket handler.
 *
 * Each event is replayed on the page as a mouse-wheel action, after which
 * the page's scroll offsets are read back. When the combined |dx| + |dy|
 * movement since the last recorded position exceeds `scrollThreshold`, the
 * position is recorded and a DOM snapshot is scheduled 300 ms later; any
 * previously scheduled snapshot is cancelled first (debounce).
 *
 * Events are ignored while DOM streaming is inactive or there is no page.
 * Errors are logged, not thrown.
 */
private setupScrollEventListener(): void {
  const handleScroll = async (data: { deltaX: number; deltaY: number }) => {
    if (!(this.isDOMStreamingActive && this.currentPage)) {
      return;
    }

    try {
      logger.debug(
        `Received scroll event: deltaX=${data.deltaX}, deltaY=${data.deltaY}`
      );

      await this.currentPage.mouse.wheel(data.deltaX, data.deltaY);

      // Runs inside the page: report current offsets and scrollable extents.
      const scrollInfo = await this.currentPage.evaluate(() => {
        const root = document.documentElement;
        return {
          x: window.scrollX,
          y: window.scrollY,
          maxX: Math.max(0, root.scrollWidth - window.innerWidth),
          maxY: Math.max(0, root.scrollHeight - window.innerHeight),
          documentHeight: root.scrollHeight,
          viewportHeight: window.innerHeight,
        };
      });

      const movedY = Math.abs(scrollInfo.y - this.lastScrollPosition.y);
      const movedX = Math.abs(scrollInfo.x - this.lastScrollPosition.x);
      const scrollDelta = movedY + movedX;

      logger.debug(
        `Scroll delta: ${scrollDelta}, threshold: ${this.scrollThreshold}`
      );

      if (scrollDelta <= this.scrollThreshold) {
        return;
      }

      this.lastScrollPosition = { x: scrollInfo.x, y: scrollInfo.y };
      this.isScrollTriggeredSnapshot = true;

      // Debounce: only the most recent qualifying scroll triggers a snapshot.
      if (this.snapshotDebounceTimeout) {
        clearTimeout(this.snapshotDebounceTimeout);
      }

      this.snapshotDebounceTimeout = setTimeout(async () => {
        logger.info(
          `Triggering snapshot due to scroll. Position: ${scrollInfo.y}/${scrollInfo.maxY}`
        );

        await this.makeAndEmitDOMSnapshot();
      }, 300);
    } catch (error) {
      logger.error("Error handling scroll event:", error);
    }
  };

  this.socket.on("dom:scroll", handleScroll);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Attaches snapshot triggers to the current page; does nothing when there
 * is no page.
 *
 * - `domcontentloaded`: take a DOM snapshot immediately.
 * - `response`: for document responses and URLs containing `api/` or `ajax`,
 *   record the URL and (re)start a quiet-period timer. When
 *   `NETWORK_QUIET_PERIOD` ms pass without another matching response, the
 *   pending list is cleared and a DOM snapshot is taken.
 */
private setupPageChangeListeners(): void {
  const page = this.currentPage;
  if (!page) return;

  page.on("domcontentloaded", async () => {
    logger.info("DOM content loaded - triggering snapshot");
    await this.makeAndEmitDOMSnapshot();
  });

  page.on("response", async (response) => {
    const url = response.url();

    const isRelevant =
      response.request().resourceType() === "document" ||
      url.includes("api/") ||
      url.includes("ajax");
    if (!isRelevant) {
      return;
    }

    this.pendingNetworkRequests.push(url);

    // Restart the quiet-period countdown on every matching response.
    if (this.networkRequestTimeout) {
      clearTimeout(this.networkRequestTimeout);
      this.networkRequestTimeout = null;
    }

    logger.debug(
      `Network request received: ${url}. Total pending: ${this.pendingNetworkRequests.length}`
    );

    const onQuiet = async () => {
      logger.info(
        `Network quiet period reached. Processing ${this.pendingNetworkRequests.length} requests`
      );

      this.pendingNetworkRequests = [];
      this.networkRequestTimeout = null;

      await this.makeAndEmitDOMSnapshot();
    };

    this.networkRequestTimeout = setTimeout(onQuiet, this.NETWORK_QUIET_PERIOD);
  });
}
|
|
|
|
|
|
2024-12-10 20:32:01 +05:30
|
|
|
/**
 * Wires navigation and load handlers onto a page.
 *
 * - `framenavigated` (main frame only): emits `urlChanged` to the client
 *   when the URL differs from the last emitted one after normalization.
 * - `load`: waits up to 5 s for network idle, then evaluates the injectable
 *   script in the page. A single attempt is made; failure is logged as a
 *   warning and the outcome is printed to the console.
 *
 * @param page page to attach the listeners to
 */
private async setupPageEventListeners(page: Page) {
  page.on('framenavigated', async (frame) => {
    if (frame !== page.mainFrame()) {
      return;
    }

    const currentUrl = page.url();
    if (!this.shouldEmitUrlChange(currentUrl)) {
      return;
    }

    this.lastEmittedUrl = currentUrl;
    this.socket.emit('urlChanged', {url: currentUrl, userId: this.userId});
  });

  page.on('load', async () => {
    let success: boolean;
    try {
      await page.waitForLoadState('networkidle', { timeout: 5000 });
      await page.evaluate(getInjectableScript());
      success = true;
    } catch (error: any) {
      logger.log('warn', `Script injection attempt failed: ${error.message}`);
      success = false;
    }

    console.log("Script injection result:", success);
  });
}
|
|
|
|
|
|
2024-12-14 22:30:50 +05:30
|
|
|
/**
 * Picks one user-agent string uniformly at random from a fixed pool of
 * desktop Windows browser identifiers.
 *
 * @returns the selected user-agent string
 */
private getUserAgent() {
  const pool = [
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.140 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:117.0) Gecko/20100101 Firefox/117.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.1938.81 Safari/537.36 Edg/116.0.1938.81',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.5845.96 Safari/537.36 OPR/101.0.4843.25',
    'Mozilla/5.0 (Windows NT 11.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.5938.62 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:118.0) Gecko/20100101 Firefox/118.0',
  ];

  const pick = Math.floor(Math.random() * pool.length);
  return pool[pick];
}
|
|
|
|
|
|
2024-06-01 10:56:25 +05:30
|
|
|
/**
 * An asynchronous constructor for asynchronously initialized properties.
 * Must be called right after creating an instance of RemoteBrowser class.
 *
 * Launches headless Chromium, creates a browser context (with the user's
 * proxy when one is configured), installs the init scripts, opens the first
 * page, wires its listeners, reports the viewport to the client and opens a
 * CDP session. The whole sequence is attempted up to 3 times with a 1 s
 * pause between attempts. After a failed attempt the browser is closed and
 * every reference derived from it is cleared.
 *
 * @param userId ID of the user whose proxy configuration is loaded
 * @returns {Promise<void>}
 * @throws Error when all attempts fail; the message carries the last failure
 */
public initialize = async (userId: string): Promise<void> => {
  const MAX_RETRIES = 3;
  let retryCount = 0;
  let success = false;

  while (!success && retryCount < MAX_RETRIES) {
    try {
      this.browser = <Browser>(await chromium.launch({
        headless: true,
        args: [
          "--disable-blink-features=AutomationControlled",
          "--disable-web-security",
          "--disable-features=IsolateOrigins,site-per-process",
          "--disable-site-isolation-trials",
          "--disable-extensions",
          "--no-sandbox",
          "--disable-dev-shm-usage",
          "--force-color-profile=srgb",
          "--force-device-scale-factor=2",
          "--ignore-certificate-errors"
        ],
      }));

      if (!this.browser || this.browser.isConnected() === false) {
        throw new Error('Browser failed to launch or is not connected');
      }

      const proxyConfig = await getDecryptedProxyConfig(userId);
      let proxyOptions: { server: string, username?: string, password?: string } = { server: '' };

      if (proxyConfig.proxy_url) {
        proxyOptions = {
          server: proxyConfig.proxy_url,
          // Credentials are only forwarded when both parts are present.
          ...(proxyConfig.proxy_username && proxyConfig.proxy_password && {
            username: proxyConfig.proxy_username,
            password: proxyConfig.proxy_password,
          }),
        };
      }

      const contextOptions: any = {
        // viewport: { height: 400, width: 900 },
        // recordVideo: { dir: 'videos/' }
        // Force reduced motion to prevent animation issues
        reducedMotion: 'reduce',
        // Force JavaScript to be enabled
        javaScriptEnabled: true,
        // NOTE(review): `timeout` does not appear among Playwright's documented
        // newContext options — confirm whether it has any effect.
        timeout: 50000,
        // Do not emulate a forced-colors (high-contrast) palette
        forcedColors: 'none',
        isMobile: false,
        hasTouch: false,
        userAgent: this.getUserAgent(),
      };

      if (proxyOptions.server) {
        contextOptions.proxy = {
          server: proxyOptions.server,
          username: proxyOptions.username ? proxyOptions.username : undefined,
          password: proxyOptions.password ? proxyOptions.password : undefined,
        };
      }

      await new Promise(resolve => setTimeout(resolve, 500));

      // Race context creation against a 15 s deadline. The deadline timer is
      // always cleared afterwards so it does not linger once the race settles.
      let contextDeadline: ReturnType<typeof setTimeout> | undefined;
      try {
        const contextPromise = this.browser.newContext(contextOptions);
        this.context = await Promise.race([
          contextPromise,
          new Promise<never>((_, reject) => {
            contextDeadline = setTimeout(() => reject(new Error('Context creation timed out after 15s')), 15000);
          })
        ]) as BrowserContext;
      } finally {
        if (contextDeadline !== undefined) {
          clearTimeout(contextDeadline);
        }
      }

      // Make navigator.webdriver report false while keeping a native-looking getter.
      await this.context.addInitScript(
        `const defaultGetter = Object.getOwnPropertyDescriptor(
          Navigator.prototype,
          "webdriver"
        ).get;
        defaultGetter.apply(navigator);
        defaultGetter.toString();
        Object.defineProperty(Navigator.prototype, "webdriver", {
          set: undefined,
          enumerable: true,
          configurable: true,
          get: new Proxy(defaultGetter, {
            apply: (target, thisArg, args) => {
              Reflect.apply(target, thisArg, args);
              return false;
            },
          }),
        });
        const patchedGetter = Object.getOwnPropertyDescriptor(
          Navigator.prototype,
          "webdriver"
        ).get;
        patchedGetter.apply(navigator);
        patchedGetter.toString();`
      );

      await this.context.addInitScript({ path: './server/src/browser-management/classes/rrweb-bundle.js' });

      this.currentPage = await this.context.newPage();

      await this.setupPageEventListeners(this.currentPage);

      const viewportSize = await this.currentPage.viewportSize();
      if (viewportSize) {
        this.socket.emit('viewportInfo', {
          width: viewportSize.width,
          height: viewportSize.height,
          userId: this.userId
        });
      }

      // Start from a clean slate so the fallback below can tell whether a
      // session was already opened during this attempt.
      this.client = null;
      try {
        const blocker = await PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']);
        await blocker.enableBlockingInPage(this.currentPage);
        this.client = await this.currentPage.context().newCDPSession(this.currentPage);
        await blocker.disableBlockingInPage(this.currentPage);
        console.log('Adblocker initialized');
      } catch (error: any) {
        console.warn('Failed to initialize adblocker, continuing without it:', error.message);
        // Still need a CDP session even if the blocker fails, but do not open
        // a second one when the failure happened after it was created.
        if (!this.client) {
          this.client = await this.currentPage.context().newCDPSession(this.currentPage);
        }
      }

      success = true;
      logger.log('debug', `Browser initialized successfully for user ${userId}`);
    } catch (error: any) {
      retryCount++;
      logger.log('error', `Browser initialization failed (attempt ${retryCount}/${MAX_RETRIES}): ${error.message}`);

      if (this.browser) {
        try {
          await this.browser.close();
        } catch (closeError) {
          logger.log('warn', `Failed to close browser during cleanup: ${closeError}`);
        }
        this.browser = null;
      }

      // Everything below belonged to the browser that was just discarded;
      // drop the references so later code never touches a closed page.
      this.context = null;
      this.currentPage = null;
      this.client = null;

      if (retryCount >= MAX_RETRIES) {
        throw new Error(`Failed to initialize browser after ${MAX_RETRIES} attempts: ${error.message}`);
      }

      await new Promise(resolve => setTimeout(resolve, 1000));
    }
  }

  this.setupMemoryCleanup();
  // this.initializeMemoryManagement();
};
|
2024-06-01 10:57:32 +05:30
|
|
|
|
2025-03-14 12:40:28 +05:30
|
|
|
/**
 * Sends the current page's viewport dimensions to the client as a
 * `viewportInfo` event. Does nothing when there is no page or the page
 * reports no viewport size.
 *
 * @returns {Promise<void>}
 */
public updateViewportInfo = async (): Promise<void> => {
  if (!this.currentPage) {
    return;
  }

  const size = await this.currentPage.viewportSize();
  if (!size) {
    return;
  }

  const { width, height } = size;
  this.socket.emit('viewportInfo', {
    width,
    height,
    userId: this.userId
  });
};
|
|
|
|
|
|
2025-05-07 09:11:59 +05:30
|
|
|
/**
|
|
|
|
|
* Extract data from a list of elements on a page
|
|
|
|
|
* @param page - Playwright Page object
|
|
|
|
|
* @param listSelector - CSS selector for the list container
|
|
|
|
|
* @param fields - Record of field configurations
|
|
|
|
|
* @param limit - Maximum number of items to extract (default: 5)
|
|
|
|
|
* @returns Promise<Array<Record<string, string>>> - Array of extracted data objects
|
|
|
|
|
*/
|
|
|
|
|
private async extractListData(
|
|
|
|
|
page: Page,
|
|
|
|
|
listSelector: string,
|
|
|
|
|
fields: Record<string, {
|
|
|
|
|
label: string;
|
|
|
|
|
selectorObj: {
|
|
|
|
|
selector: string;
|
|
|
|
|
attribute: string;
|
|
|
|
|
};
|
|
|
|
|
}>,
|
|
|
|
|
limit: number = 5
|
|
|
|
|
): Promise<Array<Record<string, string>>> {
|
2025-06-30 16:19:46 +05:30
|
|
|
if (page.isClosed()) {
|
|
|
|
|
logger.warn("Page is closed, cannot extract list data");
|
|
|
|
|
return [];
|
|
|
|
|
}
|
|
|
|
|
|
2025-05-07 09:11:59 +05:30
|
|
|
return await page.evaluate(
|
|
|
|
|
async ({ listSelector, fields, limit }: {
|
|
|
|
|
listSelector: string;
|
|
|
|
|
fields: Record<string, {
|
|
|
|
|
label: string;
|
|
|
|
|
selectorObj: {
|
|
|
|
|
selector: string;
|
|
|
|
|
attribute: string;
|
|
|
|
|
};
|
|
|
|
|
}>;
|
|
|
|
|
limit: number;
|
|
|
|
|
}) => {
|
|
|
|
|
const convertedFields: Record<string, {
|
|
|
|
|
selector: string;
|
|
|
|
|
attribute: string;
|
|
|
|
|
}> = {};
|
|
|
|
|
|
|
|
|
|
for (const [key, field] of Object.entries(fields)) {
|
|
|
|
|
convertedFields[field.label] = {
|
|
|
|
|
selector: field.selectorObj.selector,
|
|
|
|
|
attribute: field.selectorObj.attribute
|
|
|
|
|
};
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const queryElement = (rootElement: Element | Document, selector: string): Element | null => {
|
|
|
|
|
if (!selector.includes('>>') && !selector.includes(':>>')) {
|
|
|
|
|
return rootElement.querySelector(selector);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim());
|
|
|
|
|
let currentElement: Element | Document | null = rootElement;
|
|
|
|
|
|
|
|
|
|
for (let i = 0; i < parts.length; i++) {
|
|
|
|
|
if (!currentElement) return null;
|
|
|
|
|
|
|
|
|
|
if ((currentElement as Element).tagName === 'IFRAME' || (currentElement as Element).tagName === 'FRAME') {
|
|
|
|
|
try {
|
|
|
|
|
const frameElement = currentElement as HTMLIFrameElement | HTMLFrameElement;
|
|
|
|
|
const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document;
|
|
|
|
|
if (!frameDoc) return null;
|
|
|
|
|
currentElement = frameDoc.querySelector(parts[i]);
|
|
|
|
|
continue;
|
|
|
|
|
} catch (e) {
|
|
|
|
|
console.warn(`Cannot access ${(currentElement as Element).tagName.toLowerCase()} content:`, e);
|
|
|
|
|
return null;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
let nextElement: Element | null = null;
|
|
|
|
|
|
|
|
|
|
if ('querySelector' in currentElement) {
|
|
|
|
|
nextElement = currentElement.querySelector(parts[i]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!nextElement && 'shadowRoot' in currentElement && (currentElement as Element).shadowRoot) {
|
|
|
|
|
nextElement = (currentElement as Element).shadowRoot!.querySelector(parts[i]);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!nextElement && 'children' in currentElement) {
|
|
|
|
|
const children: any = Array.from((currentElement as Element).children || []);
|
|
|
|
|
for (const child of children) {
|
|
|
|
|
if (child.shadowRoot) {
|
|
|
|
|
nextElement = child.shadowRoot.querySelector(parts[i]);
|
|
|
|
|
if (nextElement) break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
currentElement = nextElement;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
return currentElement as Element | null;
|
|
|
|
|
};
|
|
|
|
|
|
|
|
|
|
/**
 * Resolves `selector` against `rootElement` and returns every matching element.
 *
 * A selector without the `>>` / `:>>` delimiters is handed straight to
 * `querySelectorAll`. Otherwise the selector is split on the delimiters and
 * each segment is resolved against the matches of the previous segment:
 * - for an IFRAME/FRAME scope, inside the frame's document (access failures
 *   are logged and the frame is skipped);
 * - for any other scope, in its light DOM, in its own shadow root, and in the
 *   shadow roots of its direct children.
 */
const queryElementAll = (rootElement: Element | Document, selector: string): Element[] => {
  // ':>>' always contains '>>', so a single check covers both delimiters.
  const isPlainSelector = !selector.includes('>>');
  if (isPlainSelector) {
    return Array.from(rootElement.querySelectorAll(selector));
  }

  const segments = selector.split(/(?:>>|:>>)/).map(segment => segment.trim());
  let scopes: (Element | Document)[] = [rootElement];

  segments.forEach(segment => {
    const matches: Element[] = [];
    const collect = (found: ArrayLike<Element>): void => {
      matches.push(...Array.from(found));
    };

    scopes.forEach(scope => {
      const tag = (scope as Element).tagName;

      if (tag === 'IFRAME' || tag === 'FRAME') {
        try {
          const frame = scope as HTMLIFrameElement | HTMLFrameElement;
          const innerDocument = frame.contentDocument || frame.contentWindow?.document;
          if (innerDocument) {
            collect(innerDocument.querySelectorAll(segment));
          }
        } catch (e) {
          console.warn(`Cannot access ${(scope as Element).tagName.toLowerCase()} content:`, e);
        }
        return;
      }

      // Light DOM of the scope itself.
      if ('querySelectorAll' in scope) {
        collect(scope.querySelectorAll(segment));
      }

      // The scope's own shadow tree.
      if ('shadowRoot' in scope && (scope as Element).shadowRoot) {
        collect((scope as Element).shadowRoot!.querySelectorAll(segment));
      }

      // Shadow trees hosted by the scope's direct children.
      if ('children' in scope) {
        Array.from((scope as Element).children || []).forEach(child => {
          if (child.shadowRoot) {
            collect(child.shadowRoot.querySelectorAll(segment));
          }
        });
      }
    });

    scopes = matches;
  });

  return scopes as Element[];
};
|
|
|
|
|
|
|
|
|
|
/**
 * Reads the value of `attribute` from `element`.
 *
 * Resolution order:
 * 1. If the element hosts a shadow root with non-blank text, that trimmed text
 *    is returned regardless of `attribute`.
 * 2. `innerText` / `innerHTML` return the trimmed text / markup.
 * 3. `src` / `href` are resolved to absolute URLs against the owning
 *    document's location. For `href` on a non-anchor element the parent
 *    anchor's href is preferred; `data-<attribute>` is used as a fallback; for
 *    `src` the computed CSS background image is the last resort.
 * 4. Any other attribute is returned verbatim via `getAttribute`.
 *
 * @param element   Element to read from; a falsy value yields `null`.
 * @param attribute Attribute (or pseudo-attribute) name to extract.
 * @returns The extracted string, or `null` when nothing usable was found.
 */
function extractValue(element: Element, attribute: string): string | null {
  if (!element) return null;

  const baseURL = element.ownerDocument?.location?.href || window.location.origin;

  if (element.shadowRoot) {
    const shadowContent = element.shadowRoot.textContent;
    if (shadowContent?.trim()) {
      return shadowContent.trim();
    }
  }

  if (attribute === 'innerText') {
    // `innerText` only exists on HTMLElement; fall back to `textContent` so
    // other element kinds (e.g. SVG) do not throw on `.trim()`.
    const text = (element as HTMLElement).innerText ?? element.textContent ?? '';
    return text.trim();
  } else if (attribute === 'innerHTML') {
    return element.innerHTML.trim();
  } else if (attribute === 'src' || attribute === 'href') {
    if (attribute === 'href' && element.tagName !== 'A') {
      // The selected node is often a child of the link; use the link's href.
      const parentElement = element.parentElement;
      if (parentElement && parentElement.tagName === 'A') {
        const parentHref = parentElement.getAttribute('href');
        if (parentHref) {
          try {
            return new URL(parentHref, baseURL).href;
          } catch (e) {
            return parentHref;
          }
        }
      }
    }

    const attrValue = element.getAttribute(attribute);
    const dataAttr = attrValue || element.getAttribute('data-' + attribute);

    if (!dataAttr || dataAttr.trim() === '') {
      if (attribute === 'src') {
        const style = window.getComputedStyle(element);
        const bgImage = style.backgroundImage;
        if (bgImage && bgImage !== 'none') {
          const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
          if (!matches) return null;
          try {
            return new URL(matches[1], baseURL).href;
          } catch (e) {
            // Same policy as the attribute branch below: hand back the raw
            // value rather than failing the whole extraction.
            console.warn('Error creating URL from', matches[1], e);
            return matches[1];
          }
        }
      }
      return null;
    }

    try {
      return new URL(dataAttr, baseURL).href;
    } catch (e) {
      console.warn('Error creating URL from', dataAttr, e);
      return dataAttr; // Return the original value if URL construction fails
    }
  }

  return element.getAttribute(attribute);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Walks from `element` towards the document root looking for a table cell or
 * table row, inspecting at most `MAX_DEPTH` nodes.
 *
 * - A node whose root is a ShadowRoot is replaced by the shadow host without
 *   being inspected and without consuming depth.
 * - An IFRAME/FRAME node is replaced by the body of its content document;
 *   if that access throws, the search is abandoned.
 *
 * @returns `{ type: 'TD' | 'TR', element }` for the first cell/row met, else `null`.
 */
function findTableAncestor(element: Element): { type: string; element: Element } | null {
  const MAX_DEPTH = 5;
  let node: Element | null = element;
  let inspected = 0;

  while (node && inspected < MAX_DEPTH) {
    const rootNode = node.getRootNode();
    if (rootNode instanceof ShadowRoot) {
      node = rootNode.host;
      continue;
    }

    switch (node.tagName) {
      case 'TD':
      case 'TR':
        return { type: node.tagName, element: node };
      case 'IFRAME':
      case 'FRAME':
        try {
          node = (node as HTMLIFrameElement | HTMLFrameElement).contentDocument?.body || null;
        } catch (e) {
          return null;
        }
        break;
      default:
        node = node.parentElement;
    }

    inspected += 1;
  }

  return null;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Returns the zero-based position of a table cell.
 *
 * For a cell living inside a shadow root the position is its index among all
 * `td` elements of that shadow root (-1 if it is not among them); otherwise it
 * is the number of preceding element siblings.
 */
function getCellIndex(td: Element): number {
  const rootNode = td.getRootNode();
  if (rootNode instanceof ShadowRoot) {
    const shadowCells = Array.from(rootNode.querySelectorAll('td'));
    return shadowCells.indexOf(td as HTMLTableCellElement);
  }

  let precedingSiblings = 0;
  for (let previous = td.previousElementSibling; previous; previous = previous.previousElementSibling) {
    precedingSiblings += 1;
  }
  return precedingSiblings;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Reports whether any configured table field, resolved inside `row`, sits
 * under a `TH` element.
 *
 * For each field the matched element is walked upwards until `row` is reached:
 * nodes inside a shadow root jump to the shadow host, IFRAME/FRAME nodes are
 * replaced by their content document body (the walk for that field stops if
 * the frame cannot be accessed), everything else moves to its parent element.
 */
function hasThElement(row: Element, tableFields: Record<string, { selector: string; attribute: string }>): boolean {
  for (const { selector } of Object.values(tableFields)) {
    const matched = queryElement(row, selector);
    if (!matched) continue;

    let node: Element | ShadowRoot | Document | null = matched;
    while (node && node !== row) {
      const rootNode = node.getRootNode();
      if (rootNode instanceof ShadowRoot) {
        node = rootNode.host;
        continue;
      }

      const tag = (node as Element).tagName;
      if (tag === 'TH') return true;

      if (tag === 'IFRAME' || tag === 'FRAME') {
        try {
          node = (node as HTMLIFrameElement | HTMLFrameElement).contentDocument?.body || null;
        } catch (e) {
          break;
        }
      } else {
        node = (node as Element).parentElement;
      }
    }
  }

  return false;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Decides which table rows should be scraped.
 *
 * If any row has a configured field located under a `TH`, the rows are
 * returned untouched. Otherwise rows that contain a `TH`, either among their
 * descendants or inside their own shadow root, are dropped.
 */
function filterRowsBasedOnTag(rows: Element[], tableFields: Record<string, { selector: string; attribute: string }>): Element[] {
  const fieldTargetsHeader = rows.some(row => hasThElement(row, tableFields));
  if (fieldTargetsHeader) {
    return rows;
  }

  const isHeaderFree = (row: Element): boolean => {
    const noLightHeader = row.getElementsByTagName('TH').length === 0;
    const noShadowHeader = row.shadowRoot ? row.shadowRoot.querySelector('th') === null : true;
    return noLightHeader && noShadowHeader;
  };

  return rows.filter(isHeaderFree);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Jaccard similarity of two class lists: |A ∩ B| / |A ∪ B|.
 *
 * Duplicate class names within a list are ignored.
 *
 * @returns A value in [0, 1]; 0 when both lists are empty (instead of the NaN
 *          that a 0/0 division would produce).
 */
function calculateClassSimilarity(classList1: string[], classList2: string[]): number {
  const set1 = new Set(classList1);
  const set2 = new Set(classList2);

  const union = new Set([...set1, ...set2]);
  if (union.size === 0) return 0;

  let shared = 0;
  set1.forEach(name => {
    if (set2.has(name)) shared += 1;
  });

  return shared / union.size;
}
|
|
|
|
|
|
|
|
|
|
/**
 * Finds elements with the same tag as `baseElement` whose class list is at
 * least `similarityThreshold` similar (see `calculateClassSimilarity`).
 *
 * Candidates are gathered from the top-level document, from the shadow tree
 * that contains `baseElement` (plus the light DOM of its host), and from every
 * accessible iframe/frame document. Each candidate appears at most once in the
 * result and `baseElement` itself is excluded.
 *
 * @param baseElement         Reference element; if it has no classes the result is empty.
 * @param similarityThreshold Minimum similarity (inclusive), defaults to 0.7.
 */
function findSimilarElements(baseElement: Element, similarityThreshold: number = 0.7): Element[] {
  const baseClasses = Array.from(baseElement.classList);
  if (baseClasses.length === 0) return [];

  const tagName = baseElement.tagName;

  // A Set keeps insertion order and drops elements reached via several routes
  // (e.g. host light-DOM descendants that the document query already found).
  const candidates = new Set<Element>();
  const addAll = (found: ArrayLike<Element>): void => {
    Array.from(found).forEach(candidate => candidates.add(candidate));
  };

  addAll(document.getElementsByTagName(tagName));

  const rootNode = baseElement.getRootNode();
  if (rootNode instanceof ShadowRoot) {
    addAll(rootNode.host.getElementsByTagName(tagName));
    // The document query does not reach into shadow trees, so siblings of the
    // base element must be collected from its own shadow root. Filtering by
    // tagName avoids building a selector from an arbitrary tag name.
    addAll(Array.from(rootNode.querySelectorAll('*')).filter(node => node.tagName === tagName));
  }

  const frames = [
    ...Array.from(document.getElementsByTagName('iframe')),
    ...Array.from(document.getElementsByTagName('frame'))
  ];

  for (const frame of frames) {
    try {
      const frameElement = frame as HTMLIFrameElement | HTMLFrameElement;
      const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document;
      if (frameDoc) {
        addAll(frameDoc.getElementsByTagName(tagName));
      }
    } catch (e) {
      console.warn(`Cannot access ${frame.tagName.toLowerCase()} content:`, e);
    }
  }

  return Array.from(candidates).filter(element => {
    if (element === baseElement) return false;
    const similarity = calculateClassSimilarity(
      baseClasses,
      Array.from(element.classList)
    );
    return similarity >= similarityThreshold;
  });
}
|
|
|
|
|
|
|
|
|
|
let containers = queryElementAll(document, listSelector);
|
|
|
|
|
|
|
|
|
|
if (containers.length === 0) return [];
|
|
|
|
|
|
|
|
|
|
if (limit > 1 && containers.length === 1) {
|
|
|
|
|
const baseContainer = containers[0];
|
|
|
|
|
const similarContainers = findSimilarElements(baseContainer);
|
|
|
|
|
|
|
|
|
|
if (similarContainers.length > 0) {
|
|
|
|
|
const newContainers = similarContainers.filter(container =>
|
|
|
|
|
!container.matches(listSelector)
|
|
|
|
|
);
|
|
|
|
|
containers = [...containers, ...newContainers];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const containerFields = containers.map(() => ({
|
|
|
|
|
tableFields: {} as Record<string, {
|
|
|
|
|
selector: string;
|
|
|
|
|
attribute: string;
|
|
|
|
|
tableContext?: string;
|
|
|
|
|
cellIndex?: number;
|
|
|
|
|
}>,
|
|
|
|
|
nonTableFields: {} as Record<string, {
|
|
|
|
|
selector: string;
|
|
|
|
|
attribute: string;
|
|
|
|
|
}>
|
|
|
|
|
}));
|
|
|
|
|
|
|
|
|
|
containers.forEach((container, containerIndex) => {
|
|
|
|
|
for (const [label, field] of Object.entries(convertedFields)) {
|
|
|
|
|
const sampleElement = queryElement(container, field.selector);
|
|
|
|
|
|
|
|
|
|
if (sampleElement) {
|
|
|
|
|
const ancestor = findTableAncestor(sampleElement);
|
|
|
|
|
if (ancestor) {
|
|
|
|
|
containerFields[containerIndex].tableFields[label] = {
|
|
|
|
|
...field,
|
|
|
|
|
tableContext: ancestor.type,
|
|
|
|
|
cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1
|
|
|
|
|
};
|
|
|
|
|
} else {
|
|
|
|
|
containerFields[containerIndex].nonTableFields[label] = field;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
containerFields[containerIndex].nonTableFields[label] = field;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
});
|
|
|
|
|
|
|
|
|
|
const tableData: Array<Record<string, string>> = [];
|
|
|
|
|
const nonTableData: Array<Record<string, string>> = [];
|
|
|
|
|
|
|
|
|
|
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
|
|
|
|
|
const container = containers[containerIndex];
|
|
|
|
|
const { tableFields } = containerFields[containerIndex];
|
|
|
|
|
|
|
|
|
|
if (Object.keys(tableFields).length > 0) {
|
|
|
|
|
const firstField = Object.values(tableFields)[0];
|
|
|
|
|
const firstElement = queryElement(container, firstField.selector);
|
|
|
|
|
let tableContext: Element | null = firstElement;
|
|
|
|
|
|
|
|
|
|
while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) {
|
|
|
|
|
if (tableContext.getRootNode() instanceof ShadowRoot) {
|
|
|
|
|
tableContext = (tableContext.getRootNode() as ShadowRoot).host;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') {
|
|
|
|
|
try {
|
|
|
|
|
const frameElement = tableContext as HTMLIFrameElement | HTMLFrameElement;
|
|
|
|
|
tableContext = frameElement.contentDocument?.body || null;
|
|
|
|
|
} catch (e) {
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
tableContext = tableContext.parentElement;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (tableContext) {
|
|
|
|
|
const rows: Element[] = [];
|
|
|
|
|
|
|
|
|
|
rows.push(...Array.from(tableContext.getElementsByTagName('TR')));
|
|
|
|
|
|
|
|
|
|
if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') {
|
|
|
|
|
try {
|
|
|
|
|
const frameElement = tableContext as HTMLIFrameElement | HTMLFrameElement;
|
|
|
|
|
const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document;
|
|
|
|
|
if (frameDoc) {
|
|
|
|
|
rows.push(...Array.from(frameDoc.getElementsByTagName('TR')));
|
|
|
|
|
}
|
|
|
|
|
} catch (e) {
|
|
|
|
|
console.warn(`Cannot access ${tableContext.tagName.toLowerCase()} rows:`, e);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const processedRows = filterRowsBasedOnTag(rows, tableFields);
|
|
|
|
|
|
|
|
|
|
for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) {
|
|
|
|
|
const record: Record<string, string> = {};
|
|
|
|
|
const currentRow = processedRows[rowIndex];
|
|
|
|
|
|
|
|
|
|
for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) {
|
|
|
|
|
let element: Element | null = null;
|
|
|
|
|
|
|
|
|
|
if (cellIndex !== undefined && cellIndex >= 0) {
|
|
|
|
|
let td: Element | null = currentRow.children[cellIndex] || null;
|
|
|
|
|
|
|
|
|
|
if (!td && currentRow.shadowRoot) {
|
|
|
|
|
const shadowCells = currentRow.shadowRoot.children;
|
|
|
|
|
if (shadowCells && shadowCells.length > cellIndex) {
|
|
|
|
|
td = shadowCells[cellIndex];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (td) {
|
|
|
|
|
element = queryElement(td, selector);
|
|
|
|
|
|
|
|
|
|
if (!element && selector.split(/(?:>>|:>>)/).pop()?.includes('td:nth-child')) {
|
|
|
|
|
element = td;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!element) {
|
|
|
|
|
const tagOnlySelector = selector.split('.')[0];
|
|
|
|
|
element = queryElement(td, tagOnlySelector);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (!element) {
|
|
|
|
|
let currentElement: Element | null = td;
|
|
|
|
|
while (currentElement && currentElement.children.length > 0) {
|
|
|
|
|
let foundContentChild = false;
|
|
|
|
|
for (const child of Array.from(currentElement.children)) {
|
|
|
|
|
if (extractValue(child, attribute)) {
|
|
|
|
|
currentElement = child;
|
|
|
|
|
foundContentChild = true;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!foundContentChild) break;
|
|
|
|
|
}
|
|
|
|
|
element = currentElement;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
} else {
|
|
|
|
|
element = queryElement(currentRow, selector);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (element) {
|
|
|
|
|
const value = extractValue(element, attribute);
|
|
|
|
|
if (value !== null) {
|
|
|
|
|
record[label] = value;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (Object.keys(record).length > 0) {
|
|
|
|
|
tableData.push(record);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) {
|
|
|
|
|
if (nonTableData.length >= limit) break;
|
|
|
|
|
|
|
|
|
|
const container = containers[containerIndex];
|
|
|
|
|
const { nonTableFields } = containerFields[containerIndex];
|
|
|
|
|
|
|
|
|
|
if (Object.keys(nonTableFields).length > 0) {
|
|
|
|
|
const record: Record<string, string> = {};
|
|
|
|
|
|
|
|
|
|
for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) {
|
|
|
|
|
const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
|
|
|
|
|
const element = queryElement(container, relativeSelector);
|
|
|
|
|
|
|
|
|
|
if (element) {
|
|
|
|
|
const value = extractValue(element, attribute);
|
|
|
|
|
if (value !== null) {
|
|
|
|
|
record[label] = value;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (Object.keys(record).length > 0) {
|
|
|
|
|
nonTableData.push(record);
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
const scrapedData = [...tableData, ...nonTableData].slice(0, limit);
|
|
|
|
|
return scrapedData;
|
|
|
|
|
},
|
|
|
|
|
{ listSelector, fields, limit }
|
|
|
|
|
) as Array<Record<string, string>>;
|
|
|
|
|
}
|
|
|
|
|
|
2025-06-25 13:09:50 +05:30
|
|
|
/**
 * Takes a screenshot of the current page and sends it to the client as a
 * base64 data URL, without going through the workflow interpreter.
 *
 * Emits `screenshotCaptureStarted` before capturing, `directScreenshotCaptured`
 * on success and `screenshotError` when there is no page or the capture fails.
 *
 * @param settings Screenshot options; unset or falsy options fall back to
 *                 png / 30000 ms timeout / animations allowed / caret hidden / device scale.
 * @returns Promise<void>
 */
public captureDirectScreenshot = async (settings: {
  fullPage: boolean;
  type: 'png' | 'jpeg';
  timeout?: number;
  animations?: 'disabled' | 'allow';
  caret?: 'hide' | 'initial';
  scale?: 'css' | 'device';
}): Promise<void> => {
  const reportFailure = (message: string): void => {
    this.socket.emit('screenshotError', {
      userId: this.userId,
      error: message
    });
  };

  const page = this.currentPage;
  if (!page) {
    logger.error("No current page available for screenshot");
    reportFailure('No active page available');
    return;
  }

  try {
    this.socket.emit('screenshotCaptureStarted', {
      userId: this.userId,
      fullPage: settings.fullPage
    });

    const imageType = settings.type || 'png';
    const screenshotBuffer = await page.screenshot({
      fullPage: settings.fullPage,
      type: imageType,
      timeout: settings.timeout || 30000,
      animations: settings.animations || 'allow',
      caret: settings.caret || 'hide',
      scale: settings.scale || 'device'
    });

    const mimeType = `image/${imageType}`;
    const encoded = screenshotBuffer.toString('base64');

    this.socket.emit('directScreenshotCaptured', {
      userId: this.userId,
      screenshot: `data:${mimeType};base64,${encoded}`,
      mimeType: mimeType,
      fullPage: settings.fullPage,
      timestamp: Date.now()
    });
  } catch (error) {
    logger.error('Failed to capture direct screenshot:', error);
    reportFailure(error instanceof Error ? error.message : 'Unknown error occurred');
  }
};
|
|
|
|
|
|
2024-06-01 10:57:32 +05:30
|
|
|
/**
 * Registers all socket event listeners needed for the recording editor session.
 *
 * Each editor event is registered under a user-scoped name (`<event>:<userId>`)
 * and, for backward compatibility, under its plain legacy name. The legacy
 * listeners run the same logic without the per-user debug logging.
 *
 * Async listeners are registered through a wrapper that catches and logs
 * failures: the socket does not await its listeners, so a rejected handler
 * would otherwise surface as an unhandled promise rejection.
 *
 * Should be called only once after the full initialization of the remote browser.
 * @returns void
 */
public registerEditorEvents = (): void => {
  // For each event, include userId to make sure events are handled for the correct browser
  logger.log('debug', `Registering editor events for user: ${this.userId}`);

  // Registers an async listener whose failures are logged instead of escaping.
  const onAsync = (eventName: string, handler: (...args: any[]) => Promise<void>): void => {
    this.socket.on(eventName, async (...args: any[]) => {
      try {
        await handler(...args);
      } catch (error) {
        const reason = error instanceof Error ? error.message : String(error);
        logger.log('error', `Editor event "${eventName}" failed for user ${this.userId}: ${reason}`);
      }
    });
  };

  // Opens a new page in the current context and switches to it.
  const openNewTab = async (): Promise<void> => {
    await this.currentPage?.context().newPage();
    const lastTabIndex = this.currentPage ? this.currentPage.context().pages().length - 1 : 0;
    await this.changeTab(lastTabIndex);
  };

  // Closes the tab at tabInfo.index; resolves to false when no such tab exists.
  const closeTabAt = async (tabInfo: { index: number; isCurrent: boolean }): Promise<boolean> => {
    const page = this.currentPage?.context().pages()[tabInfo.index];
    if (!page) return false;

    if (tabInfo.isCurrent) {
      // Move away from the tab being closed: prefer the next tab, else the previous one.
      const hasNextTab = Boolean(this.currentPage?.context().pages()[tabInfo.index + 1]);
      await this.changeTab(hasNextTab ? tabInfo.index + 1 : tabInfo.index - 1);
    }
    await page.close();
    return true;
  };

  // Replaces this.context with a new context of the requested viewport size;
  // resolves to false when no context/browser is available.
  // NOTE(review): this creates a fresh context and does not touch currentPage
  // or the previous context - confirm that this is the intended way to resize.
  const applyViewport = async (width: number, height: number): Promise<boolean> => {
    if (!this.context || !this.browser) return false;
    this.context = await this.browser.newContext({ viewport: { width, height } });
    return true;
  };

  onAsync(`captureDirectScreenshot:${this.userId}`, async (settings) => {
    logger.debug(`Direct screenshot capture requested for user ${this.userId}`);
    await this.captureDirectScreenshot(settings);
  });

  // For backward compatibility
  onAsync('captureDirectScreenshot', async (settings) => {
    await this.captureDirectScreenshot(settings);
  });

  // Listen for specific events for this user
  onAsync(`rerender:${this.userId}`, async () => {
    logger.debug(`Rerender event received for user ${this.userId}`);
    await this.makeAndEmitScreenshot();
  });

  // For backward compatibility, also listen to the general event
  onAsync('rerender', async () => {
    logger.debug(`General rerender event received, checking if for user ${this.userId}`);
    await this.makeAndEmitScreenshot();
  });

  this.socket.on(`settings:${this.userId}`, (settings) => {
    this.interpreterSettings = settings;
    logger.debug(`Settings updated for user ${this.userId}`);
  });

  onAsync(`changeTab:${this.userId}`, async (tabIndex) => {
    logger.debug(`Tab change to ${tabIndex} requested for user ${this.userId}`);
    await this.changeTab(tabIndex);
  });

  onAsync(`addTab:${this.userId}`, async () => {
    logger.debug(`New tab requested for user ${this.userId}`);
    await openNewTab();
  });

  onAsync(`closeTab:${this.userId}`, async (tabInfo) => {
    logger.debug(`Close tab ${tabInfo.index} requested for user ${this.userId}`);
    if (await closeTabAt(tabInfo)) {
      logger.log(
        'debug',
        `Tab ${tabInfo.index} was closed for user ${this.userId}, new tab count: ${this.currentPage?.context().pages().length}`
      );
    } else {
      logger.log('error', `Tab index ${tabInfo.index} out of range for user ${this.userId}`);
    }
  });

  onAsync(`setViewportSize:${this.userId}`, async (data: { width: number, height: number }) => {
    const { width, height } = data;
    logger.log('debug', `Viewport size change to width=${width}, height=${height} requested for user ${this.userId}`);

    // Update the browser context's viewport dynamically
    if (await applyViewport(width, height)) {
      logger.log('debug', `Viewport size updated to width=${width}, height=${height} for user ${this.userId}`);
    }
  });

  // For backward compatibility, also register the standard events
  this.socket.on('settings', (settings) => this.interpreterSettings = settings);
  onAsync('changeTab', async (tabIndex) => {
    await this.changeTab(tabIndex);
  });
  onAsync('addTab', openNewTab);
  onAsync('closeTab', async (tabInfo) => {
    await closeTabAt(tabInfo);
  });
  onAsync('setViewportSize', async (data: { width: number, height: number }) => {
    const { width, height } = data;
    await applyViewport(width, height);
  });

  onAsync('extractListData', async (data: {
    listSelector: string,
    fields: Record<string, any>,
    currentListId: number,
    pagination: any
  }) => {
    if (this.currentPage) {
      const extractedData = await this.extractListData(
        this.currentPage,
        data.listSelector,
        data.fields
      );

      this.socket.emit('listDataExtracted', {
        currentListId: data.currentListId,
        data: extractedData
      });
    }
  });
};
|
2024-06-01 10:58:23 +05:30
|
|
|
/**
 * Starts a screencast session on
 * [CDP](https://chromedevtools.github.io/devtools-protocol/) level and
 * forwards every received frame through the socket.
 *
 * Each `Page.screencastFrame` event is emitted as a screenshot and then
 * acknowledged 100 ms later, provided the client still exists and the
 * screencast is still flagged as active.
 * @returns {Promise<void>}
 */
public subscribeToScreencast = async (): Promise<void> => {
  logger.log('debug', `Starting screencast for user: ${this.userId}`);
  await this.startScreencast();

  if (!this.client) {
    logger.log('warn', 'client is not initialized');
    return;
  }

  // Set flag to indicate screencast is active
  this.isScreencastActive = true;

  await this.updateViewportInfo();

  this.client.on('Page.screencastFrame', ({ data: encodedFrame, sessionId }) => {
    // Frames arriving after the screencast was stopped are dropped.
    if (!this.isScreencastActive) {
      return;
    }

    this.emitScreenshot(Buffer.from(encodedFrame, 'base64'))

    // Delayed acknowledgement of the frame.
    const acknowledgeFrame = async (): Promise<void> => {
      try {
        if (!this.client || !this.isScreencastActive) {
          logger.log('warn', 'client is not initialized');
          return;
        }
        await this.client.send('Page.screencastFrameAck', { sessionId: sessionId });
      } catch (e: any) {
        logger.log('error', `Screencast error: ${e}`);
      }
    };

    setTimeout(acknowledgeFrame, 100);
  });
};
|
2024-06-01 10:59:15 +05:30
|
|
|
|
2025-06-23 13:21:46 +05:30
|
|
|
/**
 * Starts DOM streaming: enables the CDP `DOM` and `CSS` domains, marks
 * streaming as active, emits an initial DOM snapshot and installs the scroll
 * and page-change listeners.
 *
 * Does nothing (beyond a warning) when no CDP client is available. If any
 * step fails, the error is logged and streaming is marked inactive again.
 */
public async subscribeToDOM(): Promise<void> {
  const hasClient = Boolean(this.client);
  if (!hasClient) {
    logger.warn("DOM streaming requires scraping browser with CDP client");
    return;
  }

  try {
    // Enable required CDP domains
    await this.client!.send("DOM.enable");
    await this.client!.send("CSS.enable");

    this.isDOMStreamingActive = true;
    logger.info("DOM streaming started successfully");

    // Initial DOM snapshot
    await this.makeAndEmitDOMSnapshot();

    this.setupScrollEventListener();
    this.setupPageChangeListeners();
  } catch (startupError) {
    logger.error("Failed to start DOM streaming:", startupError);
    this.isDOMStreamingActive = false;
  }
}
|
2025-06-30 15:14:42 +05:30
|
|
|
|
2025-06-23 13:21:46 +05:30
|
|
|
/**
 * Captures an rrweb snapshot of the current page, post-processes it and emits
 * it to the client together with the current scroll position.
 *
 * Does nothing when there is no current page, DOM streaming is inactive, or
 * the page is already closed. Errors caused by navigation or by the page
 * being closed mid-capture are expected and only logged at debug level; any
 * other failure is logged and reported to the client via `dom-mode-error`.
 */
public async makeAndEmitDOMSnapshot(): Promise<void> {
  if (!this.currentPage || !this.isDOMStreamingActive) {
    return;
  }

  try {
    // Check if page is still valid and not closed
    if (this.currentPage.isClosed()) {
      logger.debug("Skipping DOM snapshot - page is closed");
      return;
    }

    // Get current scroll position
    const currentScrollInfo = await this.currentPage.evaluate(() => ({
      x: window.scrollX,
      y: window.scrollY,
      maxX: Math.max(
        0,
        document.documentElement.scrollWidth - window.innerWidth
      ),
      maxY: Math.max(
        0,
        document.documentElement.scrollHeight - window.innerHeight
      ),
      documentHeight: document.documentElement.scrollHeight,
    }));

    logger.info(
      `Creating rrweb snapshot at scroll position: ${currentScrollInfo.y}/${currentScrollInfo.maxY}`
    );

    // Update our tracked scroll position
    this.lastScrollPosition = {
      x: currentScrollInfo.x,
      y: currentScrollInfo.y,
    };

    // The page may have been closed while the scroll position was being read
    if (this.currentPage.isClosed()) {
      logger.debug("Skipping DOM snapshot - page closed before snapshot");
      return;
    }

    // Capture snapshot using rrweb
    const rawSnapshot = await this.currentPage.evaluate(() => {
      if (typeof window.rrwebSnapshot === "undefined") {
        throw new Error("rrweb-snapshot library not available");
      }

      return window.rrwebSnapshot.snapshot(document, {
        inlineImages: true,
        collectFonts: true,
      });
    });

    // Process the snapshot to proxy resources
    const processedSnapshot = await this.processRRWebSnapshot(rawSnapshot);

    // Add scroll position information
    const enhancedSnapshot = {
      ...processedSnapshot,
      scrollPosition: currentScrollInfo,
      captureTime: Date.now(),
    };

    // Emit the processed snapshot
    this.emitRRWebSnapshot(enhancedSnapshot);
  } catch (error) {
    // Handle navigation context destruction and page closure gracefully.
    // "has been closed" covers the closure message of newer Playwright
    // versions ("Target page, context or browser has been closed"), which
    // does not contain the older "Target closed" text.
    const message = error instanceof Error ? error.message : null;
    const isExpectedTeardown =
      message !== null &&
      (message.includes("Execution context was destroyed") ||
        message.includes("most likely because of a navigation") ||
        message.includes("Target closed") ||
        message.includes("has been closed"));

    if (isExpectedTeardown) {
      logger.debug("DOM snapshot skipped due to page navigation or closure");
      return; // Don't emit error for navigation - this is expected
    }

    logger.error("Failed to create rrweb snapshot:", error);
    this.socket.emit("dom-mode-error", {
      userId: this.userId,
      message: "Failed to create rrweb snapshot",
      error: error instanceof Error ? error.message : String(error),
      timestamp: Date.now(),
    });
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Sends a processed snapshot to the client on the `domcast` event, tagged
 * with the user id and the emission time.
 */
private emitRRWebSnapshot(processedSnapshot: ProcessedSnapshot): void {
  const payload = {
    snapshotData: processedSnapshot,
    userId: this.userId,
    timestamp: Date.now(),
  };
  this.socket.emit("domcast", payload);
}
|
|
|
|
|
|
|
|
|
|
/**
 * Stops DOM streaming: clears the streaming flag, cancels the DOM update
 * interval and the pending network-request timeout, drops queued network
 * requests and disables the CDP `DOM` and `CSS` domains.
 *
 * A failure while disabling the CDP domains is logged as a warning and does
 * not prevent the method from completing.
 */
private async stopDOM(): Promise<void> {
  this.isDOMStreamingActive = false;

  const updateTimer = this.domUpdateInterval;
  if (updateTimer) {
    clearInterval(updateTimer);
    this.domUpdateInterval = null;
  }

  const networkTimer = this.networkRequestTimeout;
  if (networkTimer) {
    clearTimeout(networkTimer);
    this.networkRequestTimeout = null;
  }

  this.pendingNetworkRequests = [];

  if (this.client) {
    try {
      await this.client.send("DOM.disable");
      await this.client.send("CSS.disable");
    } catch (disableError) {
      logger.warn("Error stopping DOM stream:", disableError);
    }
  }

  logger.info("DOM streaming stopped successfully");
}
|
|
|
|
|
|
2025-06-30 16:29:07 +05:30
|
|
|
/**
 * Terminates the streaming session and closes the remote browser.
 * If an interpretation was running it will be stopped.
 *
 * The teardown runs in independent stages (interpretation, streaming,
 * browser) so that a failure in an earlier stage is logged but never
 * prevents the browser from being closed.
 * @returns {Promise<void>}
 */
public async switchOff(): Promise<void> {
  // Flip the flags first so anything that checks them stops producing output.
  this.isScreencastActive = false;
  this.isDOMStreamingActive = false;

  // Stage 1: stop a running interpretation.
  try {
    await this.interpreter.stopInterpretation();
  } catch (error) {
    logger.error('Error during browser shutdown:', error);
  }

  // Stage 2: stop timers and both streaming modes.
  if (this.screencastInterval) {
    clearInterval(this.screencastInterval);
  }
  if (this.domUpdateInterval) {
    clearInterval(this.domUpdateInterval);
  }

  try {
    // stopScreencast logs an error when there is no CDP session, so keep it guarded.
    if (this.client) {
      await this.stopScreencast();
    }
    // stopDOM checks for the CDP session itself and also clears the DOM/network
    // timers, so it must run even when no session exists.
    await this.stopDOM();
  } catch (error) {
    logger.error('Error during browser shutdown:', error);
  }

  // Stage 3: always attempt to release the browser process.
  try {
    if (this.browser) {
      await this.browser.close();
    }
  } catch (error) {
    logger.error('Error during browser shutdown:', error);
  }

  this.screenshotQueue = [];
  //this.performanceMonitor.reset();
}
|
2024-06-01 10:59:15 +05:30
|
|
|
|
2025-01-06 11:54:48 +05:30
|
|
|
/**
 * Re-encodes a screenshot as PNG and bounds its dimensions.
 * The PNG quality is SCREENCAST_CONFIG.compressionQuality scaled to a
 * percentage; the image is resized to fit inside
 * SCREENCAST_CONFIG.maxWidth x SCREENCAST_CONFIG.maxHeight and is never enlarged.
 * If the conversion fails the error is logged and the input buffer is
 * returned unchanged.
 * @param screenshot raw screenshot bytes
 * @returns {Promise<Buffer>} the optimized image, or the original buffer on failure
 */
private async optimizeScreenshot(screenshot: Buffer): Promise<Buffer> {
  try {
    const pngQuality = Math.round(SCREENCAST_CONFIG.compressionQuality * 100);

    const pipeline = sharp(screenshot)
      .png({
        quality: pngQuality,
        compressionLevel: 6,
        adaptiveFiltering: true,
        force: true
      })
      .resize({
        width: SCREENCAST_CONFIG.maxWidth,
        height: SCREENCAST_CONFIG.maxHeight,
        fit: 'inside',
        withoutEnlargement: true,
        kernel: 'lanczos3'
      });

    const optimized = await pipeline.toBuffer();
    return optimized;
  } catch (error) {
    // Best effort: an unoptimized frame is better than no frame.
    logger.error('Screenshot optimization failed:', error);
    return screenshot;
  }
}
|
2025-03-24 01:10:30 +05:30
|
|
|
|
2025-01-06 11:54:48 +05:30
|
|
|
|
2024-07-09 22:58:18 +05:30
|
|
|
/**
 * Takes one screenshot of the current page and hands it to emitScreenshot.
 * Nothing is emitted when there is no current page. The emission itself is
 * started but not awaited. Errors are logged, never thrown.
 * @returns {Promise<void>}
 */
public makeAndEmitScreenshot = async (): Promise<void> => {
  try {
    const capturedFrame = await this.currentPage?.screenshot();
    if (!capturedFrame) {
      return;
    }
    // Fire-and-forget: emitScreenshot is intentionally not awaited here.
    void this.emitScreenshot(capturedFrame);
  } catch (e) {
    const message = (e as Error).message;
    logger.log('error', `Screenshot error: ${message}`);
  }
};
|
2024-06-01 10:59:49 +05:30
|
|
|
|
2024-06-01 11:00:28 +05:30
|
|
|
/**
 * Replaces the active socket instance.
 * Stores the new socket, re-runs the editor event registration and passes
 * the socket on to the generator and the interpreter when they exist.
 * @param socket socket.io socket instance used to communicate with the client side
 * @returns void
 */
public updateSocket = (socket: Socket): void => {
  this.socket = socket;

  // Must run after the assignment above.
  // NOTE(review): presumably binds the editor handlers to this.socket — confirm in registerEditorEvents.
  this.registerEditorEvents();

  // Propagate the socket to the collaborators that hold their own reference.
  this.generator?.updateSocket(socket);
  this.interpreter?.updateSocket(socket);
};
|
|
|
|
|
|
2024-06-01 11:03:01 +05:30
|
|
|
/**
 * Runs the workflow currently held by the generator inside the editor.
 * A fresh page is initialized first. The interpreter parameters are rebuilt
 * from the generator's parameter names: names already present in the
 * settings keep their value, all others are set to an empty string.
 * Logs an error and returns when the generator or the new page is missing.
 * @returns {Promise<void>}
 */
public interpretCurrentRecording = async (): Promise<void> => {
  logger.log('debug', 'Starting interpretation in the editor');

  if (!this.generator) {
    logger.log('error', 'Generator is not initialized');
    return;
  }

  // The workflow is read before the page is replaced, as in the original flow.
  const workflow = this.generator.AddGeneratedFlags(this.generator.getWorkflowFile());
  await this.initializeNewPage();

  if (!this.currentPage) {
    logger.log('error', 'Could not get a new page, returned undefined');
    return;
  }
  // this.currentPage.setViewportSize({ height: 400, width: 900 });

  const params = this.generator.getParams();
  if (params) {
    // The callback reads the previous params object; it is only replaced once reduce returns.
    this.interpreterSettings.params = params.reduce((acc, param) => (
      this.interpreterSettings.params && Object.keys(this.interpreterSettings.params).includes(param)
        ? { ...acc, [param]: this.interpreterSettings.params[param] }
        : { ...acc, [param]: '' }
    ), {});
  }

  logger.log('debug', `Starting interpretation with settings: ${JSON.stringify(this.interpreterSettings, null, 2)}`);

  await this.interpreter.interpretRecordingInEditor(
    workflow,
    this.currentPage,
    // Lets the interpreter swap the page this browser treats as current.
    (newPage: Page) => this.currentPage = newPage,
    this.interpreterSettings
  );

  // clear the active index from generator
  this.generator.clearLastIndex();
};
|
|
|
|
|
|
2024-06-01 11:03:44 +05:30
|
|
|
/**
 * Aborts the running workflow interpretation and then replaces the
 * current page with a newly initialized one.
 * @returns {Promise<void>}
 */
public stopCurrentInterpretation = async (): Promise<void> => {
  // Order matters: the interpreter is stopped before its page is replaced.
  await this.interpreter.stopInterpretation();

  await this.initializeNewPage();
};
|
|
|
|
|
|
|
|
|
|
/**
 * Accessor for the page this browser currently treats as active.
 * @returns {Page | null | undefined} the stored page reference, which may be unset
 */
public getCurrentPage = (): Page | null | undefined => this.currentPage;
|
|
|
|
|
|
2024-06-01 11:04:38 +05:30
|
|
|
/**
 * Changes the active page to the page instance on the given index
 * available in pages array on the {@link BrowserContext}.
 * Stops the screencast on the previous page, opens a CDP session for the
 * new page, notifies the client of the new URL and then restarts streaming
 * in the configured rendering mode (DOM streaming or screencast), matching
 * what initializeNewPage does for a fresh page.
 * @param tabIndex index of the page in the pages array on the {@link BrowserContext}
 * @returns {Promise<void>}
 */
private changeTab = async (tabIndex: number): Promise<void> => {
  const page = this.currentPage?.context().pages()[tabIndex];
  if (!page) {
    logger.log('error', `${tabIndex} index out of range of pages`)
    return;
  }

  await this.stopScreencast();
  this.currentPage = page;

  await this.setupPageEventListeners(this.currentPage);

  //await this.currentPage.setViewportSize({ height: 400, width: 900 })
  // The CDP session is bound to a page, so a new one is needed per tab.
  this.client = await this.currentPage.context().newCDPSession(this.currentPage);

  // Include userId in the URL change event
  this.socket.emit('urlChanged', {
    url: this.currentPage.url(),
    userId: this.userId
  });

  // Resume streaming in the active rendering mode; previously this always
  // started a screencast, even when the session was rendering in DOM mode.
  if (this.renderingMode === "dom") {
    await this.subscribeToDOM();
  } else {
    await this.makeAndEmitScreenshot();
    await this.subscribeToScreencast();
  }
};
|
|
|
|
|
|
2024-06-01 11:05:13 +05:30
|
|
|
/**
 * Internal method for a new page initialization.
 * Stops the screencast, opens a new page in the browser, closes the previous
 * page and makes the new one current. When a page was obtained, a CDP session
 * is created for it and the page is subscribed to DOM streaming or to the
 * screencast, depending on the rendering mode. Otherwise an error is logged.
 * @param options optional page options to be used when creating a new page
 * @returns {Promise<void>}
 */
private initializeNewPage = async (options?: Object): Promise<void> => {
  await this.stopScreencast();

  // newPage is undefined when there is no browser instance.
  const newPage = await (options
    ? this.browser?.newPage(options)
    : this.browser?.newPage());

  await newPage?.setExtraHTTPHeaders({
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
  });

  // The old page is closed only after the replacement has been requested.
  await this.currentPage?.close();
  this.currentPage = newPage;

  if (!this.currentPage) {
    logger.log('error', 'Could not get a new page, returned undefined');
    return;
  }

  await this.setupPageEventListeners(this.currentPage);

  this.client = await this.currentPage.context().newCDPSession(this.currentPage);

  await (this.renderingMode === "dom"
    ? this.subscribeToDOM()
    : this.subscribeToScreencast());
};
|
|
|
|
|
|
2024-06-01 11:02:21 +05:30
|
|
|
/**
 * Starts the CDP screencast on the current session and wires up frame handling.
 * Sends Page.startScreencast with the SCREENCAST_CONFIG settings, marks the
 * screencast as active and registers a Page.screencastFrame listener.
 * Each frame is either dropped (when the screenshot queue is full while a
 * frame is being processed) or passed to emitScreenshot; in both cases the
 * frame is acknowledged so the browser keeps sending.
 * Does nothing except log a warning when no CDP session exists.
 * @returns {Promise<void>}
 */
private async startScreencast(): Promise<void> {
  if (!this.client) {
    logger.warn('Client is not initialized');
    return;
  }

  try {
    const screencastOptions = {
      format: SCREENCAST_CONFIG.format,
      quality: Math.round(SCREENCAST_CONFIG.compressionQuality * 100),
      maxWidth: SCREENCAST_CONFIG.maxWidth,
      maxHeight: SCREENCAST_CONFIG.maxHeight,
      everyNthFrame: 1
    };
    await this.client.send('Page.startScreencast', screencastOptions);

    this.isScreencastActive = true;

    this.client.on('Page.screencastFrame', async ({ data, sessionId }) => {
      // Acknowledges this frame on whatever CDP session is current at call time.
      const acknowledgeFrame = async (): Promise<void> => {
        await this.client?.send('Page.screencastFrameAck', { sessionId });
      };

      try {
        const backlogIsFull = this.screenshotQueue.length >= SCREENCAST_CONFIG.maxQueueSize;
        if (backlogIsFull && this.isProcessingScreenshot) {
          // Drop the frame but still acknowledge it.
          await acknowledgeFrame();
          return;
        }

        const frameBuffer = Buffer.from(data, 'base64');
        // Not awaited: emission proceeds while the ack is scheduled.
        void this.emitScreenshot(frameBuffer);

        // The ack is deferred by 10 ms rather than sent immediately.
        setTimeout(async () => {
          try {
            await acknowledgeFrame();
          } catch (e) {
            logger.error('Error acknowledging screencast frame:', e);
          }
        }, 10);
      } catch (error) {
        logger.error('Screencast frame processing failed:', error);

        // Still try to acknowledge the frame after a processing failure.
        try {
          await acknowledgeFrame();
        } catch (ackError) {
          logger.error('Failed to acknowledge screencast frame:', ackError);
        }
      }
    });

    logger.info('Screencast started successfully');
  } catch (error) {
    logger.error('Failed to start screencast:', error);
  }
}
|
|
|
|
|
|
|
|
|
|
/**
 * Stops the CDP screencast on the current session.
 * Marks the screencast as inactive, sends Page.stopScreencast and resets the
 * screenshot queue and the processing flag. The reset happens even when the
 * CDP command fails, so a failed stop cannot leave stale frames queued.
 * Logs an error and returns when no CDP session exists.
 * @returns {Promise<void>}
 */
private async stopScreencast(): Promise<void> {
  if (!this.client) {
    logger.error('Client is not initialized');
    return;
  }

  try {
    // Mark the screencast as inactive before asking the browser to stop.
    this.isScreencastActive = false;
    await this.client.send('Page.stopScreencast');
    logger.info('Screencast stopped successfully');
  } catch (error) {
    logger.error('Failed to stop screencast:', error);
  } finally {
    // Runs on success and on failure: drop queued frames and release the processing flag.
    this.screenshotQueue = [];
    this.isProcessingScreenshot = false;
  }
}
|
|
|
|
|
|
2024-06-01 11:02:21 +05:30
|
|
|
|
2024-06-01 11:01:12 +05:30
|
|
|
/**
 * Helper for emitting the screenshot of browser's active page through websocket.
 *
 * Only one screenshot is processed at a time. While one is in flight, further
 * screenshots are queued up to SCREENCAST_CONFIG.maxQueueSize and dropped
 * beyond that. The screenshot is optimized, but if optimization takes longer
 * than 100 ms the unoptimized payload is sent instead. The frame is emitted
 * as a base64 data URL in a "screencast" event on the next setImmediate tick.
 * When processing finishes, the next queued screenshot (if any) is scheduled.
 * Errors are logged, never thrown.
 * @param payload the screenshot binary data
 * @param viewportSize optional viewport to report; falls back to the current page's viewport, then null
 * @returns {Promise<void>}
 */
private emitScreenshot = async (payload: Buffer, viewportSize?: { width: number, height: number }): Promise<void> => {
  // Queue overflowed: keep only the most recent entry.
  if (this.screenshotQueue.length > SCREENCAST_CONFIG.maxQueueSize) {
    this.screenshotQueue = this.screenshotQueue.slice(-1);
  }

  if (this.isProcessingScreenshot) {
    if (this.screenshotQueue.length < SCREENCAST_CONFIG.maxQueueSize) {
      this.screenshotQueue.push(payload);
    }
    return;
  }

  this.isProcessingScreenshot = true;

  // Shared by the normal and the fallback path. The emit is deferred with
  // setImmediate, so it runs outside the surrounding try/catch; it needs its
  // own error handling or a failure would become an unhandled rejection.
  const emitFrame = (image: string): void => {
    setImmediate(async () => {
      try {
        this.socket.emit('screencast', {
          image,
          userId: this.userId,
          viewport: viewportSize || await this.currentPage?.viewportSize() || null
        });
      } catch (emitError) {
        logger.error('Screenshot emission failed:', emitError);
      }
    });
  };

  let optimizationTimer: ReturnType<typeof setTimeout> | undefined;

  try {
    const optimizationPromise = this.optimizeScreenshot(payload);
    // Give up on optimization after 100 ms and send the raw payload instead.
    const timeoutPromise = new Promise<Buffer>((resolve) => {
      optimizationTimer = setTimeout(() => resolve(payload), 100);
    });

    const optimizedScreenshot = await Promise.race([optimizationPromise, timeoutPromise]);
    const base64Data = optimizedScreenshot.toString('base64');
    emitFrame(`data:image/${SCREENCAST_CONFIG.format};base64,${base64Data}`);
  } catch (error) {
    logger.error('Screenshot emission failed:', error);
    try {
      // Fallback: send the unoptimized payload.
      const base64Data = payload.toString('base64');
      emitFrame(`data:image/png;base64,${base64Data}`);
    } catch (e) {
      logger.error('Fallback screenshot emission also failed:', e);
    }
  } finally {
    // The race is settled; do not leave the timeout pending.
    clearTimeout(optimizationTimer);

    this.isProcessingScreenshot = false;

    if (this.screenshotQueue.length > 0) {
      const nextScreenshot = this.screenshotQueue.shift();
      if (nextScreenshot) {
        // Shorter delay while more frames are still waiting.
        const delay = this.screenshotQueue.length > 0 ? 16 : 33;
        setTimeout(() => {
          this.emitScreenshot(nextScreenshot);
        }, delay);
      }
    }
  }
};
|
2025-01-06 11:55:15 +05:30
|
|
|
|
2024-06-01 10:55:04 +05:30
|
|
|
}
|