feat: rm network monitoring and caching
This commit is contained in:
@@ -80,7 +80,6 @@ interface ProcessedSnapshot {
|
|||||||
scripts: number;
|
scripts: number;
|
||||||
media: number;
|
media: number;
|
||||||
};
|
};
|
||||||
totalCacheSize: number;
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -199,35 +198,9 @@ export class RemoteBrowser {
|
|||||||
private snapshotDebounceTimeout: NodeJS.Timeout | null = null;
|
private snapshotDebounceTimeout: NodeJS.Timeout | null = null;
|
||||||
private isScrollTriggeredSnapshot = false;
|
private isScrollTriggeredSnapshot = false;
|
||||||
|
|
||||||
/**
|
private networkRequestTimeout: NodeJS.Timeout | null = null;
|
||||||
* Cache for network resources captured via CDP
|
private pendingNetworkRequests: string[] = [];
|
||||||
* @private
|
private readonly NETWORK_QUIET_PERIOD = 8000;
|
||||||
*/
|
|
||||||
private networkResourceCache: Map<
|
|
||||||
string,
|
|
||||||
{
|
|
||||||
url: string;
|
|
||||||
content: string;
|
|
||||||
mimeType: string;
|
|
||||||
base64Encoded: boolean;
|
|
||||||
timestamp: number;
|
|
||||||
resourceType?: string;
|
|
||||||
statusCode?: number;
|
|
||||||
headers?: Record<string, any>;
|
|
||||||
}
|
|
||||||
> = new Map();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Set to track active network requests
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
private activeRequests: Set<string> = new Set();
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Flag to indicate if network monitoring is active
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
private isNetworkMonitoringActive: boolean = false;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Initializes a new instances of the {@link Generator} and {@link WorkflowInterpreter} classes and
|
* Initializes a new instances of the {@link Generator} and {@link WorkflowInterpreter} classes and
|
||||||
@@ -254,224 +227,6 @@ export class RemoteBrowser {
|
|||||||
}, 30000); // Every 30 seconds
|
}, 30000); // Every 30 seconds
|
||||||
}
|
}
|
||||||
|
|
||||||
private processCSS(
|
|
||||||
cssContent: string,
|
|
||||||
cssUrl: string,
|
|
||||||
baseUrl: string,
|
|
||||||
resources?: any
|
|
||||||
): string {
|
|
||||||
try {
|
|
||||||
let processedContent = cssContent;
|
|
||||||
|
|
||||||
logger.debug(`Processing CSS from: ${cssUrl}`);
|
|
||||||
|
|
||||||
// Process @font-face declarations and collect font resources
|
|
||||||
processedContent = processedContent.replace(
|
|
||||||
/@font-face\s*\{([^}]*)\}/gi,
|
|
||||||
(fontFaceMatch, fontFaceContent) => {
|
|
||||||
let newFontFaceContent = fontFaceContent;
|
|
||||||
|
|
||||||
logger.debug(
|
|
||||||
`Processing @font-face block: ${fontFaceContent.substring(
|
|
||||||
0,
|
|
||||||
100
|
|
||||||
)}...`
|
|
||||||
);
|
|
||||||
|
|
||||||
newFontFaceContent = newFontFaceContent.replace(
|
|
||||||
/src\s*:\s*([^;}]+)[;}]/gi,
|
|
||||||
(srcMatch: any, srcValue: any) => {
|
|
||||||
let newSrcValue = srcValue;
|
|
||||||
|
|
||||||
newSrcValue = newSrcValue.replace(
|
|
||||||
/url\s*\(\s*['"]?([^'")]+)['"]?\s*\)(\s*format\s*\(\s*['"]?[^'")]*['"]?\s*\))?/gi,
|
|
||||||
(urlMatch: any, url: string, formatPart: any) => {
|
|
||||||
const originalUrl = url.trim();
|
|
||||||
|
|
||||||
logger.debug(`Found font URL in @font-face: ${originalUrl}`);
|
|
||||||
|
|
||||||
if (
|
|
||||||
originalUrl.startsWith("data:") ||
|
|
||||||
originalUrl.startsWith("blob:")
|
|
||||||
) {
|
|
||||||
return urlMatch;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
let absoluteUrl: string;
|
|
||||||
try {
|
|
||||||
absoluteUrl = new URL(originalUrl).href;
|
|
||||||
} catch (e) {
|
|
||||||
absoluteUrl = new URL(originalUrl, cssUrl || baseUrl)
|
|
||||||
.href;
|
|
||||||
}
|
|
||||||
|
|
||||||
const cachedResource =
|
|
||||||
this.networkResourceCache.get(absoluteUrl);
|
|
||||||
if (cachedResource && resources) {
|
|
||||||
const dataUrl = cachedResource.base64Encoded
|
|
||||||
? `data:${cachedResource.mimeType};base64,${cachedResource.content}`
|
|
||||||
: `data:${cachedResource.mimeType};base64,${Buffer.from(
|
|
||||||
cachedResource.content,
|
|
||||||
"utf-8"
|
|
||||||
).toString("base64")}`;
|
|
||||||
|
|
||||||
resources.fonts.push({
|
|
||||||
url: absoluteUrl,
|
|
||||||
dataUrl,
|
|
||||||
format: originalUrl.split(".").pop()?.split("?")[0],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Keep original URL in CSS
|
|
||||||
return urlMatch;
|
|
||||||
} catch (e) {
|
|
||||||
logger.warn(
|
|
||||||
"Failed to process font URL in @font-face:",
|
|
||||||
originalUrl,
|
|
||||||
e
|
|
||||||
);
|
|
||||||
return urlMatch;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
return `src: ${newSrcValue};`;
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
return `@font-face {${newFontFaceContent}}`;
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
// Process other url() references and collect resources
|
|
||||||
processedContent = processedContent.replace(
|
|
||||||
/url\s*\(\s*['"]?([^'")]+)['"]?\s*\)/gi,
|
|
||||||
(match, url) => {
|
|
||||||
const originalUrl = url.trim();
|
|
||||||
|
|
||||||
if (
|
|
||||||
originalUrl.startsWith("data:") ||
|
|
||||||
originalUrl.startsWith("blob:")
|
|
||||||
) {
|
|
||||||
return match;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
let absoluteUrl: string;
|
|
||||||
try {
|
|
||||||
absoluteUrl = new URL(originalUrl).href;
|
|
||||||
} catch (e) {
|
|
||||||
absoluteUrl = new URL(originalUrl, cssUrl || baseUrl).href;
|
|
||||||
}
|
|
||||||
|
|
||||||
const cachedResource = this.networkResourceCache.get(absoluteUrl);
|
|
||||||
if (cachedResource && resources) {
|
|
||||||
const lowerMimeType = cachedResource.mimeType.toLowerCase();
|
|
||||||
|
|
||||||
if (lowerMimeType.includes("image/")) {
|
|
||||||
const dataUrl = cachedResource.base64Encoded
|
|
||||||
? `data:${cachedResource.mimeType};base64,${cachedResource.content}`
|
|
||||||
: `data:${cachedResource.mimeType};base64,${Buffer.from(
|
|
||||||
cachedResource.content,
|
|
||||||
"utf-8"
|
|
||||||
).toString("base64")}`;
|
|
||||||
|
|
||||||
resources.images.push({
|
|
||||||
src: absoluteUrl,
|
|
||||||
dataUrl,
|
|
||||||
alt: "",
|
|
||||||
});
|
|
||||||
} else if (
|
|
||||||
lowerMimeType.includes("font/") ||
|
|
||||||
lowerMimeType.includes("application/font")
|
|
||||||
) {
|
|
||||||
const dataUrl = cachedResource.base64Encoded
|
|
||||||
? `data:${cachedResource.mimeType};base64,${cachedResource.content}`
|
|
||||||
: `data:${cachedResource.mimeType};base64,${Buffer.from(
|
|
||||||
cachedResource.content,
|
|
||||||
"utf-8"
|
|
||||||
).toString("base64")}`;
|
|
||||||
|
|
||||||
resources.fonts.push({
|
|
||||||
url: absoluteUrl,
|
|
||||||
dataUrl,
|
|
||||||
format: originalUrl.split(".").pop()?.split("?")[0],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Keep original URL in CSS
|
|
||||||
return match;
|
|
||||||
} catch (e) {
|
|
||||||
logger.warn(`Failed to process CSS URL: ${originalUrl}`, e);
|
|
||||||
return match;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
// Process @import statements and collect stylesheets
|
|
||||||
processedContent = processedContent.replace(
|
|
||||||
/@import\s+(?:url\s*\(\s*)?['"]?([^'")]+)['"]?\s*\)?([^;]*);?/gi,
|
|
||||||
(match, url, mediaQuery) => {
|
|
||||||
const originalUrl = url.trim();
|
|
||||||
|
|
||||||
if (
|
|
||||||
originalUrl.startsWith("data:") ||
|
|
||||||
originalUrl.startsWith("blob:")
|
|
||||||
) {
|
|
||||||
return match;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
let absoluteUrl: string;
|
|
||||||
try {
|
|
||||||
absoluteUrl = new URL(originalUrl).href;
|
|
||||||
} catch (e) {
|
|
||||||
absoluteUrl = new URL(originalUrl, cssUrl || baseUrl).href;
|
|
||||||
}
|
|
||||||
|
|
||||||
const cachedResource = this.networkResourceCache.get(absoluteUrl);
|
|
||||||
if (
|
|
||||||
cachedResource &&
|
|
||||||
resources &&
|
|
||||||
cachedResource.mimeType.includes("css")
|
|
||||||
) {
|
|
||||||
const content = cachedResource.base64Encoded
|
|
||||||
? Buffer.from(cachedResource.content, "base64").toString(
|
|
||||||
"utf-8"
|
|
||||||
)
|
|
||||||
: cachedResource.content;
|
|
||||||
|
|
||||||
resources.stylesheets.push({
|
|
||||||
href: absoluteUrl,
|
|
||||||
content: this.processCSS(
|
|
||||||
content,
|
|
||||||
absoluteUrl,
|
|
||||||
baseUrl,
|
|
||||||
resources
|
|
||||||
),
|
|
||||||
media: mediaQuery ? mediaQuery.trim() : "all",
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// Keep original @import
|
|
||||||
return match;
|
|
||||||
} catch (e) {
|
|
||||||
logger.warn(`Failed to process CSS @import: ${originalUrl}`, e);
|
|
||||||
return match;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
logger.debug(`CSS processing completed for: ${cssUrl}`);
|
|
||||||
return processedContent;
|
|
||||||
} catch (error) {
|
|
||||||
logger.error("Failed to process CSS content:", error);
|
|
||||||
return cssContent; // Return original content if processing fails
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private async processRRWebSnapshot(
|
private async processRRWebSnapshot(
|
||||||
snapshot: RRWebSnapshot
|
snapshot: RRWebSnapshot
|
||||||
): Promise<ProcessedSnapshot> {
|
): Promise<ProcessedSnapshot> {
|
||||||
@@ -489,188 +244,13 @@ export class RemoteBrowser {
|
|||||||
media: [] as Array<{ src: string; dataUrl: string; type: string }>,
|
media: [] as Array<{ src: string; dataUrl: string; type: string }>,
|
||||||
};
|
};
|
||||||
|
|
||||||
const processNode = (node: RRWebSnapshot): RRWebSnapshot => {
|
|
||||||
const processedNode = { ...node };
|
|
||||||
|
|
||||||
// Process attributes if they exist
|
|
||||||
if (node.attributes) {
|
|
||||||
const newAttributes = { ...node.attributes };
|
|
||||||
|
|
||||||
// Process common attributes that contain URLs
|
|
||||||
const urlAttributes = ["src", "href", "data", "poster", "background"];
|
|
||||||
|
|
||||||
for (const attr of urlAttributes) {
|
|
||||||
if (newAttributes[attr]) {
|
|
||||||
const originalUrl = newAttributes[attr];
|
|
||||||
|
|
||||||
// Categorize and collect the resource instead of proxying
|
|
||||||
const lowerAttr = attr.toLowerCase();
|
|
||||||
const lowerUrl = originalUrl.toLowerCase();
|
|
||||||
|
|
||||||
if (lowerAttr === "src" && node.tagName?.toLowerCase() === "img") {
|
|
||||||
const cachedResource = this.networkResourceCache.get(originalUrl);
|
|
||||||
if (
|
|
||||||
cachedResource &&
|
|
||||||
cachedResource.mimeType.includes("image/")
|
|
||||||
) {
|
|
||||||
const dataUrl = cachedResource.base64Encoded
|
|
||||||
? `data:${cachedResource.mimeType};base64,${cachedResource.content}`
|
|
||||||
: `data:${cachedResource.mimeType};base64,${Buffer.from(
|
|
||||||
cachedResource.content,
|
|
||||||
"utf-8"
|
|
||||||
).toString("base64")}`;
|
|
||||||
|
|
||||||
resources.images.push({
|
|
||||||
src: originalUrl,
|
|
||||||
dataUrl,
|
|
||||||
alt: newAttributes.alt,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else if (
|
|
||||||
lowerAttr === "href" &&
|
|
||||||
node.tagName?.toLowerCase() === "link"
|
|
||||||
) {
|
|
||||||
const rel = newAttributes.rel?.toLowerCase() || "";
|
|
||||||
|
|
||||||
if (rel.includes("stylesheet")) {
|
|
||||||
const cachedResource =
|
|
||||||
this.networkResourceCache.get(originalUrl);
|
|
||||||
if (cachedResource && cachedResource.mimeType.includes("css")) {
|
|
||||||
let content = cachedResource.base64Encoded
|
|
||||||
? Buffer.from(cachedResource.content, "base64").toString(
|
|
||||||
"utf-8"
|
|
||||||
)
|
|
||||||
: cachedResource.content;
|
|
||||||
|
|
||||||
// Process CSS to collect embedded resources
|
|
||||||
content = this.processCSS(
|
|
||||||
content,
|
|
||||||
originalUrl,
|
|
||||||
baseUrl,
|
|
||||||
resources
|
|
||||||
);
|
|
||||||
|
|
||||||
resources.stylesheets.push({
|
|
||||||
href: originalUrl,
|
|
||||||
content,
|
|
||||||
media: newAttributes.media || "all",
|
|
||||||
});
|
|
||||||
}
|
|
||||||
} else if (
|
|
||||||
rel.includes("font") ||
|
|
||||||
lowerUrl.match(/\.(woff2?|ttf|otf|eot)(\?.*)?$/i)
|
|
||||||
) {
|
|
||||||
const cachedResource =
|
|
||||||
this.networkResourceCache.get(originalUrl);
|
|
||||||
if (cachedResource) {
|
|
||||||
const dataUrl = cachedResource.base64Encoded
|
|
||||||
? `data:${cachedResource.mimeType};base64,${cachedResource.content}`
|
|
||||||
: `data:${cachedResource.mimeType};base64,${Buffer.from(
|
|
||||||
cachedResource.content,
|
|
||||||
"utf-8"
|
|
||||||
).toString("base64")}`;
|
|
||||||
|
|
||||||
resources.fonts.push({
|
|
||||||
url: originalUrl,
|
|
||||||
dataUrl,
|
|
||||||
format: lowerUrl.split(".").pop()?.split("?")[0],
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process srcset attribute - collect resources but keep original URLs
|
|
||||||
if (newAttributes.srcset) {
|
|
||||||
const originalSrcset = newAttributes.srcset;
|
|
||||||
originalSrcset.split(",").forEach((srcsetItem) => {
|
|
||||||
const parts = srcsetItem.trim().split(/\s+/);
|
|
||||||
const url = parts[0];
|
|
||||||
|
|
||||||
if (url && !url.startsWith("data:") && !url.startsWith("blob:")) {
|
|
||||||
const cachedResource = this.networkResourceCache.get(url);
|
|
||||||
if (
|
|
||||||
cachedResource &&
|
|
||||||
cachedResource.mimeType.includes("image/")
|
|
||||||
) {
|
|
||||||
const dataUrl = cachedResource.base64Encoded
|
|
||||||
? `data:${cachedResource.mimeType};base64,${cachedResource.content}`
|
|
||||||
: `data:${cachedResource.mimeType};base64,${Buffer.from(
|
|
||||||
cachedResource.content,
|
|
||||||
"utf-8"
|
|
||||||
).toString("base64")}`;
|
|
||||||
|
|
||||||
resources.images.push({
|
|
||||||
src: url,
|
|
||||||
dataUrl,
|
|
||||||
alt: newAttributes.alt,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
processedNode.attributes = newAttributes;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process text content for style elements
|
|
||||||
if (node.tagName?.toLowerCase() === "style" && node.textContent) {
|
|
||||||
let content = node.textContent;
|
|
||||||
|
|
||||||
// Process CSS content to collect embedded resources
|
|
||||||
content = this.processCSS(content, "", baseUrl, resources);
|
|
||||||
processedNode.textContent = content;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Recursively process child nodes
|
|
||||||
if (node.childNodes) {
|
|
||||||
processedNode.childNodes = node.childNodes.map(processNode);
|
|
||||||
}
|
|
||||||
|
|
||||||
return processedNode;
|
|
||||||
};
|
|
||||||
|
|
||||||
const processedSnapshot = processNode(snapshot);
|
|
||||||
|
|
||||||
// Add cached scripts and media
|
|
||||||
for (const [url, resource] of this.networkResourceCache.entries()) {
|
|
||||||
if (resource.mimeType.toLowerCase().includes("javascript")) {
|
|
||||||
const content = resource.base64Encoded
|
|
||||||
? Buffer.from(resource.content, "base64").toString("utf-8")
|
|
||||||
: resource.content;
|
|
||||||
|
|
||||||
resources.scripts.push({
|
|
||||||
src: url,
|
|
||||||
content,
|
|
||||||
type: "text/javascript",
|
|
||||||
});
|
|
||||||
} else if (
|
|
||||||
resource.mimeType.toLowerCase().includes("video/") ||
|
|
||||||
resource.mimeType.toLowerCase().includes("audio/")
|
|
||||||
) {
|
|
||||||
const dataUrl = resource.base64Encoded
|
|
||||||
? `data:${resource.mimeType};base64,${resource.content}`
|
|
||||||
: `data:${resource.mimeType};base64,${Buffer.from(
|
|
||||||
resource.content,
|
|
||||||
"utf-8"
|
|
||||||
).toString("base64")}`;
|
|
||||||
|
|
||||||
resources.media.push({
|
|
||||||
src: url,
|
|
||||||
dataUrl,
|
|
||||||
type: resource.mimeType,
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
const viewport = (await this.currentPage?.viewportSize()) || {
|
const viewport = (await this.currentPage?.viewportSize()) || {
|
||||||
width: 1280,
|
width: 1280,
|
||||||
height: 720,
|
height: 720,
|
||||||
};
|
};
|
||||||
|
|
||||||
return {
|
return {
|
||||||
snapshot: processedSnapshot,
|
snapshot,
|
||||||
resources,
|
resources,
|
||||||
baseUrl,
|
baseUrl,
|
||||||
viewport,
|
viewport,
|
||||||
@@ -690,246 +270,10 @@ export class RemoteBrowser {
|
|||||||
scripts: resources.scripts.length,
|
scripts: resources.scripts.length,
|
||||||
media: resources.media.length,
|
media: resources.media.length,
|
||||||
},
|
},
|
||||||
totalCacheSize: this.networkResourceCache.size,
|
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Check if a resource should be cached based on its MIME type and URL
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
private shouldCacheResource(mimeType: string, url: string): boolean {
|
|
||||||
const lowerMimeType = mimeType.toLowerCase();
|
|
||||||
const lowerUrl = url.toLowerCase();
|
|
||||||
|
|
||||||
// CSS Resources
|
|
||||||
if (
|
|
||||||
lowerMimeType.includes("text/css") ||
|
|
||||||
lowerMimeType.includes("application/css") ||
|
|
||||||
lowerUrl.endsWith(".css")
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Font Resources
|
|
||||||
if (
|
|
||||||
lowerMimeType.includes("font/") ||
|
|
||||||
lowerMimeType.includes("application/font") ||
|
|
||||||
lowerMimeType.includes("application/x-font") ||
|
|
||||||
lowerUrl.match(/\.(woff2?|ttf|otf|eot)(\?.*)?$/)
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Image Resources
|
|
||||||
if (
|
|
||||||
lowerMimeType.includes("image/") ||
|
|
||||||
lowerUrl.match(/\.(jpg|jpeg|png|gif|webp|svg|ico|bmp|tiff|avif)(\?.*)?$/)
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// JavaScript Resources
|
|
||||||
if (
|
|
||||||
lowerMimeType.includes("javascript") ||
|
|
||||||
lowerMimeType.includes("text/js") ||
|
|
||||||
lowerMimeType.includes("application/js") ||
|
|
||||||
lowerUrl.match(/\.js(\?.*)?$/)
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Media Resources
|
|
||||||
if (
|
|
||||||
lowerMimeType.includes("video/") ||
|
|
||||||
lowerMimeType.includes("audio/") ||
|
|
||||||
lowerUrl.match(
|
|
||||||
/\.(mp4|webm|ogg|avi|mov|wmv|flv|mp3|wav|m4a|aac|flac)(\?.*)?$/
|
|
||||||
)
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Document Resources
|
|
||||||
if (
|
|
||||||
lowerMimeType.includes("application/pdf") ||
|
|
||||||
lowerMimeType.includes("application/msword") ||
|
|
||||||
lowerMimeType.includes("application/vnd.ms-") ||
|
|
||||||
lowerMimeType.includes("application/vnd.openxmlformats-") ||
|
|
||||||
lowerUrl.match(/\.(pdf|doc|docx|xls|xlsx|ppt|pptx)(\?.*)?$/)
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Manifest and Icon Resources
|
|
||||||
if (
|
|
||||||
lowerMimeType.includes("application/manifest+json") ||
|
|
||||||
lowerUrl.includes("manifest.json") ||
|
|
||||||
lowerUrl.includes("browserconfig.xml")
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// SVG Resources (can be images or fonts)
|
|
||||||
if (lowerMimeType.includes("image/svg+xml") || lowerUrl.endsWith(".svg")) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Other common web resources
|
|
||||||
if (
|
|
||||||
lowerMimeType.includes("application/octet-stream") &&
|
|
||||||
lowerUrl.match(/\.(woff2?|ttf|otf|eot|css|js)(\?.*)?$/)
|
|
||||||
) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Clean up old cached resources to prevent memory leaks
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
private cleanupResourceCache(): void {
|
|
||||||
const now = Date.now();
|
|
||||||
const maxAge = 5 * 60 * 1000; // 5 minutes
|
|
||||||
|
|
||||||
for (const [url, resource] of this.networkResourceCache.entries()) {
|
|
||||||
if (now - resource.timestamp > maxAge) {
|
|
||||||
this.networkResourceCache.delete(url);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.networkResourceCache.size > 200) {
|
|
||||||
const entries = Array.from(this.networkResourceCache.entries());
|
|
||||||
entries.sort((a, b) => a[1].timestamp - b[1].timestamp);
|
|
||||||
|
|
||||||
for (let i = 0; i < 50; i++) {
|
|
||||||
this.networkResourceCache.delete(entries[i][0]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.debug(
|
|
||||||
`Resource cache cleaned up. Current size: ${this.networkResourceCache.size}`
|
|
||||||
);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Initialize network monitoring via CDP to capture all resources
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
private async initializeNetworkMonitoring(): Promise<void> {
|
|
||||||
if (!this.client || this.isNetworkMonitoringActive) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
await this.client.send("Network.enable");
|
|
||||||
await this.client.send("Runtime.enable");
|
|
||||||
await this.client.send("Page.enable");
|
|
||||||
|
|
||||||
await this.client.send("Network.setRequestInterception", {
|
|
||||||
patterns: [
|
|
||||||
{ urlPattern: "*", resourceType: "Stylesheet" },
|
|
||||||
{ urlPattern: "*", resourceType: "Image" },
|
|
||||||
{ urlPattern: "*", resourceType: "Font" },
|
|
||||||
{ urlPattern: "*", resourceType: "Script" },
|
|
||||||
{ urlPattern: "*", resourceType: "Media" },
|
|
||||||
{ urlPattern: "*", resourceType: "Document" },
|
|
||||||
{ urlPattern: "*", resourceType: "Manifest" },
|
|
||||||
{ urlPattern: "*", resourceType: "Other" },
|
|
||||||
],
|
|
||||||
});
|
|
||||||
|
|
||||||
this.isNetworkMonitoringActive = true;
|
|
||||||
logger.info("Enhanced network monitoring enabled via CDP");
|
|
||||||
|
|
||||||
this.client.on(
|
|
||||||
"Network.responseReceived",
|
|
||||||
async ({ requestId, response, type }) => {
|
|
||||||
const mimeType = response.mimeType?.toLowerCase() || "";
|
|
||||||
const url = response.url;
|
|
||||||
const resourceType = type;
|
|
||||||
|
|
||||||
logger.debug(
|
|
||||||
`Resource received: ${resourceType} - ${mimeType} - ${url}`
|
|
||||||
);
|
|
||||||
|
|
||||||
if (this.shouldCacheResource(mimeType, url)) {
|
|
||||||
this.activeRequests.add(requestId);
|
|
||||||
|
|
||||||
try {
|
|
||||||
const { body, base64Encoded } = await this.client!.send(
|
|
||||||
"Network.getResponseBody",
|
|
||||||
{ requestId }
|
|
||||||
);
|
|
||||||
|
|
||||||
this.networkResourceCache.set(url, {
|
|
||||||
url,
|
|
||||||
content: body,
|
|
||||||
mimeType: response.mimeType || "application/octet-stream",
|
|
||||||
base64Encoded,
|
|
||||||
timestamp: Date.now(),
|
|
||||||
resourceType,
|
|
||||||
statusCode: response.status,
|
|
||||||
headers: response.headers,
|
|
||||||
});
|
|
||||||
|
|
||||||
logger.debug(
|
|
||||||
`Cached ${resourceType} resource: ${url} (${mimeType})`
|
|
||||||
);
|
|
||||||
} catch (error) {
|
|
||||||
logger.warn(
|
|
||||||
`Failed to capture ${resourceType} resource body for ${url}:`,
|
|
||||||
error
|
|
||||||
);
|
|
||||||
} finally {
|
|
||||||
this.activeRequests.delete(requestId);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
this.client.on(
|
|
||||||
"Network.requestIntercepted",
|
|
||||||
async ({ interceptionId, request }) => {
|
|
||||||
try {
|
|
||||||
await this.client!.send("Network.continueInterceptedRequest", {
|
|
||||||
interceptionId,
|
|
||||||
});
|
|
||||||
logger.debug(`Request intercepted and continued: ${request.url}`);
|
|
||||||
} catch (error) {
|
|
||||||
logger.warn(
|
|
||||||
`Failed to continue intercepted request for ${request.url}:`,
|
|
||||||
error
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
this.client.on(
|
|
||||||
"Network.loadingFailed",
|
|
||||||
({ requestId, errorText, type }) => {
|
|
||||||
this.activeRequests.delete(requestId);
|
|
||||||
logger.debug(`Network request failed (${type}): ${errorText}`);
|
|
||||||
}
|
|
||||||
);
|
|
||||||
|
|
||||||
this.client.on("Network.loadingFinished", ({ requestId }) => {
|
|
||||||
this.activeRequests.delete(requestId);
|
|
||||||
});
|
|
||||||
|
|
||||||
// Clean up cache periodically
|
|
||||||
setInterval(() => {
|
|
||||||
this.cleanupResourceCache();
|
|
||||||
}, 60000);
|
|
||||||
} catch (error) {
|
|
||||||
logger.error("Failed to initialize enhanced network monitoring:", error);
|
|
||||||
this.isNetworkMonitoringActive = false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private initializeMemoryManagement(): void {
|
private initializeMemoryManagement(): void {
|
||||||
setInterval(() => {
|
setInterval(() => {
|
||||||
const memoryUsage = process.memoryUsage();
|
const memoryUsage = process.memoryUsage();
|
||||||
@@ -1098,9 +442,27 @@ export class RemoteBrowser {
|
|||||||
url.includes("api/") ||
|
url.includes("api/") ||
|
||||||
url.includes("ajax")
|
url.includes("ajax")
|
||||||
) {
|
) {
|
||||||
setTimeout(async () => {
|
this.pendingNetworkRequests.push(url);
|
||||||
|
|
||||||
|
if (this.networkRequestTimeout) {
|
||||||
|
clearTimeout(this.networkRequestTimeout);
|
||||||
|
this.networkRequestTimeout = null;
|
||||||
|
}
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
`Network request received: ${url}. Total pending: ${this.pendingNetworkRequests.length}`
|
||||||
|
);
|
||||||
|
|
||||||
|
this.networkRequestTimeout = setTimeout(async () => {
|
||||||
|
logger.info(
|
||||||
|
`Network quiet period reached. Processing ${this.pendingNetworkRequests.length} requests`
|
||||||
|
);
|
||||||
|
|
||||||
|
this.pendingNetworkRequests = [];
|
||||||
|
this.networkRequestTimeout = null;
|
||||||
|
|
||||||
await this.makeAndEmitDOMSnapshot();
|
await this.makeAndEmitDOMSnapshot();
|
||||||
}, 800);
|
}, this.NETWORK_QUIET_PERIOD);
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@@ -1253,11 +615,11 @@ export class RemoteBrowser {
|
|||||||
patchedGetter.apply(navigator);
|
patchedGetter.apply(navigator);
|
||||||
patchedGetter.toString();`
|
patchedGetter.toString();`
|
||||||
);
|
);
|
||||||
|
|
||||||
|
await this.context.addInitScript({ path: './server/src/browser-management/classes/rrweb-bundle.js' });
|
||||||
|
|
||||||
this.currentPage = await this.context.newPage();
|
this.currentPage = await this.context.newPage();
|
||||||
|
|
||||||
await this.currentPage.addInitScript({ path: './server/src/browser-management/classes/rrweb-bundle.js' });
|
|
||||||
|
|
||||||
await this.setupPageEventListeners(this.currentPage);
|
await this.setupPageEventListeners(this.currentPage);
|
||||||
|
|
||||||
const viewportSize = await this.currentPage.viewportSize();
|
const viewportSize = await this.currentPage.viewportSize();
|
||||||
@@ -1275,20 +637,10 @@ export class RemoteBrowser {
|
|||||||
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
||||||
await blocker.disableBlockingInPage(this.currentPage);
|
await blocker.disableBlockingInPage(this.currentPage);
|
||||||
console.log('Adblocker initialized');
|
console.log('Adblocker initialized');
|
||||||
|
|
||||||
if (this.client) {
|
|
||||||
await this.initializeNetworkMonitoring();
|
|
||||||
logger.info("Network monitoring initialized successfully");
|
|
||||||
}
|
|
||||||
} catch (error: any) {
|
} catch (error: any) {
|
||||||
console.warn('Failed to initialize adblocker, continuing without it:', error.message);
|
console.warn('Failed to initialize adblocker, continuing without it:', error.message);
|
||||||
// Still need to set up the CDP session even if blocker fails
|
// Still need to set up the CDP session even if blocker fails
|
||||||
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
||||||
|
|
||||||
if (this.client) {
|
|
||||||
await this.initializeNetworkMonitoring();
|
|
||||||
logger.info("Network monitoring initialized successfully");
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
success = true;
|
success = true;
|
||||||
@@ -1351,6 +703,11 @@ export class RemoteBrowser {
|
|||||||
}>,
|
}>,
|
||||||
limit: number = 5
|
limit: number = 5
|
||||||
): Promise<Array<Record<string, string>>> {
|
): Promise<Array<Record<string, string>>> {
|
||||||
|
if (page.isClosed()) {
|
||||||
|
logger.warn("Page is closed, cannot extract list data");
|
||||||
|
return [];
|
||||||
|
}
|
||||||
|
|
||||||
return await page.evaluate(
|
return await page.evaluate(
|
||||||
async ({ listSelector, fields, limit }: {
|
async ({ listSelector, fields, limit }: {
|
||||||
listSelector: string;
|
listSelector: string;
|
||||||
@@ -2120,52 +1477,11 @@ export class RemoteBrowser {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Wait for network requests to become idle
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
private async waitForNetworkIdle(timeout: number = 2000): Promise<void> {
|
|
||||||
const startTime = Date.now();
|
|
||||||
|
|
||||||
while (this.activeRequests.size > 0 && Date.now() - startTime < timeout) {
|
|
||||||
await new Promise((resolve) => setTimeout(resolve, 100));
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.activeRequests.size > 0) {
|
|
||||||
logger.debug(
|
|
||||||
`Network idle timeout reached with ${this.activeRequests.size} pending requests`
|
|
||||||
);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Stop network monitoring
|
|
||||||
* @private
|
|
||||||
*/
|
|
||||||
private async stopNetworkMonitoring(): Promise<void> {
|
|
||||||
if (!this.client || !this.isNetworkMonitoringActive) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
await this.client.send("Network.disable");
|
|
||||||
this.isNetworkMonitoringActive = false;
|
|
||||||
this.networkResourceCache.clear();
|
|
||||||
this.activeRequests.clear();
|
|
||||||
logger.info("Network monitoring stopped");
|
|
||||||
} catch (error) {
|
|
||||||
logger.error("Error stopping network monitoring:", error);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* CDP-based DOM snapshot creation using captured network resources
|
* CDP-based DOM snapshot creation using captured network resources
|
||||||
*/
|
*/
|
||||||
public async makeAndEmitDOMSnapshot(): Promise<void> {
|
public async makeAndEmitDOMSnapshot(): Promise<void> {
|
||||||
if (
|
if (!this.currentPage || !this.isDOMStreamingActive) {
|
||||||
!this.currentPage ||
|
|
||||||
!this.isDOMStreamingActive
|
|
||||||
) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -2176,9 +1492,6 @@ export class RemoteBrowser {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Wait for network to become idle
|
|
||||||
await this.waitForNetworkIdle();
|
|
||||||
|
|
||||||
// Double-check page state after network wait
|
// Double-check page state after network wait
|
||||||
if (this.currentPage.isClosed()) {
|
if (this.currentPage.isClosed()) {
|
||||||
logger.debug("Skipping DOM snapshot - page closed during network wait");
|
logger.debug("Skipping DOM snapshot - page closed during network wait");
|
||||||
@@ -2221,7 +1534,11 @@ export class RemoteBrowser {
|
|||||||
if (typeof window.rrwebSnapshot === "undefined") {
|
if (typeof window.rrwebSnapshot === "undefined") {
|
||||||
throw new Error("rrweb-snapshot library not available");
|
throw new Error("rrweb-snapshot library not available");
|
||||||
}
|
}
|
||||||
return window.rrwebSnapshot.snapshot(document);
|
|
||||||
|
return window.rrwebSnapshot.snapshot(document, {
|
||||||
|
inlineImages: true,
|
||||||
|
collectFonts: true,
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
// Process the snapshot to proxy resources
|
// Process the snapshot to proxy resources
|
||||||
@@ -2238,10 +1555,12 @@ export class RemoteBrowser {
|
|||||||
this.emitRRWebSnapshot(enhancedSnapshot);
|
this.emitRRWebSnapshot(enhancedSnapshot);
|
||||||
} catch (error) {
|
} catch (error) {
|
||||||
// Handle navigation context destruction gracefully
|
// Handle navigation context destruction gracefully
|
||||||
if (error instanceof Error &&
|
if (
|
||||||
(error.message.includes("Execution context was destroyed") ||
|
error instanceof Error &&
|
||||||
|
(error.message.includes("Execution context was destroyed") ||
|
||||||
error.message.includes("most likely because of a navigation") ||
|
error.message.includes("most likely because of a navigation") ||
|
||||||
error.message.includes("Target closed"))) {
|
error.message.includes("Target closed"))
|
||||||
|
) {
|
||||||
logger.debug("DOM snapshot skipped due to page navigation or closure");
|
logger.debug("DOM snapshot skipped due to page navigation or closure");
|
||||||
return; // Don't emit error for navigation - this is expected
|
return; // Don't emit error for navigation - this is expected
|
||||||
}
|
}
|
||||||
@@ -2278,6 +1597,11 @@ export class RemoteBrowser {
|
|||||||
this.domUpdateInterval = null;
|
this.domUpdateInterval = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (this.networkRequestTimeout) {
|
||||||
|
clearTimeout(this.networkRequestTimeout);
|
||||||
|
this.networkRequestTimeout = null;
|
||||||
|
}
|
||||||
|
|
||||||
if (this.client) {
|
if (this.client) {
|
||||||
try {
|
try {
|
||||||
await this.client.send("DOM.disable");
|
await this.client.send("DOM.disable");
|
||||||
@@ -2296,32 +1620,35 @@ export class RemoteBrowser {
|
|||||||
* @returns {Promise<void>}
|
* @returns {Promise<void>}
|
||||||
*/
|
*/
|
||||||
public async switchOff(): Promise<void> {
|
public async switchOff(): Promise<void> {
|
||||||
try {
|
try {
|
||||||
this.isScreencastActive = false;
|
this.isScreencastActive = false;
|
||||||
this.isDOMStreamingActive = false;
|
this.isDOMStreamingActive = false;
|
||||||
|
|
||||||
await this.interpreter.stopInterpretation();
|
await this.interpreter.stopInterpretation();
|
||||||
|
|
||||||
if (this.screencastInterval) {
|
if (this.screencastInterval) {
|
||||||
clearInterval(this.screencastInterval);
|
clearInterval(this.screencastInterval);
|
||||||
}
|
|
||||||
|
|
||||||
if (this.client) {
|
|
||||||
await this.stopScreencast();
|
|
||||||
await this.stopDOM();
|
|
||||||
await this.stopNetworkMonitoring();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (this.browser) {
|
|
||||||
await this.browser.close();
|
|
||||||
}
|
|
||||||
|
|
||||||
this.screenshotQueue = [];
|
|
||||||
//this.performanceMonitor.reset();
|
|
||||||
|
|
||||||
} catch (error) {
|
|
||||||
logger.error('Error during browser shutdown:', error);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (this.domUpdateInterval) {
|
||||||
|
clearInterval(this.domUpdateInterval);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.client) {
|
||||||
|
await this.stopScreencast();
|
||||||
|
await this.stopDOM();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (this.browser) {
|
||||||
|
await this.browser.close();
|
||||||
|
}
|
||||||
|
|
||||||
|
this.screenshotQueue = [];
|
||||||
|
//this.performanceMonitor.reset();
|
||||||
|
|
||||||
|
} catch (error) {
|
||||||
|
logger.error('Error during browser shutdown:', error);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private async optimizeScreenshot(screenshot: Buffer): Promise<Buffer> {
|
private async optimizeScreenshot(screenshot: Buffer): Promise<Buffer> {
|
||||||
@@ -2443,10 +1770,9 @@ export class RemoteBrowser {
|
|||||||
const page = this.currentPage?.context().pages()[tabIndex];
|
const page = this.currentPage?.context().pages()[tabIndex];
|
||||||
if (page) {
|
if (page) {
|
||||||
await this.stopScreencast();
|
await this.stopScreencast();
|
||||||
|
await this.stopDOM();
|
||||||
this.currentPage = page;
|
this.currentPage = page;
|
||||||
|
|
||||||
await this.currentPage.addInitScript({ path: './server/src/browser-management/classes/rrweb-bundle.js' });
|
|
||||||
|
|
||||||
await this.setupPageEventListeners(this.currentPage);
|
await this.setupPageEventListeners(this.currentPage);
|
||||||
|
|
||||||
//await this.currentPage.setViewportSize({ height: 400, width: 900 })
|
//await this.currentPage.setViewportSize({ height: 400, width: 900 })
|
||||||
@@ -2479,8 +1805,6 @@ export class RemoteBrowser {
|
|||||||
await this.currentPage?.close();
|
await this.currentPage?.close();
|
||||||
this.currentPage = newPage;
|
this.currentPage = newPage;
|
||||||
if (this.currentPage) {
|
if (this.currentPage) {
|
||||||
await this.currentPage.addInitScript({ path: './server/src/browser-management/classes/rrweb-bundle.js' });
|
|
||||||
|
|
||||||
await this.setupPageEventListeners(this.currentPage);
|
await this.setupPageEventListeners(this.currentPage);
|
||||||
|
|
||||||
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
this.client = await this.currentPage.context().newCDPSession(this.currentPage);
|
||||||
|
|||||||
Reference in New Issue
Block a user