Merge pull request #751 from getmaxun/reclag-fix

fix: highlighting gets stuck on heavy sites
This commit is contained in:
Karishma Shukla
2025-08-25 19:35:19 +05:30
committed by GitHub

View File

@@ -109,6 +109,11 @@ class ClientSelectorGenerator {
private getList: boolean = false; private getList: boolean = false;
private paginationMode: boolean = false; private paginationMode: boolean = false;
private pathCache = new WeakMap<HTMLElement, string | null>();
private descendantsCache = new WeakMap<HTMLElement, HTMLElement[]>();
private meaningfulCache = new WeakMap<HTMLElement, boolean>();
private selectorCache = new Map<string, string[]>();
private elementGroups: Map<HTMLElement, ElementGroup> = new Map(); private elementGroups: Map<HTMLElement, ElementGroup> = new Map();
private groupedElements: Set<HTMLElement> = new Set(); private groupedElements: Set<HTMLElement> = new Set();
private lastAnalyzedDocument: Document | null = null; private lastAnalyzedDocument: Document | null = null;
@@ -397,7 +402,22 @@ class ClientSelectorGenerator {
this.lastAnalyzedDocument = iframeDoc; this.lastAnalyzedDocument = iframeDoc;
// Get all visible elements INCLUDING shadow DOM // Get all visible elements INCLUDING shadow DOM
const allElements = this.getAllVisibleElementsWithShadow(iframeDoc); let allElements = this.getAllVisibleElementsWithShadow(iframeDoc);
if (this.getList === true && this.listSelector === "") {
const dialogElements = this.findAllDialogElements(iframeDoc);
if (dialogElements.length > 0) {
// Check if dialogs contain significant content worth analyzing
const dialogContentElements = this.getElementsFromDialogs(dialogElements);
// Only switch to dialog-focused analysis if dialogs have substantial content
if (dialogContentElements.length > 5) {
allElements = dialogContentElements;
}
}
}
const processedInTables = new Set<HTMLElement>(); const processedInTables = new Set<HTMLElement>();
// 1. Specifically find and group rows within each table, bypassing normal similarity checks. // 1. Specifically find and group rows within each table, bypassing normal similarity checks.
@@ -517,16 +537,42 @@ class ClientSelectorGenerator {
return meaningfulChildren; return meaningfulChildren;
} }
/**
 * Memoized wrapper around `isMeaningfulElement`.
 *
 * Looks the element up in `meaningfulCache` first; on a miss the verdict is
 * computed via `isMeaningfulElement`, stored in the cache, and returned.
 *
 * @param element - Element whose extraction relevance is being queried.
 * @returns The cached or freshly computed verdict for `element`.
 */
private isMeaningfulElementCached(element: HTMLElement): boolean {
  // Stored values are always booleans, so `undefined` can only mean "no entry yet".
  const remembered = this.meaningfulCache.get(element);
  if (remembered !== undefined) {
    return remembered;
  }

  const verdict = this.isMeaningfulElement(element);
  this.meaningfulCache.set(element, verdict);
  return verdict;
}
/** /**
* Check if element has meaningful content for extraction * Check if element has meaningful content for extraction
*/ */
private isMeaningfulElement(element: HTMLElement): boolean { private isMeaningfulElement(element: HTMLElement): boolean {
const tagName = element.tagName.toLowerCase(); const tagName = element.tagName.toLowerCase();
// Fast path for common meaningful elements
if (["a", "img", "input", "button", "select"].includes(tagName)) {
return true;
}
const text = (element.textContent || "").trim(); const text = (element.textContent || "").trim();
const hasHref = element.hasAttribute("href"); const hasHref = element.hasAttribute("href");
const hasSrc = element.hasAttribute("src"); const hasSrc = element.hasAttribute("src");
// Quick checks first
if (text.length > 0 || hasHref || hasSrc) {
return true;
}
const isCustomElement = tagName.includes("-"); const isCustomElement = tagName.includes("-");
// For custom elements, be more lenient about what's considered meaningful
if (isCustomElement) { if (isCustomElement) {
const hasChildren = element.children.length > 0; const hasChildren = element.children.length > 0;
const hasSignificantAttributes = Array.from(element.attributes).some( const hasSignificantAttributes = Array.from(element.attributes).some(
@@ -534,9 +580,6 @@ class ClientSelectorGenerator {
); );
return ( return (
text.length > 0 ||
hasHref ||
hasSrc ||
hasChildren || hasChildren ||
hasSignificantAttributes || hasSignificantAttributes ||
element.hasAttribute("role") || element.hasAttribute("role") ||
@@ -544,12 +587,7 @@ class ClientSelectorGenerator {
); );
} }
return ( return false;
text.length > 0 ||
hasHref ||
hasSrc ||
["a", "img", "input", "button", "select"].includes(tagName)
);
} }
/** /**
@@ -2457,6 +2495,13 @@ class ClientSelectorGenerator {
parentSelector: string parentSelector: string
): string[] => { ): string[] => {
try { try {
const cacheKey = `${parentSelector}_${iframeDoc.location?.href || 'doc'}`;
if (this.selectorCache.has(cacheKey)) {
return this.selectorCache.get(cacheKey)!;
}
this.pathCache = new WeakMap<HTMLElement, string | null>();
// Use XPath evaluation to find parent elements // Use XPath evaluation to find parent elements
let parentElements: HTMLElement[] = this.evaluateXPath( let parentElements: HTMLElement[] = this.evaluateXPath(
parentSelector, parentSelector,
@@ -2468,9 +2513,17 @@ class ClientSelectorGenerator {
return []; return [];
} }
const allChildSelectors = new Set<string>(); if (parentElements.length > 10) {
parentElements = parentElements.slice(0, 10);
}
const allChildSelectors = new Set<string>();
const processedParents = new Set<HTMLElement>();
for (const parentElement of parentElements) {
if (processedParents.has(parentElement)) continue;
processedParents.add(parentElement);
parentElements.forEach((parentElement) => {
const otherListElements = parentElements.filter( const otherListElements = parentElements.filter(
(el) => el !== parentElement (el) => el !== parentElement
); );
@@ -2481,12 +2534,15 @@ class ClientSelectorGenerator {
iframeDoc, iframeDoc,
otherListElements otherListElements
); );
childSelectors.forEach((selector) => allChildSelectors.add(selector));
});
// Convert Set back to array and sort for consistency for (const selector of childSelectors) {
const childSelectors = Array.from(allChildSelectors).sort(); allChildSelectors.add(selector);
return childSelectors; }
}
const result = Array.from(allChildSelectors).sort();
this.selectorCache.set(cacheKey, result);
return result;
} catch (error) { } catch (error) {
console.error("Error in getChildSelectors:", error); console.error("Error in getChildSelectors:", error);
return []; return [];
@@ -2494,39 +2550,81 @@ class ClientSelectorGenerator {
}; };
private getAllDescendantsIncludingShadow( private getAllDescendantsIncludingShadow(
parentElement: HTMLElement, parentElement: HTMLElement
maxDepth: number = 20
): HTMLElement[] { ): HTMLElement[] {
const allDescendants: HTMLElement[] = []; if (this.descendantsCache.has(parentElement)) {
return this.descendantsCache.get(parentElement)!;
}
const meaningfulDescendants: HTMLElement[] = [];
const queue: HTMLElement[] = [parentElement];
const visited = new Set<HTMLElement>(); const visited = new Set<HTMLElement>();
visited.add(parentElement);
const traverse = (element: HTMLElement, currentDepth: number) => { const MAX_MEANINGFUL_ELEMENTS = 300;
if (currentDepth >= maxDepth || visited.has(element)) { const MAX_NODES_TO_CHECK = 1200;
return; const MAX_DEPTH = 12;
} let nodesChecked = 0;
visited.add(element);
if (element !== parentElement) { let adjustedMaxDepth = MAX_DEPTH;
allDescendants.push(element); const elementDensityThreshold = 50;
const depths: number[] = [0];
let queueIndex = 0;
while (queueIndex < queue.length) {
const element = queue[queueIndex];
const currentDepth = depths[queueIndex];
queueIndex++;
nodesChecked++;
if (currentDepth <= 3 && meaningfulDescendants.length > elementDensityThreshold) {
adjustedMaxDepth = Math.max(6, adjustedMaxDepth - 2);
} }
// Traverse light DOM children if (
const children = Array.from(element.children) as HTMLElement[]; nodesChecked > MAX_NODES_TO_CHECK ||
for (const child of children) { meaningfulDescendants.length >= MAX_MEANINGFUL_ELEMENTS ||
traverse(child, currentDepth + 1); currentDepth > adjustedMaxDepth
) {
break;
} }
// Traverse shadow DOM if it exists if (element !== parentElement && this.isMeaningfulElementCached(element)) {
if (element.shadowRoot) { meaningfulDescendants.push(element);
const shadowChildren = Array.from(element.shadowRoot.children) as HTMLElement[]; }
for (const shadowChild of shadowChildren) {
traverse(shadowChild, currentDepth + 1); if (currentDepth >= adjustedMaxDepth) {
continue;
}
const children = element.children;
const childLimit = Math.min(children.length, 30);
for (let i = 0; i < childLimit; i++) {
const child = children[i] as HTMLElement;
if (!visited.has(child)) {
visited.add(child);
queue.push(child);
depths.push(currentDepth + 1);
} }
} }
};
traverse(parentElement, 0); if (element.shadowRoot && currentDepth < adjustedMaxDepth - 1) {
return allDescendants; const shadowChildren = element.shadowRoot.children;
const shadowLimit = Math.min(shadowChildren.length, 20);
for (let i = 0; i < shadowLimit; i++) {
const child = shadowChildren[i] as HTMLElement;
if (!visited.has(child)) {
visited.add(child);
queue.push(child);
depths.push(currentDepth + 1);
}
}
}
}
this.descendantsCache.set(parentElement, meaningfulDescendants);
return meaningfulDescendants;
} }
private generateOptimizedChildXPaths( private generateOptimizedChildXPaths(
@@ -2541,24 +2639,35 @@ class ClientSelectorGenerator {
// Get all meaningful descendants (not just direct children) // Get all meaningful descendants (not just direct children)
const allDescendants = this.getAllDescendantsIncludingShadow(parentElement); const allDescendants = this.getAllDescendantsIncludingShadow(parentElement);
allDescendants.forEach((descendant, i) => { const batchSize = 25;
if (processedElements.has(descendant)) return; for (let i = 0; i < allDescendants.length; i += batchSize) {
processedElements.add(descendant); const batch = allDescendants.slice(i, i + batchSize);
if (!this.isMeaningfulElement(descendant)) return; for (const descendant of batch) {
if (processedElements.has(descendant)) continue;
processedElements.add(descendant);
const absolutePath = this.buildOptimizedAbsoluteXPath( const absolutePath = this.buildOptimizedAbsoluteXPath(
descendant, descendant,
listSelector, listSelector,
parentElement, parentElement,
document, document,
otherListElements otherListElements
); );
if (absolutePath) { if (absolutePath) {
selectors.push(absolutePath); selectors.push(absolutePath);
}
if (selectors.length >= 250) {
break;
}
} }
});
if (selectors.length >= 250) {
break;
}
}
return [...new Set(selectors)]; return [...new Set(selectors)];
} }
@@ -2736,6 +2845,10 @@ class ClientSelectorGenerator {
rootElement: HTMLElement, rootElement: HTMLElement,
otherListElements: HTMLElement[] = [] otherListElements: HTMLElement[] = []
): string | null { ): string | null {
if (this.pathCache.has(targetElement)) {
return this.pathCache.get(targetElement)!;
}
if ( if (
!this.elementContains(rootElement, targetElement) || !this.elementContains(rootElement, targetElement) ||
targetElement === rootElement targetElement === rootElement
@@ -2788,45 +2901,64 @@ class ClientSelectorGenerator {
} }
if (current !== rootElement) { if (current !== rootElement) {
this.pathCache.set(targetElement, null);
return null; return null;
} }
return pathParts.length > 0 ? "/" + pathParts.join("/") : null; const result = pathParts.length > 0 ? "/" + pathParts.join("/") : null;
this.pathCache.set(targetElement, result);
return result;
} }
private getCommonClassesAcrossLists( private getCommonClassesAcrossLists(
targetElement: HTMLElement, targetElement: HTMLElement,
otherListElements: HTMLElement[] otherListElements: HTMLElement[]
): string[] { ): string[] {
if (otherListElements.length === 0) {
return this.normalizeClasses(targetElement.classList).split(" ").filter(Boolean);
}
const targetClasses = this.normalizeClasses(targetElement.classList).split(" ").filter(Boolean); const targetClasses = this.normalizeClasses(targetElement.classList).split(" ").filter(Boolean);
const otherListsKey = otherListElements.map(el => `${el.tagName}-${el.className}`).sort().join('|'); if (targetClasses.length === 0) {
const cacheKey = `${targetElement.tagName}-${targetClasses.sort().join(',')}-${otherListsKey}`; return [];
}
const cacheKey = `${targetElement.tagName}_${targetClasses.join(',')}_${otherListElements.length}`;
if (this.classCache.has(cacheKey)) { if (this.classCache.has(cacheKey)) {
return this.classCache.get(cacheKey)!; return this.classCache.get(cacheKey)!;
} }
if (otherListElements.length === 0) { const maxElementsToCheck = 100;
this.classCache.set(cacheKey, targetClasses); let checkedElements = 0;
return targetClasses; const similarElements: HTMLElement[] = [];
}
const similarElements = otherListElements.flatMap(listEl => for (const listEl of otherListElements) {
this.getAllDescendantsIncludingShadow(listEl).filter(child => if (checkedElements >= maxElementsToCheck) break;
child.tagName === targetElement.tagName
) const descendants = this.getAllDescendantsIncludingShadow(listEl);
); for (const child of descendants) {
if (checkedElements >= maxElementsToCheck) break;
if (child.tagName === targetElement.tagName) {
similarElements.push(child);
checkedElements++;
}
}
}
if (similarElements.length === 0) { if (similarElements.length === 0) {
this.classCache.set(cacheKey, targetClasses); this.classCache.set(cacheKey, targetClasses);
return targetClasses; return targetClasses;
} }
const targetClassSet = new Set(targetClasses);
const exactMatches = similarElements.filter(el => { const exactMatches = similarElements.filter(el => {
const elClasses = this.normalizeClasses(el.classList).split(" ").filter(Boolean); const elClasses = this.normalizeClasses(el.classList).split(" ").filter(Boolean);
return targetClasses.length === elClasses.length && if (elClasses.length !== targetClasses.length) return false;
targetClasses.every(cls => elClasses.includes(cls)); return elClasses.every(cls => targetClassSet.has(cls));
}); });
if (exactMatches.length > 0) { if (exactMatches.length > 0) {
@@ -3913,6 +4045,168 @@ class ClientSelectorGenerator {
return depth; return depth;
} }
/**
 * Pick the element in `elements` that most likely represents a dialog.
 *
 * Three strategies are tried in order, each returning the first match in
 * array order:
 *   1. an element whose `role` attribute is exactly "dialog";
 *   2. an element whose tag name is `dialog`;
 *   3. an element whose class list or id contains a dialog-like keyword
 *      (case-insensitive substring match).
 *
 * @param elements - Candidate elements to inspect.
 * @returns The first matching element, or `null` when no strategy matches.
 */
private findDialogElement(elements: HTMLElement[]): HTMLElement | null {
  const byRole = elements.find(
    (candidate) => candidate.getAttribute("role") === "dialog"
  );
  if (byRole) {
    return byRole;
  }

  const byTag = elements.find(
    (candidate) => candidate.tagName.toLowerCase() === "dialog"
  );
  if (byTag) {
    return byTag;
  }

  // Note: "overlay" is only looked for in the class list, not in the id.
  const classKeywords = ["modal", "dialog", "popup", "overlay"];
  const idKeywords = ["modal", "dialog", "popup"];

  const byNaming = elements.find((candidate) => {
    const classText = candidate.classList.toString().toLowerCase();
    const idText = (candidate.id || "").toLowerCase();
    return (
      classKeywords.some((keyword) => classText.includes(keyword)) ||
      idKeywords.some((keyword) => idText.includes(keyword))
    );
  });

  return byNaming ?? null;
}
/**
 * Return the candidate that sits deepest in the DOM, measured relative to
 * the dialog's parent.
 *
 * Depth is the number of `parentElement` hops from a candidate until either
 * the dialog's parent is reached or there is no further parent. Ties keep
 * the earliest candidate in array order, because only a strictly greater
 * depth replaces the current pick.
 *
 * @param dialogElements - Candidate elements to compare.
 * @param dialogElement - The dialog whose parent bounds the upward walk.
 * @returns `null` for an empty list, the only entry for a single-item list,
 *          otherwise the deepest candidate.
 */
private findDeepestInDialog(
  dialogElements: HTMLElement[],
  dialogElement: HTMLElement
): HTMLElement | null {
  if (dialogElements.length === 0) return null;
  if (dialogElements.length === 1) return dialogElements[0];

  // The upward walk stops once it reaches this node (may be null).
  const boundary = dialogElement.parentElement;

  const depthOf = (start: HTMLElement): number => {
    let hops = 0;
    let node: HTMLElement | null = start;
    while (node?.parentElement && node !== boundary) {
      hops++;
      node = node.parentElement;
    }
    return hops;
  };

  let winner = dialogElements[0];
  let winnerDepth = 0;

  for (const candidate of dialogElements) {
    const candidateDepth = depthOf(candidate);
    if (candidateDepth > winnerDepth) {
      winnerDepth = candidateDepth;
      winner = candidate;
    }
  }

  return winner;
}
/**
 * Find all dialog elements in the document.
 *
 * An element qualifies when its `role` attribute is exactly "dialog" or when
 * it is a `<dialog>` element. Results are returned in document order.
 *
 * The lookup is delegated to a single targeted selector so the browser does
 * the matching; this avoids copying every element of the document into an
 * array and filtering it in script, which is costly on very large pages.
 *
 * @param doc - Document to search (light DOM only; shadow trees are not
 *              reached by `querySelectorAll`).
 * @returns Matching elements, or an empty array when there are none.
 */
private findAllDialogElements(doc: Document): HTMLElement[] {
  // NOTE(review): tag matching follows CSS type-selector rules, i.e.
  // case-insensitive for HTML elements in an HTML document.
  return Array.from(
    doc.querySelectorAll<HTMLElement>('[role="dialog"], dialog')
  );
}
/**
 * Collect the visible elements that belong to the given dialogs.
 *
 * For each dialog, in order:
 *   - the dialog itself is added when its bounding box has a positive width
 *     and height;
 *   - every descendant with a positive-size bounding box is added;
 *   - when a newly added descendant hosts a shadow root, the elements
 *     returned by `getElementsFromShadowRoot` for that root are added too.
 *
 * Each element appears at most once in the result, even when dialogs are
 * nested or overlap.
 *
 * @param dialogElements - Dialog roots to harvest elements from.
 * @returns De-duplicated elements in the order they were encountered.
 */
private getElementsFromDialogs(dialogElements: HTMLElement[]): HTMLElement[] {
  const collected: HTMLElement[] = [];
  const seen = new Set<HTMLElement>();

  // True when the element's bounding box has a non-zero area.
  const hasArea = (node: Element): boolean => {
    const box = node.getBoundingClientRect();
    return box.width > 0 && box.height > 0;
  };

  // Appends the node unless it was already collected; reports whether it was added.
  const take = (node: HTMLElement): boolean => {
    if (seen.has(node)) {
      return false;
    }
    seen.add(node);
    collected.push(node);
    return true;
  };

  for (const dialog of dialogElements) {
    const visibleDescendants = Array.from(
      dialog.querySelectorAll<HTMLElement>("*")
    ).filter(hasArea);

    if (hasArea(dialog)) {
      take(dialog);
    }

    for (const descendant of visibleDescendants) {
      if (!take(descendant)) continue;

      // Shadow content is only explored for hosts that were just added.
      const hostedRoot = descendant.shadowRoot;
      if (!hostedRoot) continue;

      for (const shadowNode of this.getElementsFromShadowRoot(hostedRoot)) {
        take(shadowNode);
      }
    }
  }

  return collected;
}
/**
 * Gather the visible elements inside a shadow root, descending into nested
 * shadow roots hosted by those elements.
 *
 * Visibility here means a bounding box with positive width and height.
 * Each visible element is followed immediately by the elements found in its
 * own shadow root, if it has one. Any error raised while reading the tree is
 * logged as a warning and whatever was gathered so far is returned.
 *
 * @param shadowRoot - Shadow root to walk.
 * @returns Visible elements found in this root and in nested shadow roots.
 */
private getElementsFromShadowRoot(shadowRoot: ShadowRoot): HTMLElement[] {
  const gathered: HTMLElement[] = [];

  try {
    // Filter first, then walk: visibility of every node is checked before any is collected.
    const visibleNodes = Array.from(
      shadowRoot.querySelectorAll<HTMLElement>("*")
    ).filter((node) => {
      const { width, height } = node.getBoundingClientRect();
      return width > 0 && height > 0;
    });

    for (const node of visibleNodes) {
      gathered.push(node);

      const nestedRoot = node.shadowRoot;
      if (nestedRoot) {
        gathered.push(...this.getElementsFromShadowRoot(nestedRoot));
      }
    }
  } catch (error) {
    console.warn("Could not access shadow root:", error);
  }

  return gathered;
}
/** /**
* Clean up when component unmounts or mode changes * Clean up when component unmounts or mode changes
@@ -3926,6 +4220,10 @@ class ClientSelectorGenerator {
this.spatialIndex.clear(); this.spatialIndex.clear();
this.lastCachedDocument = null; this.lastCachedDocument = null;
this.classCache.clear(); this.classCache.clear();
this.selectorCache.clear();
this.pathCache = new WeakMap<HTMLElement, string | null>();
this.descendantsCache = new WeakMap<HTMLElement, HTMLElement[]>();
this.meaningfulCache = new WeakMap<HTMLElement, boolean>();
} }
// Update generateSelector to use instance variables // Update generateSelector to use instance variables