/** * Page Analyzer for pagination auto-detection, selector generation and grouping */ (function () { 'use strict'; /** * Helper function to evaluate both CSS and XPath selectors * Returns array of matching elements */ function evaluateSelector(selector, doc) { try { const isXPath = selector.startsWith('//') || selector.startsWith('(//'); if (isXPath) { const result = doc.evaluate( selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null ); const elements = []; for (let i = 0; i < result.snapshotLength; i++) { const node = result.snapshotItem(i); if (node && node.nodeType === Node.ELEMENT_NODE) { elements.push(node); } } return elements; } else { return Array.from(doc.querySelectorAll(selector)); } } catch (err) { return []; } } /** * Convert CSS selector to XPath */ function cssToXPath(cssSelector) { if (cssSelector.startsWith('//') || cssSelector.startsWith('/')) { return cssSelector; } try { let xpath = ''; const parts = cssSelector.split(/\s+(?![^[]*])/); for (let i = 0; i < parts.length; i++) { const part = parts[i].trim(); if (!part) continue; if (part === '>') continue; const xpathPart = convertCssPart(part); if (i === 0) { xpath = '//' + xpathPart; } else if (parts[i - 1] === '>') { xpath += '/' + xpathPart; } else { xpath += '//' + xpathPart; } } return xpath || `//*`; } catch (error) { return `//*`; } } /** * Convert a single CSS selector part to XPath */ function convertCssPart(cssPart) { const tagMatch = cssPart.match(/^([a-zA-Z][\w-]*|\*)/); const tag = tagMatch ? tagMatch[1] : '*'; const predicates = []; const idMatch = cssPart.match(/#([\w-]+)/); if (idMatch) { predicates.push(`@id='${idMatch[1]}'`); } const classMatches = cssPart.match(/\.((?:\\.|[^.#[\s])+)/g); if (classMatches) { classMatches.forEach(cls => { let className = cls.substring(1).replace(/\\/g, ''); predicates.push(`contains(@class, '${className}')`); }); } const attrMatches = cssPart.match(/\[([^\]]+)\]/g); if (attrMatches) { attrMatches.forEach(attr => { const content = attr.slice(1, -1); const eqMatch = content.match(/([^=]+)="([^"]+)"/); if (eqMatch) { predicates.push(`@${eqMatch[1]}='${eqMatch[2]}'`); } else { predicates.push(`@${content}`); } }); } if (predicates.length > 0) { return `${tag}[${predicates.join(' and ')}]`; } return tag; } /** * Main entry point for SDK - auto-converts CSS to XPath */ window.autoDetectListFields = function (selector) { try { let xpathSelector = cssToXPath(selector); const testElements = evaluateXPath(xpathSelector, document); if (testElements.length === 0) { console.error('No elements matched the XPath selector!'); return { fields: {}, listSelector: xpathSelector, listFallbackSelector: null, error: 'Selector did not match any elements on the page' }; } if (testElements.length > 0 && !xpathSelector.includes('count(*)')) { const childCounts = testElements.slice(0, 5).map(el => el.children.length); const uniqueCounts = [...new Set(childCounts)]; if (uniqueCounts.length > 1 && childCounts.filter(c => c === 1).length > childCounts.length / 2) { if (xpathSelector.includes('[') && xpathSelector.endsWith(']')) { xpathSelector = xpathSelector.slice(0, -1) + ' and count(*)=1]'; } else if (xpathSelector.includes('[')) { xpathSelector = xpathSelector.replace(/\]$/, ' and count(*)=1]'); } else { const lastSlash = xpathSelector.lastIndexOf('/'); if (lastSlash !== -1) { const beforeTag = xpathSelector.substring(0, lastSlash + 1); const tag = xpathSelector.substring(lastSlash + 1); xpathSelector = beforeTag + tag + '[count(*)=1]'; } else { xpathSelector = xpathSelector + '[count(*)=1]'; } } } } const fields = window.getChildSelectors(xpathSelector); return { fields: fields, listSelector: xpathSelector, listFallbackSelector: null, error: Object.keys(fields).length === 0 ? 'No valid fields could be auto-detected from the list items' : null }; } catch (error) { console.error('Exception:', error); return { fields: {}, error: error.message || 'Failed to auto-detect fields' }; } }; const pathCache = new WeakMap(); const descendantsCache = new WeakMap(); const meaningfulCache = new WeakMap(); const classCache = new Map(); /** * Main entry point - returns detected fields for a list selector */ window.getChildSelectors = function (parentSelector) { try { const parentElements = evaluateXPath(parentSelector, document); if (parentElements.length === 0) { console.error('No parent elements found!'); return {}; } const maxItems = 10; const limitedParents = parentElements.slice(0, Math.min(maxItems, parentElements.length)); const allChildSelectors = []; for (let i = 0; i < limitedParents.length; i++) { const parent = limitedParents[i]; const otherListElements = limitedParents.filter((_, index) => index !== i); const selectors = generateOptimizedChildXPaths( parent, parentSelector, otherListElements ); allChildSelectors.push(...selectors); } const childSelectors = Array.from(new Set(allChildSelectors)).sort() const fields = createFieldsFromSelectors( childSelectors, limitedParents, parentSelector ); return fields; } catch (error) { console.error('Exception:', error); return {}; } }; /** * Generate optimized XPath selectors for all meaningful children */ function generateOptimizedChildXPaths(parentElement, listSelector, otherListElements) { const selectors = []; const processedElements = new Set(); const allDescendants = getAllDescendantsIncludingShadow(parentElement); const batchSize = 25; for (let i = 0; i < allDescendants.length; i += batchSize) { const batch = allDescendants.slice(i, i + batchSize); for (const descendant of batch) { if (processedElements.has(descendant)) continue; processedElements.add(descendant); const xpath = buildOptimizedAbsoluteXPath( descendant, listSelector, parentElement, otherListElements ); if (xpath.primary) { selectors.push({ primary: xpath.primary, fallback: xpath.fallback, element: descendant }); } if (selectors.length >= 250) { break; } } if (selectors.length >= 250) { break; } } return selectors; } /** * Get all meaningful descendants including shadow DOM */ function getAllDescendantsIncludingShadow(parentElement) { if (descendantsCache.has(parentElement)) { return descendantsCache.get(parentElement); } const meaningfulDescendants = []; const queue = [parentElement]; const visited = new Set(); visited.add(parentElement); const MAX_MEANINGFUL_ELEMENTS = 300; const MAX_NODES_TO_CHECK = 1200; const MAX_DEPTH = 20; let nodesChecked = 0; const depths = [0]; let queueIndex = 0; while (queueIndex < queue.length) { const element = queue[queueIndex]; const currentDepth = depths[queueIndex]; queueIndex++; nodesChecked++; if ( nodesChecked > MAX_NODES_TO_CHECK || meaningfulDescendants.length >= MAX_MEANINGFUL_ELEMENTS || currentDepth > MAX_DEPTH ) { break; } if (element !== parentElement && isMeaningfulElement(element)) { meaningfulDescendants.push(element); } if (currentDepth >= MAX_DEPTH) { continue; } // Process light DOM children const children = element.children; const childLimit = Math.min(children.length, 30); for (let i = 0; i < childLimit; i++) { const child = children[i]; if (!visited.has(child)) { visited.add(child); queue.push(child); depths.push(currentDepth + 1); } } // Process shadow DOM if (element.shadowRoot && currentDepth < MAX_DEPTH - 1) { const shadowChildren = element.shadowRoot.children; const shadowLimit = Math.min(shadowChildren.length, 20); for (let i = 0; i < shadowLimit; i++) { const child = shadowChildren[i]; if (!visited.has(child)) { visited.add(child); queue.push(child); depths.push(currentDepth + 1); } } } } descendantsCache.set(parentElement, meaningfulDescendants); return meaningfulDescendants; } /** * Check if element has meaningful content for extraction */ function isMeaningfulElement(element) { if (meaningfulCache.has(element)) { return meaningfulCache.get(element); } const tagName = element.tagName.toLowerCase(); if (tagName === 'img' && element.hasAttribute('src')) { meaningfulCache.set(element, true); return true; } if (tagName === 'a' && element.hasAttribute('href')) { meaningfulCache.set(element, true); return true; } const text = (element.textContent || '').trim(); const hasVisibleText = text.length > 0; if (hasVisibleText || element.querySelector('svg')) { meaningfulCache.set(element, true); return true; } if (element.children.length > 0) { meaningfulCache.set(element, false); return false; } meaningfulCache.set(element, false); return false; } /** * Build optimized absolute XPath */ function buildOptimizedAbsoluteXPath(targetElement, listSelector, listElement, otherListElements) { try { let primary = null; const pathFromList = getOptimizedStructuralPath( targetElement, listElement, otherListElements ); if (pathFromList) { primary = listSelector + pathFromList; } const fallback = generateMandatoryChildFallbackXPath(targetElement, listElement); return { primary, fallback }; } catch (error) { const fallback = generateMandatoryChildFallbackXPath(targetElement, listElement); return { primary: null, fallback }; } } /** * Get optimized structural path from element to root */ function getOptimizedStructuralPath(targetElement, rootElement, otherListElements) { if (pathCache.has(targetElement)) { return pathCache.get(targetElement); } if (!elementContains(rootElement, targetElement) || targetElement === rootElement) { return null; } const pathParts = []; let current = targetElement; let pathDepth = 0; const MAX_PATH_DEPTH = 20; while (current && current !== rootElement && pathDepth < MAX_PATH_DEPTH) { const classes = getCommonClassesAcrossLists(current, otherListElements); const hasConflictingElement = classes.length > 0 && rootElement ? queryElementsInScope(rootElement, current.tagName.toLowerCase()) .filter(el => el !== current) .some(el => classes.every(cls => normalizeClasses(el.classList).split(' ').includes(cls) )) : false; const pathPart = generateOptimizedStructuralStep( current, rootElement, hasConflictingElement, otherListElements ); if (pathPart) { pathParts.unshift(pathPart); } current = current.parentElement || ((current.getRootNode()).host); pathDepth++; } if (current !== rootElement) { pathCache.set(targetElement, null); return null; } const result = pathParts.length > 0 ? '/' + pathParts.join('/') : null; pathCache.set(targetElement, result); return result; } /** * Generate optimized structural step for XPath */ function generateOptimizedStructuralStep(element, rootElement, addPositionToAll, otherListElements) { const tagName = element.tagName.toLowerCase(); const parent = element.parentElement || ((element.getRootNode()).host); if (!parent) { return tagName; } const classes = getCommonClassesAcrossLists(element, otherListElements); if (classes.length > 0 && !addPositionToAll) { const classSelector = classes .map(cls => `contains(@class, '${cls}')`) .join(' and '); const hasConflictingElement = rootElement ? queryElementsInScope(rootElement, element.tagName.toLowerCase()) .filter(el => el !== element) .some(el => classes.every(cls => normalizeClasses(el.classList).split(' ').includes(cls) )) : false; if (!hasConflictingElement) { return `${tagName}[${classSelector}]`; } else { const position = getSiblingPosition(element, parent); return `${tagName}[${classSelector}][${position}]`; } } if (!addPositionToAll) { const meaningfulAttrs = ['role', 'type']; for (const attrName of meaningfulAttrs) { if (element.hasAttribute(attrName)) { const value = element.getAttribute(attrName).replace(/'/g, "\\'"); const isCommon = isAttributeCommonAcrossLists( element, attrName, value, otherListElements ); if (isCommon) { return `${tagName}[@${attrName}='${value}']`; } } } } const position = getSiblingPosition(element, parent); if (addPositionToAll || classes.length === 0) { return `${tagName}[${position}]`; } return tagName; } /** * Get common classes across list items */ function getCommonClassesAcrossLists(targetElement, otherListElements) { if (otherListElements.length === 0) { return normalizeClasses(targetElement.classList).split(' ').filter(Boolean); } const targetClasses = normalizeClasses(targetElement.classList).split(' ').filter(Boolean); if (targetClasses.length === 0) { return []; } const cacheKey = `${targetElement.tagName}_${targetClasses.join(',')}_${otherListElements.length}`; if (classCache.has(cacheKey)) { return classCache.get(cacheKey); } const targetClassSet = new Set(targetClasses); const similarElements = []; const maxElementsToCheck = 100; let checkedElements = 0; for (const listEl of otherListElements) { if (checkedElements >= maxElementsToCheck) break; const descendants = getAllDescendantsIncludingShadow(listEl); for (const child of descendants) { if (checkedElements >= maxElementsToCheck) break; if (child.tagName === targetElement.tagName) { similarElements.push(child); checkedElements++; } } } if (similarElements.length === 0) { classCache.set(cacheKey, targetClasses); return targetClasses; } // Fast exact match check const exactMatches = similarElements.filter(el => { const elClasses = normalizeClasses(el.classList).split(' ').filter(Boolean); if (elClasses.length !== targetClasses.length) return false; return elClasses.every(cls => targetClassSet.has(cls)); }); if (exactMatches.length > 0) { classCache.set(cacheKey, targetClasses); return targetClasses; } // Find common classes const commonClasses = []; for (const targetClass of targetClasses) { const existsInAllOtherLists = otherListElements.every(listEl => { const elementsInThisList = getAllDescendantsIncludingShadow(listEl).filter(child => child.tagName === targetElement.tagName ); return elementsInThisList.some(el => normalizeClasses(el.classList).split(' ').includes(targetClass) ); }); if (existsInAllOtherLists) { commonClasses.push(targetClass); } } classCache.set(cacheKey, commonClasses); return commonClasses; } /** * Normalize class names by removing dynamic parts */ function normalizeClasses(classList) { return Array.from(classList) .filter(cls => { return ( !cls.match(/\d{3,}|uuid|hash|id-|_\d+$/i) && !cls.startsWith('_ngcontent-') && !cls.startsWith('_nghost-') && !cls.match(/^ng-tns-c\d+-\d+$/) ); }) .sort() .join(' '); } /** * Check if attribute is common across lists */ function isAttributeCommonAcrossLists(targetElement, attrName, attrValue, otherListElements) { if (otherListElements.length === 0) { return true; } const targetPath = getElementPath(targetElement); for (const otherListElement of otherListElements) { const correspondingElement = findCorrespondingElement(otherListElement, targetPath); if (correspondingElement) { const otherValue = correspondingElement.getAttribute(attrName); if (otherValue !== attrValue) { return false; } } } return true; } /** * Get element path as indices */ function getElementPath(element) { const path = []; let current = element; while (current && current.parentElement) { const siblings = Array.from(current.parentElement.children); path.unshift(siblings.indexOf(current)); current = current.parentElement; } return path; } /** * Find corresponding element in another list */ function findCorrespondingElement(rootElement, path) { let current = rootElement; for (const index of path) { const children = Array.from(current.children); if (index >= children.length) { return null; } current = children[index]; } return current; } /** * Get sibling position */ function getSiblingPosition(element, parent) { const siblings = Array.from(parent.children || []).filter( child => child.tagName === element.tagName ); return siblings.indexOf(element) + 1; } /** * Query elements in scope (handles shadow DOM) */ function queryElementsInScope(rootElement, tagName) { if (rootElement.shadowRoot || isInShadowDOM(rootElement)) { return deepQuerySelectorAll(rootElement, tagName); } else { return Array.from(rootElement.querySelectorAll(tagName)); } } /** * Check if element is in shadow DOM */ function isInShadowDOM(element) { return element.getRootNode() instanceof ShadowRoot; } /** * Deep query selector for shadow DOM */ function deepQuerySelectorAll(root, selector) { const elements = []; function process(node) { if (node instanceof Element && node.matches(selector)) { elements.push(node); } for (const child of node.children) { process(child); } if (node instanceof HTMLElement && node.shadowRoot) { process(node.shadowRoot); } } process(root); return elements; } /** * Check if container contains element (works with shadow DOM) */ function elementContains(container, element) { if (container.contains(element)) { return true; } let current = element; while (current) { if (current === container) { return true; } current = current.parentElement || ((current.getRootNode()).host); } return false; } /** * Generate fallback XPath using data-mx-id */ function generateMandatoryChildFallbackXPath(childElement, parentElement) { try { const parentMxId = parentElement.getAttribute('data-mx-id'); const childMxId = childElement.getAttribute('data-mx-id'); if (!parentMxId) { return null; } const parentTagName = parentElement.tagName.toLowerCase(); const childTagName = childElement.tagName.toLowerCase(); if (childMxId) { return `//${parentTagName}[@data-mx-id='${parentMxId}']//${childTagName}[@data-mx-id='${childMxId}']`; } else { const pathElements = getMandatoryFallbackPath(childElement, parentElement); if (pathElements.length > 0) { const parentPath = `//${parentTagName}[@data-mx-id='${parentMxId}']`; const childPath = pathElements.join('/'); return `${parentPath}/${childPath}`; } } return null; } catch (error) { return null; } } /** * Build mandatory fallback path using data-mx-id */ function getMandatoryFallbackPath(targetElement, rootElement) { const pathParts = []; let current = targetElement; while (current && current !== rootElement && current.parentElement) { const mxId = current.getAttribute('data-mx-id'); const tagName = current.tagName.toLowerCase(); if (mxId) { pathParts.unshift(`${tagName}[@data-mx-id='${mxId}']`); } else { const position = Array.from(current.parentElement.children) .filter(child => child.tagName === current.tagName) .indexOf(current) + 1; pathParts.unshift(`${tagName}[${position}]`); } current = current.parentElement; } return pathParts; } /** * Evaluate XPath and return elements */ function evaluateXPath(xpath, contextNode) { try { const doc = contextNode instanceof ShadowRoot ? contextNode.host.ownerDocument : contextNode; const result = doc.evaluate( xpath, contextNode, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null ); const elements = []; for (let i = 0; i < result.snapshotLength; i++) { const node = result.snapshotItem(i); if (node && node.nodeType === Node.ELEMENT_NODE) { elements.push(node); } } return elements; } catch (error) { return []; } } /** * Create fields from selectors by evaluating them and extracting data */ function createFieldsFromSelectors(selectorObjects, listElements, parentSelector) { const candidates = []; for (const selectorObj of selectorObjects) { try { const elements = evaluateXPath(selectorObj.primary, document); if (elements.length === 0) continue; const element = elements[0]; const tagName = element.tagName.toLowerCase(); if (tagName === 'a') { const href = element.getAttribute('href'); const text = (element.textContent || '').trim(); if (text) { const textField = createFieldData(element, selectorObj.primary, 'innerText'); if (textField && textField.data) { candidates.push({ field: textField, element: element, position: getElementPosition(element) }); } } if (href && href !== '#' && !href.startsWith('javascript:')) { const hrefField = createFieldData(element, selectorObj.primary, 'href'); if (hrefField && hrefField.data) { candidates.push({ field: hrefField, element: element, position: getElementPosition(element) }); } } } else { const field = createFieldData(element, selectorObj.primary); if (field && field.data) { candidates.push({ field: field, element: element, position: getElementPosition(element) }); } } } catch (error) { } } const filtered = removeParentChildDuplicates(candidates); filtered.sort((a, b) => { if (Math.abs(a.position.y - b.position.y) > 5) { return a.position.y - b.position.y; } return a.position.x - b.position.x; }); return removeDuplicateContentAndFormat(filtered); } /** * Create field data from element */ function createFieldData(element, selector, forceAttribute) { const tagName = element.tagName.toLowerCase(); let data = ''; let attribute = forceAttribute || 'innerText'; if (forceAttribute) { if (forceAttribute === 'href') { data = element.getAttribute('href') || ''; } else if (forceAttribute === 'innerText') { data = (element.textContent || '').trim(); } } else if (tagName === 'img') { data = element.getAttribute('src') || ''; attribute = 'src'; } else if (tagName === 'a') { const href = element.getAttribute('href') || ''; const text = (element.textContent || '').trim(); if (href && href !== '#' && !href.startsWith('javascript:')) { data = href; attribute = 'href'; } else if (text) { data = text; attribute = 'innerText'; } } else { data = (element.textContent || '').trim(); attribute = 'innerText'; } if (!data) { return null; } const isShadow = element.getRootNode() instanceof ShadowRoot; return { data: data, selectorObj: { selector: selector, attribute: attribute, tag: tagName.toUpperCase(), isShadow: isShadow } }; } /** * Get element position */ function getElementPosition(element) { const rect = element.getBoundingClientRect(); return { x: rect.left, y: rect.top }; } /** * Remove parent-child duplicates */ function removeParentChildDuplicates(candidates) { const filtered = []; for (const candidate of candidates) { let shouldInclude = true; const tagName = candidate.element.tagName.toLowerCase(); for (const existing of filtered) { if (candidate.element.contains(existing.element)) { shouldInclude = false; break; } else if (existing.element.contains(candidate.element)) { const existingIndex = filtered.indexOf(existing); filtered.splice(existingIndex, 1); break; } } if (tagName === 'a' || tagName === 'img') { shouldInclude = true; } if (shouldInclude) { filtered.push(candidate); } } return filtered; } /** * Remove duplicate content and format for workflow */ function removeDuplicateContentAndFormat(candidates) { const finalFields = {}; const seenContent = new Set(); const seenSelectors = new Set(); let labelCounter = 1; for (const candidate of candidates) { const content = candidate.field.data.trim().toLowerCase(); const selectorKey = `${candidate.field.selectorObj.selector}::${candidate.field.selectorObj.attribute}`; if (!seenContent.has(content) && !seenSelectors.has(selectorKey)) { seenContent.add(content); seenSelectors.add(selectorKey); const fieldName = `Label ${labelCounter}`; finalFields[fieldName] = { selector: candidate.field.selectorObj.selector, attribute: candidate.field.selectorObj.attribute, tag: candidate.field.selectorObj.tag, isShadow: candidate.field.selectorObj.isShadow }; labelCounter++; } } return finalFields; } /** * Auto-detect pagination type and selector * Returns: { type: string, selector: string | null } * Types: 'scrollDown', 'scrollUp', 'clickNext', 'clickLoadMore', '' */ window.autoDetectPagination = function (listSelector, options) { try { const listElements = evaluateSelector(listSelector, document); if (listElements.length === 0) { return { type: '', selector: null, confidence: 'low', debug: 'No list elements found' }; } const listContainer = listElements[0]; const nextButtonPatterns = [ /next/i, /\bnext\s+page\b/i, /page\s+suivante/i, /siguiente/i, /weiter/i, />>|›|→|»|⟩/, /\bforward\b/i, /\bnewer\b/i, /\bolder\b/i ]; const loadMorePatterns = [ /load\s+more/i, /show\s+more/i, /view\s+more/i, /see\s+more/i, /more\s+results/i, /plus\s+de\s+résultats/i, /más\s+resultados/i, /weitere\s+ergebnisse/i ]; const prevButtonPatterns = [ /prev/i, /previous/i, /<<|‹|←|«/, /\bback\b/i ]; /** * Check if element text matches any pattern */ function matchesAnyPattern(text, patterns) { return patterns.some(pattern => pattern.test(text)); } /** * Get all clickable elements (buttons, links, etc.) */ function getClickableElements() { const clickables = []; const selectors = ['button', 'a', '[role="button"]', '[onclick]', '.btn', '.button']; for (const selector of selectors) { const elements = document.querySelectorAll(selector); clickables.push(...Array.from(elements)); } return [...new Set(clickables)]; } /** * Check if element is visible */ function isVisible(element) { const style = window.getComputedStyle(element); return style.display !== 'none' && style.visibility !== 'hidden' && style.opacity !== '0' && element.offsetWidth > 0 && element.offsetHeight > 0; } /** * Comprehensive selector generator based on @medv/finder algorithm * Generates multiple selector types and chains them for reliability */ function generatePaginationSelector(element) { try { element.scrollIntoView({ behavior: 'instant', block: 'center', inline: 'center' }); } catch (e) { } const rect = element.getBoundingClientRect(); const coordinates = { x: rect.left + rect.width / 2, y: rect.top + rect.height / 2 }; const result = getSelectors(document, coordinates); const selectorChain = []; if (result.primary) { if (result.primary.id) selectorChain.push(result.primary.id); if (result.primary.testIdSelector) selectorChain.push(result.primary.testIdSelector); if (result.primary.relSelector) selectorChain.push(result.primary.relSelector); if (result.primary.accessibilitySelector) selectorChain.push(result.primary.accessibilitySelector); if (result.primary.hrefSelector) selectorChain.push(result.primary.hrefSelector); if (result.primary.formSelector) selectorChain.push(result.primary.formSelector); if (result.primary.attrSelector) selectorChain.push(result.primary.attrSelector); if (result.primary.generalSelector) selectorChain.push(result.primary.generalSelector); } return selectorChain.length > 0 ? selectorChain.join(',') : element.tagName.toLowerCase(); } /** * Comprehensive selector generator (based on @medv/finder) * Supports shadow DOM, iframes, and multiple selector strategies */ function getSelectors(iframeDoc, coordinates) { try { // ===== FINDER ALGORITHM ===== // Based on @medv/finder by Anton Medvedev // https://github.com/antonmedv/finder/blob/master/finder.ts const Limit = { All: 0, Two: 1, One: 2 }; let config; let rootDocument; function finder(input, options) { if (input.nodeType !== Node.ELEMENT_NODE) { throw new Error("Can't generate CSS selector for non-element node type."); } if ('html' === input.tagName.toLowerCase()) { return 'html'; } const defaults = { root: iframeDoc.body, idName: function (name) { return true; }, className: function (name) { return true; }, tagName: function (name) { return true; }, attr: function (name, value) { return false; }, seedMinLength: 1, optimizedMinLength: 2, threshold: 900, maxNumberOfTries: 9000 }; config = Object.assign({}, defaults, options || {}); rootDocument = findRootDocument(config.root, defaults); let path = bottomUpSearch(input, Limit.All, function () { return bottomUpSearch(input, Limit.Two, function () { return bottomUpSearch(input, Limit.One); }); }); if (path) { const optimized = sort(optimize(path, input)); if (optimized.length > 0) { path = optimized[0]; } return selector(path); } else { throw new Error('Selector was not found.'); } } function findRootDocument(rootNode, defaults) { if (rootNode.nodeType === Node.DOCUMENT_NODE) { return rootNode; } if (rootNode === defaults.root) { return rootNode.ownerDocument; } return rootNode; } function bottomUpSearch(input, limit, fallback) { let path = null; let stack = []; let current = input; let i = 0; while (current && current !== config.root.parentElement) { let level = maybe(id(current)) || maybe.apply(null, attr(current)) || maybe.apply(null, classNames(current)) || maybe(tagName(current)) || [any()]; const nth = index(current); if (limit === Limit.All) { if (nth) { level = level.concat( level.filter(dispensableNth).map(function (node) { return nthChild(node, nth); }) ); } } else if (limit === Limit.Two) { level = level.slice(0, 1); if (nth) { level = level.concat( level.filter(dispensableNth).map(function (node) { return nthChild(node, nth); }) ); } } else if (limit === Limit.One) { const node = level[0]; level = level.slice(0, 1); if (nth && dispensableNth(node)) { level = [nthChild(node, nth)]; } } for (let j = 0; j < level.length; j++) { level[j].level = i; } stack.push(level); if (stack.length >= config.seedMinLength) { path = findUniquePath(stack, fallback); if (path) { break; } } current = current.parentElement; i++; } if (!path) { path = findUniquePath(stack, fallback); } return path; } function findUniquePath(stack, fallback) { const paths = sort(combinations(stack)); if (paths.length > config.threshold) { return fallback ? fallback() : null; } for (let i = 0; i < paths.length; i++) { if (unique(paths[i])) { return paths[i]; } } return null; } function selector(path) { let node = path[0]; let query = node.name; for (let i = 1; i < path.length; i++) { const level = path[i].level || 0; if (node.level === level - 1) { query = path[i].name + ' > ' + query; } else { query = path[i].name + ' ' + query; } node = path[i]; } return query; } function penalty(path) { return path.map(function (node) { return node.penalty; }) .reduce(function (acc, i) { return acc + i; }, 0); } function unique(path) { const elements = rootDocument.querySelectorAll(selector(path)); switch (elements.length) { case 0: throw new Error("Can't select any node with this selector: " + selector(path)); case 1: return true; default: return false; } } function id(input) { const elementId = input.getAttribute('id'); if (elementId && config.idName(elementId)) { return { name: '#' + cssesc(elementId, { isIdentifier: true }), penalty: 0 }; } return null; } function attr(input) { const attrs = Array.from(input.attributes).filter(function (attr) { return config.attr(attr.name, attr.value) && attr.name !== 'data-mx-id'; }); return attrs.map(function (attr) { let attrValue = attr.value; if (attr.name === 'href' && attr.value.includes('://')) { try { const url = new URL(attr.value); const siteOrigin = url.protocol + '//' + url.host; attrValue = attr.value.replace(siteOrigin, ''); } catch (e) { // Keep original if URL parsing fails } } return { name: '[' + cssesc(attr.name, { isIdentifier: true }) + '="' + cssesc(attrValue) + '"]', penalty: 0.5 }; }); } function classNames(input) { const names = Array.from(input.classList).filter(config.className); return names.map(function (name) { return { name: '.' + cssesc(name, { isIdentifier: true }), penalty: 1 }; }); } function tagName(input) { const name = input.tagName.toLowerCase(); if (config.tagName(name)) { return { name: name, penalty: 2 }; } return null; } function any() { return { name: '*', penalty: 3 }; } function index(input) { const parent = input.parentNode; if (!parent) { return null; } let child = parent.firstChild; if (!child) { return null; } let i = 0; while (child) { if (child.nodeType === Node.ELEMENT_NODE) { i++; } if (child === input) { break; } child = child.nextSibling; } return i; } function nthChild(node, i) { return { name: node.name + ':nth-child(' + i + ')', penalty: node.penalty + 1 }; } function dispensableNth(node) { return node.name !== 'html' && !node.name.startsWith('#'); } function maybe() { const args = Array.prototype.slice.call(arguments); const list = args.filter(notEmpty); if (list.length > 0) { return list; } return null; } function notEmpty(value) { return value !== null && value !== undefined; } function combinations(stack, path) { path = path || []; const results = []; function* generate(s, p) { if (s.length > 0) { for (let i = 0; i < s[0].length; i++) { yield* generate(s.slice(1), p.concat(s[0][i])); } } else { yield p; } } const gen = generate(stack, path); let next = gen.next(); while (!next.done) { results.push(next.value); next = gen.next(); } return results; } function sort(paths) { return Array.from(paths).sort(function (a, b) { return penalty(a) - penalty(b); }); } function* optimize(path, input, scope) { scope = scope || { counter: 0, visited: new Map() }; if (path.length > 2 && path.length > config.optimizedMinLength) { for (let i = 1; i < path.length - 1; i++) { if (scope.counter > config.maxNumberOfTries) { return; } scope.counter += 1; const newPath = path.slice(); newPath.splice(i, 1); const newPathKey = selector(newPath); if (scope.visited.has(newPathKey)) { continue; } try { if (unique(newPath) && same(newPath, input)) { yield newPath; scope.visited.set(newPathKey, true); yield* optimize(newPath, input, scope); } } catch (e) { continue; } } } } function same(path, input) { return rootDocument.querySelector(selector(path)) === input; } // ===== CSSESC UTILITY ===== const regexAnySingleEscape = /[ -,\.\/:-@\[-\^`\{-~]/; const regexSingleEscape = /[ -,\.\/:-@\[\]\^`\{-~]/; const regexExcessiveSpaces = /(^|\\+)?(\\[A-F0-9]{1,6})\x20(?![a-fA-F0-9\x20])/g; const defaultCssEscOptions = { escapeEverything: false, isIdentifier: false, quotes: 'single', wrap: false }; function cssesc(string, opt) { const options = Object.assign({}, defaultCssEscOptions, opt || {}); if (options.quotes != 'single' && options.quotes != 'double') { options.quotes = 'single'; } const quote = options.quotes == 'double' ? '"' : "'"; const isIdentifier = options.isIdentifier; const firstChar = string.charAt(0); let output = ''; let counter = 0; const length = string.length; while (counter < length) { const character = string.charAt(counter++); let codePoint = character.charCodeAt(0); let value = undefined; if (codePoint < 0x20 || codePoint > 0x7e) { if (codePoint >= 0xd800 && codePoint <= 0xdbff && counter < length) { const extra = string.charCodeAt(counter++); if ((extra & 0xfc00) == 0xdc00) { codePoint = ((codePoint & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000; } else { counter--; } } value = '\\' + codePoint.toString(16).toUpperCase() + ' '; } else { if (options.escapeEverything) { if (regexAnySingleEscape.test(character)) { value = '\\' + character; } else { value = '\\' + codePoint.toString(16).toUpperCase() + ' '; } } else if (/[\t\n\f\r\x0B]/.test(character)) { value = '\\' + codePoint.toString(16).toUpperCase() + ' '; } else if ( character == '\\' || (!isIdentifier && ((character == '"' && quote == character) || (character == "'" && quote == character))) || (isIdentifier && regexSingleEscape.test(character)) ) { value = '\\' + character; } else { value = character; } } output += value; } if (isIdentifier) { if (/^-[-\d]/.test(output)) { output = '\\-' + output.slice(1); } else if (/\d/.test(firstChar)) { output = '\\3' + firstChar + ' ' + output.slice(1); } } output = output.replace(regexExcessiveSpaces, function ($0, $1, $2) { if ($1 && $1.length % 2) { return $0; } return ($1 || '') + $2; }); if (!isIdentifier && options.wrap) { return quote + output + quote; } return output; } // ===== ELEMENT DETECTION ===== function getDeepestElementFromPoint(x, y) { let elements = iframeDoc.elementsFromPoint(x, y); if (!elements || elements.length === 0) return null; // Check for dialog elements first const dialogElement = elements.find(function (el) { return el.getAttribute('role') === 'dialog'; }); if (dialogElement) { const dialogElements = elements.filter(function (el) { return el === dialogElement || dialogElement.contains(el); }); const findDeepestInDialog = function (elems) { if (!elems.length) return null; if (elems.length === 1) return elems[0]; let deepestElement = elems[0]; let maxDepth = 0; for (let i = 0; i < elems.length; i++) { let depth = 0; let current = elems[i]; while (current && current.parentElement && current !== dialogElement.parentElement) { depth++; current = current.parentElement; } if (depth > maxDepth) { maxDepth = depth; deepestElement = elems[i]; } } return deepestElement; }; return findDeepestInDialog(dialogElements); } // Standard deepest element detection const findDeepestElement = function (elems) { if (!elems.length) return null; if (elems.length === 1) return elems[0]; // Check for positioned overlays for (let i = 0; i < Math.min(3, elems.length); i++) { const element = elems[i]; const style = window.getComputedStyle(element); const zIndex = parseInt(style.zIndex) || 0; if ((style.position === 'fixed' || style.position === 'absolute') && zIndex > 50) { return element; } if (element.tagName === 'SVG' && i < 2) { return element; } } // Depth-based fallback let deepestElement = elems[0]; let maxDepth = 0; for (let i = 0; i < elems.length; i++) { let depth = 0; let current = elems[i]; while (current) { depth++; if (current.parentElement) { current = current.parentElement; } else { break; } } if (depth > maxDepth) { maxDepth = depth; deepestElement = elems[i]; } } return deepestElement; }; let deepestElement = findDeepestElement(elements); if (!deepestElement) return null; // Handle shadow DOM const traverseShadowDOM = function (element) { let current = element; let shadowRoot = current.shadowRoot; let deepest = current; let depth = 0; const MAX_SHADOW_DEPTH = 4; while (shadowRoot && depth < MAX_SHADOW_DEPTH) { const shadowElement = shadowRoot.elementFromPoint(x, y); if (!shadowElement || shadowElement === current) break; deepest = shadowElement; current = shadowElement; shadowRoot = current.shadowRoot; depth++; } return deepest; }; deepestElement = traverseShadowDOM(deepestElement); return deepestElement; } // ===== SELECTOR GENERATION ===== function genAttributeSet(element, attributes) { return new Set( attributes.filter(function (attr) { const attrValue = element.getAttribute(attr); return attrValue != null && attrValue.length > 0; }) ); } function isAttributesDefined(element, attributes) { return genAttributeSet(element, attributes).size > 0; } function genValidAttributeFilter(element, attributes) { const attrSet = genAttributeSet(element, attributes); return function (name) { return attrSet.has(name); }; } function genSelectorForAttributes(element, attributes) { let selector = null; try { if (attributes.includes('rel') && element.hasAttribute('rel')) { const relValue = element.getAttribute('rel'); return '[rel="' + relValue + '"]'; } selector = isAttributesDefined(element, attributes) ? finder(element, { idName: function () { return false; }, attr: genValidAttributeFilter(element, attributes) }) : null; } catch (e) { } return selector; } function isCharacterNumber(char) { return char && char.length === 1 && /[0-9]/.test(char); } function generateMandatoryCSSFallback(element) { const mxId = Math.floor(Math.random() * 10000).toString(); element.setAttribute('data-mx-id', mxId); return element.tagName.toLowerCase() + '[data-mx-id="' + mxId + '"]'; } function genSelectors(element) { if (element == null) { return null; } const href = element.getAttribute('href'); let generalSelector = null; try { generalSelector = finder(element); } catch (e) { } let attrSelector = null; try { attrSelector = finder(element, { attr: function () { return true; } }); } catch (e) { } const relSelector = genSelectorForAttributes(element, ['rel']); const hrefSelector = genSelectorForAttributes(element, ['href']); const formSelector = genSelectorForAttributes(element, ['name', 'placeholder', 'for']); const accessibilitySelector = genSelectorForAttributes(element, ['aria-label', 'alt', 'title']); const testIdSelector = genSelectorForAttributes(element, [ 'data-testid', 'data-test-id', 'data-testing', 'data-test', 'data-qa', 'data-cy' ]); let idSelector = null; try { const elementId = element.getAttribute('id'); idSelector = isAttributesDefined(element, ['id']) && !isCharacterNumber(elementId ? elementId[0] : '') ? finder(element, { attr: function (name) { return name === 'id'; } }) : null; } catch (e) { } return { id: idSelector, generalSelector: generalSelector, attrSelector: attrSelector, testIdSelector: testIdSelector, text: element.innerText, href: href || undefined, hrefSelector: hrefSelector, accessibilitySelector: accessibilitySelector, formSelector: formSelector, relSelector: relSelector, iframeSelector: null, shadowSelector: null }; } // Main execution const hoveredElement = getDeepestElementFromPoint(coordinates.x, coordinates.y); if (hoveredElement != null) { const parentElement = hoveredElement.parentElement; const element = (parentElement && parentElement.tagName === 'A') ? parentElement : hoveredElement; const generatedSelectors = genSelectors(element); return { primary: generatedSelectors }; } } catch (e) { } return { primary: null }; } /** * Check if element is near the list container */ function isNearList(element) { try { const listRect = listContainer.getBoundingClientRect(); const elementRect = element.getBoundingClientRect(); if (elementRect.top >= listRect.bottom && elementRect.top <= listRect.bottom + 500) { return true; } if (elementRect.bottom <= listRect.top && elementRect.bottom >= listRect.top - 500) { return true; } const verticalOverlap = !(elementRect.bottom < listRect.top || elementRect.top > listRect.bottom); if (verticalOverlap) { const horizontalDistance = Math.min( Math.abs(elementRect.left - listRect.right), Math.abs(elementRect.right - listRect.left) ); if (horizontalDistance < 200) { return true; } } return false; } catch (error) { return false; } } const clickableElements = getClickableElements(); let nextButton = null; let nextButtonScore = 0; const nextButtonCandidates = []; for (const element of clickableElements) { if (!isVisible(element)) continue; const text = (element.textContent || '').trim(); const ariaLabel = element.getAttribute('aria-label') || ''; const title = element.getAttribute('title') || ''; const combinedText = `${text} ${ariaLabel} ${title}`; let score = 0; const reasons = []; if (matchesAnyPattern(combinedText, nextButtonPatterns)) { score += 10; reasons.push('text match (+10)'); } if (isNearList(element)) { score += 5; reasons.push('near list (+5)'); } if (element.tagName === 'BUTTON') { score += 2; reasons.push('button tag (+2)'); } const className = element.className || ''; if (/pagination|next|forward/i.test(className)) { score += 3; reasons.push('pagination class (+3)'); } if (score > 0) { nextButtonCandidates.push({ element: element, score: score, text: text.substring(0, 50), ariaLabel: ariaLabel, tag: element.tagName, className: className, reasons: reasons }); } if (score > nextButtonScore) { nextButtonScore = score; nextButton = element; } } let loadMoreButton = null; let loadMoreScore = 0; for (const element of clickableElements) { if (!isVisible(element)) continue; const text = (element.textContent || '').trim(); const ariaLabel = element.getAttribute('aria-label') || ''; const title = element.getAttribute('title') || ''; const combinedText = `${text} ${ariaLabel} ${title}`; let score = 0; if (matchesAnyPattern(combinedText, loadMorePatterns)) { score += 10; } if (isNearList(element)) { score += 5; } if (element.tagName === 'BUTTON') { score += 2; } if (score > loadMoreScore) { loadMoreScore = score; loadMoreButton = element; } } let prevButton = null; let prevButtonScore = 0; for (const element of clickableElements) { if (!isVisible(element)) continue; const text = (element.textContent || '').trim(); const ariaLabel = element.getAttribute('aria-label') || ''; const title = element.getAttribute('title') || ''; const combinedText = `${text} ${ariaLabel} ${title}`; let score = 0; if (matchesAnyPattern(combinedText, prevButtonPatterns)) { score += 10; } if (isNearList(element)) { score += 5; } if (score > prevButtonScore) { prevButtonScore = score; prevButton = element; } } function detectInfiniteScrollScore() { try { const debugInfo = { indicators: [], score: 0, threshold: 5 }; const initialItemCount = listElements.length; const initialHeight = document.documentElement.scrollHeight; const viewportHeight = window.innerHeight; const currentScrollY = window.scrollY; if (initialHeight <= viewportHeight) { return 0; } const loadingIndicators = [ '[class*="loading"]', '[class*="spinner"]', '[class*="skeleton"]', '[aria-busy="true"]', '[data-loading="true"]', '.loader', '.load-more-spinner', '[class*="load"]', '[id*="loading"]', '[id*="spinner"]' ]; for (const selector of loadingIndicators) { if (document.querySelector(selector)) { debugInfo.score += 3; debugInfo.indicators.push(`Loading indicator: ${selector} (+3)`); break; } } const sentinelPatterns = [ '[class*="sentinel"]', '[class*="trigger"]', '[data-infinite]', '[data-scroll-trigger]', '#infinite-scroll-trigger', '[class*="infinite"]', '[id*="infinite"]' ]; for (const selector of sentinelPatterns) { if (document.querySelector(selector)) { debugInfo.score += 4; debugInfo.indicators.push(`Sentinel element: ${selector} (+4)`); break; } } const scrollToTopPatterns = [ '[class*="scroll"][class*="top"]', '[aria-label*="scroll to top"]', '[title*="back to top"]', '.back-to-top', '#back-to-top', '[class*="scrolltop"]', '[class*="backtotop"]', 'button[class*="top"]', 'a[href="#top"]', 'a[href="#"]' ]; for (const selector of scrollToTopPatterns) { const element = document.querySelector(selector); if (element && isVisible(element)) { debugInfo.score += 2; debugInfo.indicators.push('Scroll-to-top button (+2)'); break; } } if (initialHeight > viewportHeight * 3) { debugInfo.score += 3; debugInfo.indicators.push(`Very tall page (${(initialHeight / viewportHeight).toFixed(1)}x viewport) (+3)`); } else if (initialHeight > viewportHeight * 2) { debugInfo.score += 2; debugInfo.indicators.push(`Tall page (${(initialHeight / viewportHeight).toFixed(1)}x viewport) (+2)`); } if (initialItemCount >= 20) { debugInfo.score += 2; debugInfo.indicators.push(`Many list items (${initialItemCount}) (+2)`); } else if (initialItemCount >= 10) { debugInfo.score += 1; debugInfo.indicators.push(`Good number of list items (${initialItemCount}) (+1)`); } const infiniteScrollLibraries = [ '.infinite-scroll', '[data-infinite-scroll]', '[data-flickity]', '[data-slick]', '.masonry', '[data-masonry]', '[class*="infinite-scroll"]', '[class*="lazy-load"]', '[data-lazy]' ]; for (const selector of infiniteScrollLibraries) { if (document.querySelector(selector)) { debugInfo.score += 4; debugInfo.indicators.push(`Infinite scroll library: ${selector} (+4)`); break; } } const lastListItem = listElements[listElements.length - 1]; if (lastListItem) { const lastItemRect = lastListItem.getBoundingClientRect(); const lastItemY = lastItemRect.bottom + currentScrollY; const viewportBottom = currentScrollY + viewportHeight; if (lastItemY > viewportBottom + viewportHeight) { debugInfo.score += 3; debugInfo.indicators.push('List extends far below viewport (+3)'); } else if (lastItemY > viewportBottom) { debugInfo.score += 2; debugInfo.indicators.push('List extends below viewport (+2)'); } } const hiddenLoadMore = document.querySelectorAll('[class*="load"], [class*="more"]'); for (let i = 0; i < hiddenLoadMore.length; i++) { const el = hiddenLoadMore[i]; const style = window.getComputedStyle(el); if (style.opacity === '0' || style.visibility === 'hidden') { debugInfo.score += 2; debugInfo.indicators.push('Hidden load trigger element (+2)'); break; } } const paginationControls = document.querySelectorAll('[class*="pagination"], [class*="pager"]'); if (paginationControls.length === 0) { debugInfo.score += 1; debugInfo.indicators.push('No pagination controls found (+1)'); } return debugInfo.score; } catch (error) { return 0; } } const infiniteScrollScore = (options && options.disableScrollDetection) ? 0 : detectInfiniteScrollScore(); const hasStrongInfiniteScrollSignals = infiniteScrollScore >= 8; const hasMediumInfiniteScrollSignals = infiniteScrollScore >= 5 && infiniteScrollScore < 8; if (hasStrongInfiniteScrollSignals) { const confidence = infiniteScrollScore >= 12 ? 'high' : infiniteScrollScore >= 10 ? 'medium' : 'low'; return { type: 'scrollDown', selector: null, confidence: confidence }; } if (loadMoreButton && loadMoreScore >= 15) { const selector = generatePaginationSelector(loadMoreButton); return { type: 'clickLoadMore', selector: selector, confidence: 'high' }; } if (nextButton && nextButtonScore >= 15 && !hasMediumInfiniteScrollSignals) { const selector = generatePaginationSelector(nextButton); return { type: 'clickNext', selector: selector, confidence: 'high' }; } if (hasMediumInfiniteScrollSignals) { const confidence = infiniteScrollScore >= 7 ? 'medium' : 'low'; return { type: 'scrollDown', selector: null, confidence: confidence }; } if (loadMoreButton && loadMoreScore >= 8) { const selector = generatePaginationSelector(loadMoreButton); const confidence = loadMoreScore >= 10 ? 'medium' : 'low'; return { type: 'clickLoadMore', selector: selector, confidence: confidence }; } if (nextButton && nextButtonScore >= 8) { const selector = generatePaginationSelector(nextButton); const confidence = nextButtonScore >= 10 ? 'medium' : 'low'; return { type: 'clickNext', selector: selector, confidence: confidence }; } if (prevButton && prevButtonScore >= 8) { const confidence = prevButtonScore >= 15 ? 'high' : prevButtonScore >= 10 ? 'medium' : 'low'; return { type: 'scrollUp', selector: null, confidence: confidence }; } return { type: '', selector: null, confidence: 'low', debug: { clickableElementsCount: clickableElements.length, nextCandidatesCount: nextButtonCandidates.length, topNextCandidates: nextButtonCandidates.slice(0, 3).map(c => ({ score: c.score, text: c.text, tag: c.tag, reasons: c.reasons })), finalScores: { loadMore: loadMoreScore, next: nextButtonScore, prev: prevButtonScore, infiniteScroll: infiniteScrollScore } } }; } catch (error) { return { type: '', selector: null, confidence: 'low', error: error.message, debug: 'Exception thrown: ' + error.message }; } }; /** * Analyze element groups on the page * Returns grouped elements with their structural fingerprints */ window.analyzeElementGroups = function() { try { const normalizeClasses = (classList) => { return Array.from(classList) .filter((cls) => { return ( !cls.match(/\d{3,}|uuid|hash|id-|_\d+$/i) && !cls.startsWith('_ngcontent-') && !cls.startsWith('_nghost-') && !cls.match(/^ng-tns-c\d+-\d+$/) ); }) .sort() .join(' '); }; const getStructuralFingerprint = (element) => { if (element.nodeType !== Node.ELEMENT_NODE) return null; const tagName = element.tagName.toLowerCase(); const isCustomElement = tagName.includes('-'); const standardExcludeSelectors = ['script', 'style', 'meta', 'link', 'title', 'head']; if (!isCustomElement && standardExcludeSelectors.includes(tagName)) { return null; } const children = Array.from(element.children); let childrenStructureString; if (tagName === 'table') { const thead = element.querySelector('thead'); const representativeRow = thead ? thead.querySelector('tr') : element.querySelector('tr'); if (representativeRow) { const structure = Array.from(representativeRow.children).map(child => ({ tag: child.tagName.toLowerCase(), classes: normalizeClasses(child.classList), })); childrenStructureString = JSON.stringify(structure); } else { childrenStructureString = JSON.stringify([]); } } else if (tagName === 'tr') { const structure = children.map((child) => ({ tag: child.tagName.toLowerCase(), classes: normalizeClasses(child.classList), })); childrenStructureString = JSON.stringify(structure); } else { const structure = children.map((child) => ({ tag: child.tagName.toLowerCase(), classes: normalizeClasses(child.classList), hasText: (child.textContent ?? '').trim().length > 0, })); childrenStructureString = JSON.stringify(structure); } const normalizedClasses = normalizeClasses(element.classList); const relevantAttributes = Array.from(element.attributes) .filter((attr) => { if (isCustomElement) { return !['id', 'style', 'data-reactid', 'data-react-checksum'].includes(attr.name.toLowerCase()); } else { return ( !['id', 'style', 'data-reactid', 'data-react-checksum'].includes(attr.name.toLowerCase()) && (!attr.name.startsWith('data-') || attr.name === 'data-type' || attr.name === 'data-role') ); } }) .map((attr) => `${attr.name}=${attr.value}`) .sort(); let depth = 0; let parent = element.parentElement; while (parent && depth < 20) { depth++; parent = parent.parentElement; } const textContent = (element.textContent ?? '').trim(); const textCharacteristics = { hasText: textContent.length > 0, textLength: Math.floor(textContent.length / 20) * 20, hasLinks: element.querySelectorAll('a').length, hasImages: element.querySelectorAll('img').length, hasButtons: element.querySelectorAll('button, input[type="button"], input[type="submit"]').length, }; const signature = `${tagName}::${normalizedClasses}::${children.length}::${childrenStructureString}::${relevantAttributes.join('|')}`; return { tagName, normalizedClasses, childrenCount: children.length, childrenStructure: childrenStructureString, attributes: relevantAttributes.join('|'), depth, textCharacteristics, signature, }; }; const calculateSimilarity = (fp1, fp2) => { if (!fp1 || !fp2) return 0; let score = 0; let maxScore = 0; maxScore += 10; if (fp1.tagName === fp2.tagName) score += 10; else return 0; maxScore += 8; if (fp1.normalizedClasses === fp2.normalizedClasses) score += 8; else if (fp1.normalizedClasses && fp2.normalizedClasses) { const classes1 = fp1.normalizedClasses.split(' ').filter((c) => c); const classes2 = fp2.normalizedClasses.split(' ').filter((c) => c); const commonClasses = classes1.filter((c) => classes2.includes(c)); if (classes1.length > 0 && classes2.length > 0) { score += (commonClasses.length / Math.max(classes1.length, classes2.length)) * 8; } } maxScore += 8; if (fp1.childrenStructure === fp2.childrenStructure) score += 8; else if (fp1.childrenCount === fp2.childrenCount) score += 4; maxScore += 5; if (fp1.attributes === fp2.attributes) score += 5; else if (fp1.attributes && fp2.attributes) { const attrs1 = fp1.attributes.split('|').filter((a) => a); const attrs2 = fp2.attributes.split('|').filter((a) => a); const commonAttrs = attrs1.filter((a) => attrs2.includes(a)); if (attrs1.length > 0 && attrs2.length > 0) { score += (commonAttrs.length / Math.max(attrs1.length, attrs2.length)) * 5; } } maxScore += 2; if (Math.abs(fp1.depth - fp2.depth) <= 1) score += 2; else if (Math.abs(fp1.depth - fp2.depth) <= 2) score += 1; maxScore += 3; const tc1 = fp1.textCharacteristics; const tc2 = fp2.textCharacteristics; if (tc1.hasText === tc2.hasText) score += 1; if (Math.abs(tc1.textLength - tc2.textLength) <= 40) score += 1; if (tc1.hasLinks === tc2.hasLinks && tc1.hasImages === tc2.hasImages) score += 1; return maxScore > 0 ? score / maxScore : 0; }; const hasAnyMeaningfulChildren = (element) => { const meaningfulChildren = []; const traverse = (el, depth) => { if (depth === undefined) depth = 0; if (depth > 5) return; Array.from(el.children).forEach(function(child) { const tagName = child.tagName.toLowerCase(); if (tagName === 'img' && child.hasAttribute('src')) { meaningfulChildren.push(child); return; } if (tagName === 'a' && child.hasAttribute('href')) { meaningfulChildren.push(child); return; } const text = (child.textContent || '').trim(); const hasVisibleText = text.length > 0; if (hasVisibleText || child.querySelector('svg')) { meaningfulChildren.push(child); return; } if (child.children.length > 0) { traverse(child, depth + 1); } }); if (el.shadowRoot) { Array.from(el.shadowRoot.children).forEach(function(shadowChild) { const tagName = shadowChild.tagName.toLowerCase(); if (tagName === 'img' && shadowChild.hasAttribute('src')) { meaningfulChildren.push(shadowChild); return; } if (tagName === 'a' && shadowChild.hasAttribute('href')) { meaningfulChildren.push(shadowChild); return; } const text = (shadowChild.textContent || '').trim(); const hasVisibleText = text.length > 0; if (hasVisibleText || shadowChild.querySelector('svg')) { meaningfulChildren.push(shadowChild); return; } if (shadowChild.children.length > 0) { traverse(shadowChild, depth + 1); } }); } }; traverse(element); return meaningfulChildren.length > 0; }; const getAllVisibleElements = () => { const allElements = []; const visited = new Set(); const traverseContainer = (container) => { try { const elements = Array.from(container.querySelectorAll('*')).filter((el) => { const rect = el.getBoundingClientRect(); return rect.width > 0 && rect.height > 0; }); elements.forEach((element) => { if (!visited.has(element)) { visited.add(element); allElements.push(element); if (element.shadowRoot) { traverseContainer(element.shadowRoot); } } }); } catch (error) { console.warn('Error traversing container:', error); } }; traverseContainer(document); return allElements; }; const allElements = getAllVisibleElements(); const processedInTables = new Set(); const elementGroups = new Map(); const groupedElements = new Set(); // Group table rows const tables = allElements.filter(el => el.tagName === 'TABLE'); tables.forEach(table => { const rows = Array.from(table.querySelectorAll('tbody > tr')).filter(row => { const parent = row.parentElement; if (!parent || !table.contains(parent)) return false; const rect = row.getBoundingClientRect(); return rect.width > 0 && rect.height > 0; }); if (rows.length >= 2) { const representativeFingerprint = getStructuralFingerprint(rows[0]); if (!representativeFingerprint) return; const group = { elements: rows, fingerprint: representativeFingerprint, representative: rows[0], }; rows.forEach(row => { elementGroups.set(row, group); groupedElements.add(row); processedInTables.add(row); }); } }); // Group other elements const remainingElements = allElements.filter(el => !processedInTables.has(el)); const elementFingerprints = new Map(); remainingElements.forEach((element) => { const fingerprint = getStructuralFingerprint(element); if (fingerprint) { elementFingerprints.set(element, fingerprint); } }); const processedElements = new Set(); const similarityThreshold = 0.7; const minGroupSize = 2; const maxParentLevels = 5; elementFingerprints.forEach((fingerprint, element) => { if (processedElements.has(element)) return; const currentGroup = [element]; processedElements.add(element); elementFingerprints.forEach((otherFingerprint, otherElement) => { if (processedElements.has(otherElement)) return; const similarity = calculateSimilarity(fingerprint, otherFingerprint); if (similarity >= similarityThreshold) { currentGroup.push(otherElement); processedElements.add(otherElement); } }); if (currentGroup.length >= minGroupSize && hasAnyMeaningfulChildren(element)) { let grouped = false; for (let level = 1; level <= maxParentLevels && !grouped; level++) { let ancestor = currentGroup[0]; for (let i = 0; i < level && ancestor; i++) { ancestor = ancestor.parentElement; } if (!ancestor) break; const allShareAncestor = currentGroup.every(el => { let elAncestor = el; for (let i = 0; i < level && elAncestor; i++) { elAncestor = elAncestor.parentElement; } return elAncestor === ancestor; }); if (allShareAncestor) { const group = { elements: currentGroup, fingerprint, representative: element, }; currentGroup.forEach((el) => { elementGroups.set(el, group); groupedElements.add(el); }); grouped = true; } } if (!grouped) { currentGroup.forEach((el, idx) => { if (idx > 0) processedElements.delete(el); }); } } }); // Convert to serializable format with XPath const uniqueGroups = new Map(); elementGroups.forEach((group) => { const signature = group.fingerprint.signature; if (!uniqueGroups.has(signature)) { const tagName = group.fingerprint.tagName; const classes = group.fingerprint.normalizedClasses.split(' ').filter(Boolean); let xpath = `//${tagName}`; if (classes.length > 0) { const classConditions = classes.map(cls => `contains(@class, '${cls}')`).join(' and '); xpath += `[${classConditions}]`; } // Get sample innerText from first 3 elements const sampleTexts = group.elements.slice(0, 3).map((el) => { return (el.textContent || '').trim().substring(0, 200); }); // Get sample HTML structure const sampleHTML = group.representative.outerHTML.substring(0, 500); uniqueGroups.set(signature, { fingerprint: group.fingerprint, count: group.elements.length, xpath: xpath, sampleTexts: sampleTexts, sampleHTML: sampleHTML, }); } }); return Array.from(uniqueGroups.values()); } catch (error) { console.error('[analyzeElementGroups] Error:', error); return []; } }; })();