// Source file: parcer/server/src/sdk/browserSide/pageAnalyzer.js (JavaScript; repository listing reports 2630 lines, 79 KiB)

/**
* Page Analyzer for pagination auto-detection, selector generation and grouping
*/
(function () {
'use strict';
/**
 * Evaluate a selector that may be written either as CSS or as XPath.
 *
 * A selector is treated as XPath when it starts with "/" (optionally preceded by
 * "(" or by "." / ".."), because no valid CSS selector can begin that way.
 * Everything else is handed to querySelectorAll.
 *
 * @param {string} selector - CSS selector or XPath expression.
 * @param {Document} doc - Document used as evaluator and as XPath context node.
 * @returns {Element[]} Matching elements; an empty array when nothing matches or
 *   the selector cannot be evaluated.
 */
function evaluateSelector(selector, doc) {
  try {
    // Only "//" and "(//" used to be recognised, so absolute ("/html/body/div") and
    // relative ("./div") paths were passed to querySelectorAll and silently produced [].
    const isXPath = /^\s*\(*\s*\.{0,2}\//.test(selector);
    if (!isXPath) {
      return Array.from(doc.querySelectorAll(selector));
    }
    const snapshot = doc.evaluate(
      selector,
      doc,
      null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
      null
    );
    const elements = [];
    for (let i = 0; i < snapshot.snapshotLength; i++) {
      const node = snapshot.snapshotItem(i);
      // XPath can also select text/attribute nodes; callers only want elements.
      if (node && node.nodeType === Node.ELEMENT_NODE) {
        elements.push(node);
      }
    }
    return elements;
  } catch (err) {
    // Best effort: an invalid selector is reported as "no match".
    return [];
  }
}
/**
 * Convert a (simple) CSS selector into an XPath expression.
 *
 * Supports descendant (whitespace) and child (">") combinators; each compound
 * selector is translated by convertCssPart. Input that already looks like XPath
 * (starts with "/") is returned unchanged.
 *
 * @param {string} cssSelector - CSS selector or XPath expression.
 * @returns {string} XPath expression; "//*" when nothing could be converted.
 */
function cssToXPath(cssSelector) {
  if (cssSelector.startsWith('//') || cssSelector.startsWith('/')) {
    return cssSelector;
  }
  try {
    // Put whitespace around every ">" outside [...] so "a>b" tokenises like "a > b".
    // Without this the combinator stayed glued to its neighbours and was lost.
    const spaced = cssSelector.replace(/\s*>\s*(?![^[]*\])/g, ' > ');
    let xpath = '';
    let childOnly = false;
    // Split on whitespace that is not inside an attribute selector.
    for (const rawToken of spaced.split(/\s+(?![^[]*\])/)) {
      const token = rawToken.trim();
      if (!token) continue;
      if (token === '>') {
        childOnly = true;
        continue;
      }
      // The first step always searches the whole document.
      const axis = xpath !== '' && childOnly ? '/' : '//';
      xpath += axis + convertCssPart(token);
      childOnly = false;
    }
    return xpath || '//*';
  } catch (error) {
    return '//*';
  }
}
/**
 * Convert one compound CSS selector (no combinators) into an XPath step.
 *
 * Handles the tag name, one #id, .classes and [attribute] selectors. Attribute
 * selectors may use the operators =, ^=, *=, $=, ~= and |= with double-quoted,
 * single-quoted or unquoted values. Pseudo-classes are not translated.
 *
 * @param {string} cssPart - Compound selector such as `a.link[href^="http"]`.
 * @returns {string} XPath step, e.g. `a[contains(@class, 'link') and starts-with(@href, 'http')]`.
 */
function convertCssPart(cssPart) {
  const attrBlockPattern = /\[([^\]]+)\]/g;

  // XPath 1.0 string literals have no escape character: choose the quote style
  // that does not occur in the text, or build the string with concat().
  const toXPathLiteral = (text) => {
    if (!text.includes("'")) return `'${text}'`;
    if (!text.includes('"')) return `"${text}"`;
    return `concat('${text.split("'").join(`', "'", '`)}')`;
  };

  // Attribute blocks are parsed separately below. Removing them first keeps "#"
  // and "." inside attribute values (href="#top", src="a.png") from being read
  // as id / class selectors.
  const bare = cssPart.replace(attrBlockPattern, '');

  const tagMatch = bare.match(/^([a-zA-Z][\w-]*|\*)/);
  const tag = tagMatch ? tagMatch[1] : '*';
  const predicates = [];

  const idMatch = bare.match(/#([\w-]+)/);
  if (idMatch) {
    predicates.push(`@id='${idMatch[1]}'`);
  }

  const classTokens = bare.match(/\.((?:\\.|[^.#[\s])+)/g) || [];
  for (const token of classTokens) {
    // Drop the leading dot and CSS escape backslashes (e.g. "md\:flex").
    const className = token.substring(1).replace(/\\/g, '');
    predicates.push(`contains(@class, ${toXPathLiteral(className)})`);
  }

  const attrBlocks = cssPart.match(attrBlockPattern) || [];
  for (const block of attrBlocks) {
    const content = block.slice(1, -1);
    const parsed = content.match(
      /^\s*([^\s~|^$*=]+)\s*(?:([~|^$*]?=)\s*(?:"([^"]*)"|'([^']*)'|(\S+)))?/
    );
    if (!parsed || !parsed[2]) {
      // Presence test: [disabled]
      predicates.push(`@${parsed ? parsed[1] : content}`);
      continue;
    }
    const name = parsed[1];
    const operator = parsed[2];
    const value = [parsed[3], parsed[4], parsed[5]].find((v) => v !== undefined) || '';
    const literal = toXPathLiteral(value);
    switch (operator) {
      case '^=':
        predicates.push(`starts-with(@${name}, ${literal})`);
        break;
      case '*=':
        predicates.push(`contains(@${name}, ${literal})`);
        break;
      case '$=':
        // XPath 1.0 has no ends-with(): compare the trailing substring instead.
        predicates.push(
          `substring(@${name}, string-length(@${name}) - ${[...value].length - 1}) = ${literal}`
        );
        break;
      case '~=':
        predicates.push(
          `contains(concat(' ', normalize-space(@${name}), ' '), ${toXPathLiteral(` ${value} `)})`
        );
        break;
      case '|=':
        predicates.push(
          `(@${name}=${literal} or starts-with(@${name}, ${toXPathLiteral(`${value}-`)}))`
        );
        break;
      default:
        predicates.push(`@${name}=${literal}`);
    }
  }

  return predicates.length > 0 ? `${tag}[${predicates.join(' and ')}]` : tag;
}
/**
 * Main entry point for the SDK: detect extractable fields for a list selector.
 *
 * The selector is converted to XPath (CSS is accepted). When the first matched
 * elements have differing child counts and most of them have exactly one child,
 * the selector is narrowed with a `count(*)=1` predicate before the fields are
 * detected with window.getChildSelectors.
 *
 * @param {string} selector - CSS selector or XPath matching the list items.
 * @returns {{fields: Object, listSelector: (string|null), listFallbackSelector: null, error: (string|null|undefined)}}
 *   Detected fields plus the XPath actually used; `error` is set when nothing
 *   could be detected.
 */
window.autoDetectListFields = function (selector) {
  try {
    let xpathSelector = cssToXPath(selector);
    const testElements = evaluateXPath(xpathSelector, document);
    if (testElements.length === 0) {
      console.error('No elements matched the XPath selector!');
      return {
        fields: {},
        listSelector: xpathSelector,
        listFallbackSelector: null,
        error: 'Selector did not match any elements on the page'
      };
    }
    if (!xpathSelector.includes('count(*)')) {
      const childCounts = testElements.slice(0, 5).map(el => el.children.length);
      const hasMixedCounts = new Set(childCounts).size > 1;
      const singleChildItems = childCounts.filter(c => c === 1).length;
      if (hasMixedCounts && singleChildItems > childCounts.length / 2) {
        // Merge into the trailing predicate only when it is a filter; a positional
        // predicate such as [2] must keep its meaning, so it gets a separate predicate.
        const endsWithFilter =
          xpathSelector.endsWith(']') && !/\[\s*\d+\s*\]$/.test(xpathSelector);
        // Selectors that contain a predicate somewhere but do not end with one
        // (e.g. //div[@id='x']/ul/li) used to fall into a no-op replace() branch
        // and were never narrowed.
        xpathSelector = endsWithFilter
          ? xpathSelector.slice(0, -1) + ' and count(*)=1]'
          : xpathSelector + '[count(*)=1]';
      }
    }
    const fields = window.getChildSelectors(xpathSelector);
    return {
      fields: fields,
      listSelector: xpathSelector,
      listFallbackSelector: null,
      error: Object.keys(fields).length === 0 ? 'No valid fields could be auto-detected from the list items' : null
    };
  } catch (error) {
    console.error('Exception:', error);
    // Same shape as the other return paths so callers can read the keys unconditionally.
    return {
      fields: {},
      listSelector: null,
      listFallbackSelector: null,
      error: error.message || 'Failed to auto-detect fields'
    };
  }
};
// Memoization stores shared by the selector-generation helpers below.
// pathCache: target element -> relative structural path string (or null), see getOptimizedStructuralPath.
const pathCache = new WeakMap();
// descendantsCache: list item element -> array of its meaningful descendants, see getAllDescendantsIncludingShadow.
const descendantsCache = new WeakMap();
// meaningfulCache: element -> boolean verdict of isMeaningfulElement.
const meaningfulCache = new WeakMap();
// classCache: "<TAG>_<classes>_<number of other list items>" -> class names, see getCommonClassesAcrossLists.
// NOTE(review): the key does not identify the list itself, so entries can be reused across different lists.
const classCache = new Map();
/**
 * Detect extractable fields inside the items matched by a list selector.
 *
 * At most the first 10 matched items are analysed. For every item, candidate
 * XPaths for its meaningful descendants are generated and then turned into
 * field definitions.
 *
 * @param {string} parentSelector - XPath matching the list items.
 * @returns {Object} Map of "Label N" -> field definition; {} when nothing
 *   matches or an error occurs.
 */
window.getChildSelectors = function (parentSelector) {
  try {
    // classCache keys contain only tag, class names and the number of sibling
    // items, so entries from an earlier call (another list, or the same list
    // before the DOM changed) would otherwise be reused here.
    classCache.clear();

    const parentElements = evaluateXPath(parentSelector, document);
    if (parentElements.length === 0) {
      console.error('No parent elements found!');
      return {};
    }
    const MAX_ITEMS = 10;
    const limitedParents = parentElements.slice(0, MAX_ITEMS);
    const allChildSelectors = [];
    limitedParents.forEach((parent, position) => {
      const otherListElements = limitedParents.filter((_, index) => index !== position);
      allChildSelectors.push(
        ...generateOptimizedChildXPaths(parent, parentSelector, otherListElements)
      );
    });
    // The former `Array.from(new Set(...)).sort()` was a no-op: the entries are
    // distinct objects (a Set removes nothing) and the default sort compares them
    // all as "[object Object]". The list is therefore passed on unchanged; the
    // final de-duplication happens in createFieldsFromSelectors.
    return createFieldsFromSelectors(allChildSelectors, limitedParents, parentSelector);
  } catch (error) {
    console.error('Exception:', error);
    return {};
  }
};
/**
 * Build selector descriptors for the meaningful descendants of one list item.
 *
 * @param {Element} parentElement - The list item being analysed.
 * @param {string} listSelector - XPath that matches the list items.
 * @param {Element[]} otherListElements - The remaining sampled list items.
 * @returns {Array<{primary: string, fallback: (string|null), element: Element}>}
 *   Up to 250 descriptors, in descendant order; descendants without a primary
 *   XPath are left out.
 */
function generateOptimizedChildXPaths(parentElement, listSelector, otherListElements) {
  const MAX_SELECTORS = 250;
  const collected = [];
  const alreadyHandled = new Set();
  for (const node of getAllDescendantsIncludingShadow(parentElement)) {
    if (alreadyHandled.has(node)) {
      continue;
    }
    alreadyHandled.add(node);
    const { primary, fallback } = buildOptimizedAbsoluteXPath(
      node,
      listSelector,
      parentElement,
      otherListElements
    );
    if (primary) {
      collected.push({ primary, fallback, element: node });
    }
    if (collected.length >= MAX_SELECTORS) {
      break;
    }
  }
  return collected;
}
/**
 * Collect the meaningful descendants of an element, breadth-first, descending
 * into open shadow roots as well.
 *
 * The walk is bounded: at most 1200 nodes are inspected, 300 results collected,
 * 20 levels descended, and only the first 30 light-DOM / 20 shadow-DOM children
 * of any node are queued. The result is memoized per parent element.
 *
 * @param {Element} parentElement - Root of the walk (never part of the result).
 * @returns {Element[]} Meaningful descendants in breadth-first order.
 */
function getAllDescendantsIncludingShadow(parentElement) {
  if (descendantsCache.has(parentElement)) {
    return descendantsCache.get(parentElement);
  }
  const LIMITS = { results: 300, inspected: 1200, depth: 20, lightChildren: 30, shadowChildren: 20 };
  const found = [];
  const seen = new Set([parentElement]);
  const pending = [{ node: parentElement, depth: 0 }];

  // Queue the first `cap` not-yet-seen members of a child collection.
  const enqueue = (collection, cap, depth) => {
    const count = Math.min(collection.length, cap);
    for (let k = 0; k < count; k++) {
      const candidate = collection[k];
      if (seen.has(candidate)) continue;
      seen.add(candidate);
      pending.push({ node: candidate, depth });
    }
  };

  let inspected = 0;
  for (let cursor = 0; cursor < pending.length; cursor++) {
    const { node, depth } = pending[cursor];
    inspected++;
    const budgetExhausted =
      inspected > LIMITS.inspected ||
      found.length >= LIMITS.results ||
      depth > LIMITS.depth;
    if (budgetExhausted) {
      break;
    }
    if (node !== parentElement && isMeaningfulElement(node)) {
      found.push(node);
    }
    if (depth >= LIMITS.depth) {
      continue;
    }
    enqueue(node.children, LIMITS.lightChildren, depth + 1);
    if (node.shadowRoot && depth < LIMITS.depth - 1) {
      enqueue(node.shadowRoot.children, LIMITS.shadowChildren, depth + 1);
    }
  }
  descendantsCache.set(parentElement, found);
  return found;
}
/**
 * Decide whether an element carries content worth extracting.
 *
 * True for <img> with a src, <a> with an href, any element with non-blank text
 * content, or any element containing an <svg>. The verdict is memoized.
 *
 * @param {Element} element - Element to classify.
 * @returns {boolean} Whether the element is considered meaningful.
 */
function isMeaningfulElement(element) {
  if (meaningfulCache.has(element)) {
    return meaningfulCache.get(element);
  }
  const verdict = (() => {
    const tag = element.tagName.toLowerCase();
    if (tag === 'img' && element.hasAttribute('src')) return true;
    if (tag === 'a' && element.hasAttribute('href')) return true;
    const text = (element.textContent || '').trim();
    return text.length > 0 || Boolean(element.querySelector('svg'));
  })();
  meaningfulCache.set(element, verdict);
  return verdict;
}
/**
 * Build the primary and fallback XPath for a descendant of a list item.
 *
 * The primary XPath is the list selector followed by the structural path from
 * the list item to the target; the fallback is based on data-mx-id attributes.
 *
 * @param {Element} targetElement - Descendant to address.
 * @param {string} listSelector - XPath matching the list items.
 * @param {Element} listElement - List item containing the target.
 * @param {Element[]} otherListElements - The remaining sampled list items.
 * @returns {{primary: (string|null), fallback: (string|null)}} Primary is null
 *   when no structural path exists or computing it throws.
 */
function buildOptimizedAbsoluteXPath(targetElement, listSelector, listElement, otherListElements) {
  const fallbackFor = () => generateMandatoryChildFallbackXPath(targetElement, listElement);
  try {
    const relativePath = getOptimizedStructuralPath(
      targetElement,
      listElement,
      otherListElements
    );
    return {
      primary: relativePath ? listSelector + relativePath : null,
      fallback: fallbackFor()
    };
  } catch (error) {
    return { primary: null, fallback: fallbackFor() };
  }
}
/**
 * Compute the structural XPath suffix that leads from a list item to one of its
 * descendants (for example "/div[contains(@class, 'card')]/span[2]").
 *
 * Walks upwards from the target (crossing shadow hosts) for at most 20 steps.
 * A step is made positional when another element of the same tag inside the
 * list item carries all of the classes chosen for that step. Results, including
 * failures after the walk, are memoized per target element.
 *
 * @param {Element} targetElement - Descendant to address.
 * @param {Element} rootElement - The list item the path is relative to.
 * @param {Element[]} otherListElements - The remaining sampled list items.
 * @returns {string|null} Path starting with "/", or null when the target is the
 *   root itself, is not inside it, or the root was not reached.
 */
function getOptimizedStructuralPath(targetElement, rootElement, otherListElements) {
  if (pathCache.has(targetElement)) {
    return pathCache.get(targetElement);
  }
  if (targetElement === rootElement || !elementContains(rootElement, targetElement)) {
    return null;
  }
  const MAX_STEPS = 20;
  const steps = [];
  let node = targetElement;
  for (let hops = 0; node && node !== rootElement && hops < MAX_STEPS; hops++) {
    const sharedClasses = getCommonClassesAcrossLists(node, otherListElements);
    let clashes = false;
    if (sharedClasses.length > 0 && rootElement) {
      const here = node;
      clashes = queryElementsInScope(rootElement, here.tagName.toLowerCase()).some(other => {
        if (other === here) return false;
        const otherClasses = normalizeClasses(other.classList).split(' ');
        return sharedClasses.every(cls => otherClasses.includes(cls));
      });
    }
    const step = generateOptimizedStructuralStep(node, rootElement, clashes, otherListElements);
    if (step) {
      steps.unshift(step);
    }
    // Step out of a shadow tree through its host when there is no parent element.
    node = node.parentElement || node.getRootNode().host;
  }
  const reachedRoot = node === rootElement;
  const result = reachedRoot && steps.length > 0 ? '/' + steps.join('/') : null;
  pathCache.set(targetElement, result);
  return result;
}
/**
 * Produce one XPath step (tag plus predicate) for an element.
 *
 * Preference order: class predicate (made positional when another same-tag
 * element inside rootElement carries the same classes), then a `role` / `type`
 * attribute that is common across list items, then the position among same-tag
 * siblings.
 *
 * @param {Element} element - Element to describe.
 * @param {Element} rootElement - Scope used to look for conflicting elements.
 * @param {boolean} addPositionToAll - Force a purely positional step.
 * @param {Element[]} otherListElements - The remaining sampled list items.
 * @returns {string} XPath step such as "span[contains(@class, 'title')]" or "div[2]".
 */
function generateOptimizedStructuralStep(element, rootElement, addPositionToAll, otherListElements) {
  // XPath 1.0 has no escape character inside string literals, so quote with the
  // delimiter that does not occur in the value, or fall back to concat().
  const toXPathLiteral = (text) => {
    if (!text.includes("'")) return `'${text}'`;
    if (!text.includes('"')) return `"${text}"`;
    return `concat('${text.split("'").join(`', "'", '`)}')`;
  };

  const tagName = element.tagName.toLowerCase();
  const parent = element.parentElement ||
    ((element.getRootNode()).host);
  if (!parent) {
    return tagName;
  }
  const classes = getCommonClassesAcrossLists(element, otherListElements);
  if (classes.length > 0 && !addPositionToAll) {
    const classSelector = classes
      .map(cls => `contains(@class, '${cls}')`)
      .join(' and ');
    const hasConflictingElement = rootElement
      ? queryElementsInScope(rootElement, tagName)
        .filter(el => el !== element)
        .some(el => classes.every(cls =>
          normalizeClasses(el.classList).split(' ').includes(cls)
        ))
      : false;
    if (!hasConflictingElement) {
      return `${tagName}[${classSelector}]`;
    }
    return `${tagName}[${classSelector}][${getSiblingPosition(element, parent)}]`;
  }
  if (!addPositionToAll) {
    for (const attrName of ['role', 'type']) {
      if (!element.hasAttribute(attrName)) continue;
      // Compare the raw value. The former backslash-escaped copy never equalled
      // the raw attribute of other items and produced an invalid XPath literal
      // for values containing an apostrophe.
      const value = element.getAttribute(attrName);
      const isCommon = isAttributeCommonAcrossLists(
        element,
        attrName,
        value,
        otherListElements
      );
      if (isCommon) {
        return `${tagName}[@${attrName}=${toXPathLiteral(value)}]`;
      }
    }
  }
  // Reached only when positions are forced or no usable classes exist.
  return `${tagName}[${getSiblingPosition(element, parent)}]`;
}
/**
 * Pick the (normalized) classes of an element that are safe to use in a selector
 * shared by all list items.
 *
 * With no other list items, or when no same-tag element exists in them, or when
 * some same-tag element has exactly the same class set, all of the element's
 * classes are returned. Otherwise only the classes that occur on a same-tag
 * element in every other list item are kept. Results are memoized in classCache
 * under "<TAG>_<classes>_<number of other items>".
 *
 * @param {Element} targetElement - Element whose classes are examined.
 * @param {Element[]} otherListElements - The remaining sampled list items.
 * @returns {string[]} Class names, sorted.
 */
function getCommonClassesAcrossLists(targetElement, otherListElements) {
  const ownClasses = normalizeClasses(targetElement.classList).split(' ').filter(Boolean);
  if (otherListElements.length === 0 || ownClasses.length === 0) {
    return ownClasses;
  }
  const tag = targetElement.tagName;
  const cacheKey = `${tag}_${ownClasses.join(',')}_${otherListElements.length}`;
  if (classCache.has(cacheKey)) {
    return classCache.get(cacheKey);
  }
  const remember = (value) => {
    classCache.set(cacheKey, value);
    return value;
  };

  // Sample up to 100 same-tag elements from the other list items.
  const SAMPLE_LIMIT = 100;
  const sameTagPeers = [];
  for (const listEl of otherListElements) {
    if (sameTagPeers.length >= SAMPLE_LIMIT) break;
    for (const node of getAllDescendantsIncludingShadow(listEl)) {
      if (sameTagPeers.length >= SAMPLE_LIMIT) break;
      if (node.tagName === tag) {
        sameTagPeers.push(node);
      }
    }
  }
  if (sameTagPeers.length === 0) {
    return remember(ownClasses);
  }

  // Fast path: some peer has exactly the same class set.
  const ownSet = new Set(ownClasses);
  const hasExactTwin = sameTagPeers.some(peer => {
    const peerClasses = normalizeClasses(peer.classList).split(' ').filter(Boolean);
    return peerClasses.length === ownClasses.length && peerClasses.every(cls => ownSet.has(cls));
  });
  if (hasExactTwin) {
    return remember(ownClasses);
  }

  // Otherwise keep only classes found on a same-tag element in every other item.
  const shared = ownClasses.filter(cls =>
    otherListElements.every(listEl =>
      getAllDescendantsIncludingShadow(listEl).some(node =>
        node.tagName === tag &&
        normalizeClasses(node.classList).split(' ').includes(cls)
      )
    )
  );
  return remember(shared);
}
/**
 * Reduce a class list to the names that look stable enough for selectors.
 *
 * Dropped: names containing 3+ consecutive digits, "uuid" or "hash", an "id-"
 * token, a trailing "_<digits>", and Angular-generated names (_ngcontent-*,
 * _nghost-*, ng-tns-c<N>-<N>).
 *
 * @param {Iterable<string>} classList - e.g. an element's classList.
 * @returns {string} Remaining names, sorted and joined with single spaces.
 */
function normalizeClasses(classList) {
  // "id-" must start the name or follow "-" / "_". Matching it anywhere also
  // discarded ordinary static classes such as "grid-item" or "valid-feedback".
  const dynamicPattern = /\d{3,}|uuid|hash|(?:^|[-_])id-|_\d+$/i;
  return Array.from(classList)
    .filter(cls => {
      return (
        !dynamicPattern.test(cls) &&
        !cls.startsWith('_ngcontent-') &&
        !cls.startsWith('_nghost-') &&
        !/^ng-tns-c\d+-\d+$/.test(cls)
      );
    })
    .sort()
    .join(' ');
}
/**
 * Check whether the structurally corresponding element in every other list item
 * carries the same attribute value as the target.
 *
 * The target's position is expressed as a child-index path relative to the list
 * item that owns it, and that path is replayed inside each other list item.
 * Items with no element at that path are ignored.
 *
 * @param {Element} targetElement - Element carrying the attribute.
 * @param {string} attrName - Attribute to compare.
 * @param {string} attrValue - Value of the attribute on the target.
 * @param {Element[]} otherListElements - The remaining sampled list items.
 * @param {Element} [targetListElement] - List item that contains the target.
 *   When omitted it is inferred as the nearest ancestor-or-self of the target
 *   whose parent also contains one of the other list items.
 * @returns {boolean} False as soon as a corresponding element has a different value.
 */
function isAttributeCommonAcrossLists(targetElement, attrName, attrValue, otherListElements, targetListElement) {
  if (otherListElements.length === 0) {
    return true;
  }

  let listRoot = targetListElement || null;
  if (!listRoot) {
    for (let node = targetElement; node && node.parentElement; node = node.parentElement) {
      const container = node.parentElement;
      if (otherListElements.some(other => container.contains(other))) {
        listRoot = node;
        break;
      }
    }
  }

  // The index path must be relative to the owning list item. A path measured from
  // the document root (the previous behaviour) points at nothing, or at an
  // unrelated node, when replayed from another list item's root.
  let targetPath = null;
  if (listRoot) {
    const indices = [];
    let node = targetElement;
    while (node && node !== listRoot && node.parentElement) {
      indices.unshift(Array.from(node.parentElement.children).indexOf(node));
      node = node.parentElement;
    }
    if (node === listRoot) {
      targetPath = indices;
    }
  }
  if (!targetPath) {
    // Owning item unknown (e.g. shadow DOM boundary): keep the legacy path.
    targetPath = getElementPath(targetElement);
  }

  for (const otherListElement of otherListElements) {
    const correspondingElement = findCorrespondingElement(otherListElement, targetPath);
    if (correspondingElement && correspondingElement.getAttribute(attrName) !== attrValue) {
      return false;
    }
  }
  return true;
}
/**
 * Describe an element's location as child indices, outermost ancestor first.
 *
 * @param {Element} element - Element to locate.
 * @returns {number[]} Index of each ancestor-or-self within its parent's
 *   children, starting below the topmost ancestor that has no parentElement.
 */
function getElementPath(element) {
  const indices = [];
  for (let node = element; node && node.parentElement; node = node.parentElement) {
    const position = Array.from(node.parentElement.children).indexOf(node);
    indices.push(position);
  }
  // Collected innermost-first; callers expect outermost-first.
  return indices.reverse();
}
/**
 * Follow a child-index path downwards from a root element.
 *
 * @param {Element} rootElement - Element the path starts from.
 * @param {number[]} path - Child indices, outermost first.
 * @returns {Element|null} The element at the end of the path, or null when an
 *   index is beyond the available children.
 */
function findCorrespondingElement(rootElement, path) {
  let cursor = rootElement;
  for (let depth = 0; depth < path.length; depth++) {
    const slot = path[depth];
    const kids = Array.from(cursor.children);
    if (slot >= kids.length) {
      return null;
    }
    cursor = kids[slot];
  }
  return cursor;
}
/**
 * Get the 1-based position of an element among its same-tag siblings, as used
 * in XPath positional predicates.
 *
 * @param {Element} element - Element to locate.
 * @param {Element} parent - Expected container of the element.
 * @returns {number} 1-based position, or 0 when the element cannot be found.
 */
function getSiblingPosition(element, parent) {
  const positionAmong = (container) => {
    if (!container || !container.children) {
      return 0;
    }
    return Array.from(container.children)
      .filter(child => child.tagName === element.tagName)
      .indexOf(element) + 1;
  };
  const position = positionAmong(parent);
  if (position > 0) {
    return position;
  }
  // Callers pass the shadow host as `parent` for nodes sitting directly under a
  // shadow root; the host's light-DOM children never include such a node, which
  // used to yield position 0. Look at the real container instead.
  return positionAmong(element.parentNode);
}
/**
 * Find all elements with a given tag below a root, using a shadow-piercing
 * search when the root hosts a shadow tree or lives inside one.
 *
 * @param {Element} rootElement - Search scope.
 * @param {string} tagName - Tag name (used as a CSS selector).
 * @returns {Element[]} Matching elements.
 */
function queryElementsInScope(rootElement, tagName) {
  const involvesShadowDom = Boolean(rootElement.shadowRoot) || isInShadowDOM(rootElement);
  return involvesShadowDom
    ? deepQuerySelectorAll(rootElement, tagName)
    : Array.from(rootElement.querySelectorAll(tagName));
}
/**
 * Tell whether an element lives inside a shadow tree.
 *
 * @param {Element} element - Element to test.
 * @returns {boolean} True when the element's root node is a ShadowRoot.
 */
function isInShadowDOM(element) {
  const treeRoot = element.getRootNode();
  return treeRoot instanceof ShadowRoot;
}
/**
 * querySelectorAll that also descends into open shadow roots.
 *
 * Nodes are visited depth-first: a node, then its light-DOM children, then the
 * contents of its shadow root (only for HTMLElement hosts). The root itself is
 * included when it matches.
 *
 * @param {Element|ShadowRoot} root - Starting node.
 * @param {string} selector - CSS selector tested with Element.matches.
 * @returns {Element[]} Matching elements in visiting order.
 */
function deepQuerySelectorAll(root, selector) {
  const matches = [];
  const stack = [root];
  while (stack.length > 0) {
    const node = stack.pop();
    if (node instanceof Element && node.matches(selector)) {
      matches.push(node);
    }
    // Pushed first so the shadow tree is visited after all light-DOM children.
    if (node instanceof HTMLElement && node.shadowRoot) {
      stack.push(node.shadowRoot);
    }
    const kids = Array.from(node.children);
    for (let i = kids.length - 1; i >= 0; i--) {
      stack.push(kids[i]);
    }
  }
  return matches;
}
/**
 * Containment test that also works across shadow boundaries.
 *
 * @param {Element} container - Potential ancestor.
 * @param {Element} element - Potential descendant (or the container itself).
 * @returns {boolean} True when `container.contains(element)` holds, or when the
 *   container is reached by walking up through parents and shadow hosts.
 */
function elementContains(container, element) {
  if (container.contains(element)) {
    return true;
  }
  for (let node = element; node; node = node.parentElement || node.getRootNode().host) {
    if (node === container) {
      return true;
    }
  }
  return false;
}
/**
 * Build a fallback XPath for a child based on data-mx-id attributes.
 *
 * @param {Element} childElement - Element to address.
 * @param {Element} parentElement - List item; must carry data-mx-id.
 * @returns {string|null} XPath anchored at the parent's data-mx-id, or null when
 *   the parent has no id, no path could be built, or anything throws.
 */
function generateMandatoryChildFallbackXPath(childElement, parentElement) {
  try {
    const parentMxId = parentElement.getAttribute('data-mx-id');
    const childMxId = childElement.getAttribute('data-mx-id');
    if (!parentMxId) {
      return null;
    }
    const anchor = `//${parentElement.tagName.toLowerCase()}[@data-mx-id='${parentMxId}']`;
    const childTag = childElement.tagName.toLowerCase();
    if (childMxId) {
      // The child has its own id: address it directly anywhere below the parent.
      return `${anchor}//${childTag}[@data-mx-id='${childMxId}']`;
    }
    const steps = getMandatoryFallbackPath(childElement, parentElement);
    return steps.length > 0 ? `${anchor}/${steps.join('/')}` : null;
  } catch (error) {
    return null;
  }
}
/**
 * Build the XPath steps leading from a root element down to a target, preferring
 * data-mx-id predicates and falling back to same-tag sibling positions.
 *
 * @param {Element} targetElement - Element to address.
 * @param {Element} rootElement - Ancestor at which the walk stops (not included).
 * @returns {string[]} Steps ordered from just below the root to the target. The
 *   walk also stops at an element without parentElement.
 */
function getMandatoryFallbackPath(targetElement, rootElement) {
  const steps = [];
  for (
    let node = targetElement;
    node && node !== rootElement && node.parentElement;
    node = node.parentElement
  ) {
    const mxId = node.getAttribute('data-mx-id');
    const tag = node.tagName.toLowerCase();
    let step;
    if (mxId) {
      step = `${tag}[@data-mx-id='${mxId}']`;
    } else {
      const sameTagSiblings = Array.from(node.parentElement.children)
        .filter(sibling => sibling.tagName === node.tagName);
      step = `${tag}[${sameTagSiblings.indexOf(node) + 1}]`;
    }
    steps.push(step);
  }
  // Collected from the target upwards; return top-down.
  return steps.reverse();
}
/**
 * Evaluate an XPath expression and return the matching elements.
 *
 * @param {string} xpath - XPath expression.
 * @param {Document|Element|ShadowRoot} contextNode - Node the expression is
 *   evaluated against.
 * @returns {Element[]} Matching element nodes in document order; [] when the
 *   expression is invalid or evaluation fails.
 */
function evaluateXPath(xpath, contextNode) {
  try {
    // evaluate() exists on Document only. Element context nodes used to be
    // treated as the document, which threw and silently produced []; resolve the
    // owning document instead (a Document's own ownerDocument is null).
    const doc = contextNode instanceof ShadowRoot
      ? contextNode.host.ownerDocument
      : (contextNode.ownerDocument || contextNode);
    const result = doc.evaluate(
      xpath,
      contextNode,
      null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
      null
    );
    const elements = [];
    for (let i = 0; i < result.snapshotLength; i++) {
      const node = result.snapshotItem(i);
      // Skip text, attribute and other non-element nodes.
      if (node && node.nodeType === Node.ELEMENT_NODE) {
        elements.push(node);
      }
    }
    return elements;
  } catch (error) {
    return [];
  }
}
/**
 * Turn selector descriptors into the final field map.
 *
 * Each primary XPath is evaluated against the document and its first match is
 * converted into field data. Links can contribute two fields (text and href).
 * Candidates are then stripped of parent/child duplicates, ordered by on-screen
 * position (rows within 5px count as one row) and de-duplicated by content.
 *
 * @param {Array<{primary: string}>} selectorObjects - Descriptors to evaluate.
 * @param {Element[]} listElements - Sampled list items (not used here).
 * @param {string} parentSelector - List selector (not used here).
 * @returns {Object} Map of "Label N" -> field definition.
 */
function createFieldsFromSelectors(selectorObjects, listElements, parentSelector) {
  const candidates = [];
  const keep = (element, field) => {
    if (field && field.data) {
      candidates.push({
        field: field,
        element: element,
        position: getElementPosition(element)
      });
    }
  };

  for (const selectorObj of selectorObjects) {
    try {
      const xpath = selectorObj.primary;
      const matches = evaluateXPath(xpath, document);
      if (matches.length === 0) continue;
      const element = matches[0];
      if (element.tagName.toLowerCase() !== 'a') {
        keep(element, createFieldData(element, xpath));
        continue;
      }
      const href = element.getAttribute('href');
      const text = (element.textContent || '').trim();
      if (text) {
        keep(element, createFieldData(element, xpath, 'innerText'));
      }
      const hasRealTarget = href && href !== '#' && !href.startsWith('javascript:');
      if (hasRealTarget) {
        keep(element, createFieldData(element, xpath, 'href'));
      }
    } catch (error) {
      // Best effort: a descriptor that cannot be processed is skipped.
    }
  }

  const filtered = removeParentChildDuplicates(candidates);
  const ROW_TOLERANCE = 5;
  filtered.sort((left, right) => {
    const rowGap = left.position.y - right.position.y;
    return Math.abs(rowGap) > ROW_TOLERANCE ? rowGap : left.position.x - right.position.x;
  });
  return removeDuplicateContentAndFormat(filtered);
}
/**
 * Extract the value of an element and describe how it was obtained.
 *
 * Without `forceAttribute`: images yield their src, links their href (or their
 * text when the href is empty, "#" or a javascript: URL), everything else its
 * trimmed text content. With `forceAttribute` only 'href' and 'innerText' yield
 * data.
 *
 * @param {Element} element - Element to read.
 * @param {string} selector - Selector recorded in the result.
 * @param {string} [forceAttribute] - 'href' or 'innerText' to override the default.
 * @returns {{data: string, selectorObj: {selector: string, attribute: string, tag: string, isShadow: boolean}}|null}
 *   Null when no data could be read.
 */
function createFieldData(element, selector, forceAttribute) {
  const tagName = element.tagName.toLowerCase();
  const readText = () => (element.textContent || '').trim();
  let attribute = forceAttribute || 'innerText';
  let data = '';

  if (forceAttribute) {
    if (forceAttribute === 'href') {
      data = element.getAttribute('href') || '';
    } else if (forceAttribute === 'innerText') {
      data = readText();
    }
  } else {
    switch (tagName) {
      case 'img':
        data = element.getAttribute('src') || '';
        attribute = 'src';
        break;
      case 'a': {
        const href = element.getAttribute('href') || '';
        const text = readText();
        if (href && href !== '#' && !href.startsWith('javascript:')) {
          data = href;
          attribute = 'href';
        } else if (text) {
          data = text;
          attribute = 'innerText';
        }
        break;
      }
      default:
        data = readText();
        attribute = 'innerText';
    }
  }

  if (!data) {
    return null;
  }
  return {
    data,
    selectorObj: {
      selector,
      attribute,
      tag: tagName.toUpperCase(),
      isShadow: element.getRootNode() instanceof ShadowRoot
    }
  };
}
/**
 * Get the viewport position of an element's top-left corner.
 *
 * @param {Element} element - Element to measure.
 * @returns {{x: number, y: number}} `left` / `top` of its bounding client rect.
 */
function getElementPosition(element) {
  const { left, top } = element.getBoundingClientRect();
  return { x: left, y: top };
}
/**
 * Drop candidates whose element is an ancestor of an already kept candidate, and
 * replace kept ancestors by their descendants, so the most specific element wins.
 *
 * Only the first related kept entry is considered per candidate. Links and
 * images are always kept, even when they would otherwise be dropped.
 *
 * @param {Array<{element: Element}>} candidates - Candidates in processing order.
 * @returns {Array<{element: Element}>} Filtered candidates.
 */
function removeParentChildDuplicates(candidates) {
  const kept = [];
  for (const candidate of candidates) {
    const node = candidate.element;
    const tag = node.tagName.toLowerCase();
    const relatedAt = kept.findIndex(entry =>
      node.contains(entry.element) || entry.element.contains(node)
    );
    let accept = true;
    if (relatedAt !== -1) {
      if (node.contains(kept[relatedAt].element)) {
        // The candidate wraps (or is) something already kept.
        accept = false;
      } else {
        // The kept entry wraps the candidate: prefer the deeper element.
        kept.splice(relatedAt, 1);
      }
    }
    if (accept || tag === 'a' || tag === 'img') {
      kept.push(candidate);
    }
  }
  return kept;
}
/**
 * Build the final field map, skipping candidates whose content or
 * selector/attribute pair has already been emitted.
 *
 * @param {Array<{field: {data: string, selectorObj: Object}}>} candidates -
 *   Candidates in output order.
 * @returns {Object} Map of "Label 1", "Label 2", ... to
 *   `{selector, attribute, tag, isShadow}`.
 */
function removeDuplicateContentAndFormat(candidates) {
  const emittedContent = new Set();
  const emittedSelectors = new Set();
  const fields = {};
  let nextLabel = 1;
  candidates.forEach(({ field }) => {
    // Content is compared case-insensitively and ignoring outer whitespace.
    const contentKey = field.data.trim().toLowerCase();
    const { selector, attribute, tag, isShadow } = field.selectorObj;
    const selectorKey = `${selector}::${attribute}`;
    if (emittedContent.has(contentKey) || emittedSelectors.has(selectorKey)) {
      return;
    }
    emittedContent.add(contentKey);
    emittedSelectors.add(selectorKey);
    fields[`Label ${nextLabel}`] = { selector, attribute, tag, isShadow };
    nextLabel += 1;
  });
  return fields;
}
/**
 * Auto-detect the pagination type and selector for a list.
 * Returns: { type: string, selector: string | null }
 * (the early-return path below additionally sets `confidence` and `debug`)
 * Types: 'scrollDown', 'scrollUp', 'clickNext', 'clickLoadMore', ''
 */
window.autoDetectPagination = function (listSelector, options) {
try {
const listElements = evaluateSelector(listSelector, document);
if (listElements.length === 0) {
// The list selector matched nothing, so there is no list to paginate.
return { type: '', selector: null, confidence: 'low', debug: 'No list elements found' };
}
const listContainer = listElements[0];
// Text patterns (several languages plus arrow glyphs) that mark a "next page" control.
const nextButtonPatterns = [
  /next/i,
  /\bnext\s+page\b/i,
  /page\s+suivante/i,
  /siguiente/i,
  /weiter/i,
  // The alternation used to contain an EMPTY branch (">>||→"), which matches every
  // string and made every clickable element look like a "next" button. The missing
  // glyph is taken to be U+203A (single right-pointing angle quotation mark).
  />>|\u203A|→|»|⟩/,
  /\bforward\b/i,
  /\bnewer\b/i,
  /\bolder\b/i
];
// Text patterns that mark a "load more" control.
const loadMorePatterns = [
  /load\s+more/i,
  /show\s+more/i,
  /view\s+more/i,
  /see\s+more/i,
  /more\s+results/i,
  /plus\s+de\s+résultats/i,
  /más\s+resultados/i,
  /weitere\s+ergebnisse/i
];
// Text patterns that mark a "previous page" control.
const prevButtonPatterns = [
  /prev/i,
  /previous/i,
  // Same empty-branch defect as above; the missing glyph is taken to be U+2039.
  /<<|\u2039|←|«/,
  /\bback\b/i
];
/**
 * Test a text against a list of regular expressions.
 *
 * @param {string} text - Text to test.
 * @param {RegExp[]} patterns - Patterns to try in order.
 * @returns {boolean} True as soon as one pattern matches.
 */
function matchesAnyPattern(text, patterns) {
  for (const pattern of patterns) {
    if (pattern.test(text)) {
      return true;
    }
  }
  return false;
}
/**
 * Collect every element in the document that is likely to be clickable
 * (buttons, links, role=button, inline onclick handlers, .btn / .button).
 *
 * @returns {Element[]} Unique elements, grouped in the order of the selectors
 *   that first matched them.
 */
function getClickableElements() {
  const clickableSelectors = ['button', 'a', '[role="button"]', '[onclick]', '.btn', '.button'];
  const unique = new Set();
  for (const selector of clickableSelectors) {
    for (const element of Array.from(document.querySelectorAll(selector))) {
      unique.add(element);
    }
  }
  return Array.from(unique);
}
/**
 * Check whether an element is rendered and occupies space.
 *
 * @param {Element} element - Element to test.
 * @returns {boolean} False when computed display is 'none', visibility is
 *   'hidden', opacity is '0', or the offset width/height is not positive.
 */
function isVisible(element) {
  const { display, visibility, opacity } = window.getComputedStyle(element);
  if (display === 'none' || visibility === 'hidden' || opacity === '0') {
    return false;
  }
  return element.offsetWidth > 0 && element.offsetHeight > 0;
}
/**
 * Build a comma-separated chain of selectors for a pagination control.
 *
 * The element is scrolled into view (best effort), its centre point is passed to
 * getSelectors, and the selector kinds found there are joined in a fixed order
 * of preference.
 *
 * @param {Element} element - Pagination control.
 * @returns {string} Selector chain, or the lowercase tag name when no selector
 *   was produced.
 */
function generatePaginationSelector(element) {
  try {
    element.scrollIntoView({ behavior: 'instant', block: 'center', inline: 'center' });
  } catch (e) {
    // Scrolling is only a convenience; carry on with the current position.
  }
  const box = element.getBoundingClientRect();
  const centre = {
    x: box.left + box.width / 2,
    y: box.top + box.height / 2
  };
  const { primary } = getSelectors(document, centre);
  const preferenceOrder = [
    'id',
    'testIdSelector',
    'relSelector',
    'accessibilitySelector',
    'hrefSelector',
    'formSelector',
    'attrSelector',
    'generalSelector'
  ];
  const chain = primary
    ? preferenceOrder.map(kind => primary[kind]).filter(Boolean)
    : [];
  return chain.length > 0 ? chain.join(',') : element.tagName.toLowerCase();
}
/**
* Comprehensive selector generator (based on @medv/finder)
* Supports shadow DOM, iframes, and multiple selector strategies
*/
function getSelectors(iframeDoc, coordinates) {
try {
// ===== FINDER ALGORITHM =====
// Based on @medv/finder by Anton Medvedev
// https://github.com/antonmedv/finder/blob/master/finder.ts
// Search breadth passed to bottomUpSearch: All keeps every candidate per level,
// Two and One keep only the first candidate (see the branches in bottomUpSearch).
const Limit = {
All: 0,
Two: 1,
One: 2
};
// Options in effect for the current finder() call; assigned by finder().
let config;
// Node whose querySelector/querySelectorAll is used for uniqueness checks; assigned by finder().
let rootDocument;
/**
 * Generate a CSS selector for an element.
 *
 * Sets the shared `config` and `rootDocument`, then searches bottom-up with
 * decreasing breadth (Limit.All, then Limit.Two, then Limit.One as fallbacks)
 * and returns the cheapest optimized path as a selector string.
 *
 * @param {Element} input - Element to describe.
 * @param {Object} [options] - Overrides for the defaults declared below.
 * @returns {string} CSS selector ('html' for the root element).
 * @throws {Error} When `input` is not an element node or no selector is found.
 */
function finder(input, options) {
if (input.nodeType !== Node.ELEMENT_NODE) {
throw new Error("Can't generate CSS selector for non-element node type.");
}
if ('html' === input.tagName.toLowerCase()) {
return 'html';
}
// By default every id, class and tag name is allowed and no attribute is used.
const defaults = {
root: iframeDoc.body,
idName: function (name) { return true; },
className: function (name) { return true; },
tagName: function (name) { return true; },
attr: function (name, value) { return false; },
seedMinLength: 1,
optimizedMinLength: 2,
threshold: 900,
maxNumberOfTries: 9000
};
config = Object.assign({}, defaults, options || {});
rootDocument = findRootDocument(config.root, defaults);
// Each narrower search is only run when the wider one hands over to its fallback.
let path = bottomUpSearch(input, Limit.All, function () {
return bottomUpSearch(input, Limit.Two, function () {
return bottomUpSearch(input, Limit.One);
});
});
if (path) {
// optimize() yields shortened paths; the one with the lowest penalty replaces the original.
const optimized = sort(optimize(path, input));
if (optimized.length > 0) {
path = optimized[0];
}
return selector(path);
} else {
throw new Error('Selector was not found.');
}
}
/**
 * Resolve the node used for uniqueness queries.
 *
 * @param {Node} rootNode - Configured root.
 * @param {{root: Node}} defaults - Default options; `root` is the default root.
 * @returns {Node} The owner document when the root is the (non-document) default
 *   root, otherwise the root itself.
 */
function findRootDocument(rootNode, defaults) {
  const isDocument = rootNode.nodeType === Node.DOCUMENT_NODE;
  if (!isDocument && rootNode === defaults.root) {
    return rootNode.ownerDocument;
  }
  return rootNode;
}
/**
 * Walk from `input` up towards config.root, collecting selector candidates per
 * level, until some combination of them is unique.
 *
 * @param {Element} input - Element to describe.
 * @param {number} limit - One of Limit.All / Limit.Two / Limit.One.
 * @param {Function} [fallback] - Passed on to findUniquePath, which calls it when
 *   there are too many combinations.
 * @returns {Array|null} Path of nodes ({name, penalty, level}) starting at the
 *   input's level, or null.
 */
function bottomUpSearch(input, limit, fallback) {
let path = null;
let stack = [];
let current = input;
let i = 0;
while (current && current !== config.root.parentElement) {
// Candidates for this level, first non-empty group wins: id, attributes, classes, tag, "*".
let level = maybe(id(current)) ||
maybe.apply(null, attr(current)) ||
maybe.apply(null, classNames(current)) ||
maybe(tagName(current)) ||
[any()];
const nth = index(current);
if (limit === Limit.All) {
// Keep every candidate and add an :nth-child variant of each dispensable one.
if (nth) {
level = level.concat(
level.filter(dispensableNth).map(function (node) {
return nthChild(node, nth);
})
);
}
} else if (limit === Limit.Two) {
// Keep the first candidate plus its :nth-child variant.
level = level.slice(0, 1);
if (nth) {
level = level.concat(
level.filter(dispensableNth).map(function (node) {
return nthChild(node, nth);
})
);
}
} else if (limit === Limit.One) {
// Keep a single candidate, replaced by its :nth-child variant when applicable.
const node = level[0];
level = level.slice(0, 1);
if (nth && dispensableNth(node)) {
level = [nthChild(node, nth)];
}
}
// Remember the distance from the input; selector() uses it to choose " > " or " ".
for (let j = 0; j < level.length; j++) {
level[j].level = i;
}
stack.push(level);
if (stack.length >= config.seedMinLength) {
path = findUniquePath(stack, fallback);
if (path) {
break;
}
}
current = current.parentElement;
i++;
}
// Final attempt with everything collected (covers stacks shorter than seedMinLength).
if (!path) {
path = findUniquePath(stack, fallback);
}
return path;
}
/**
 * Pick the cheapest combination of per-level candidates that selects exactly one
 * element.
 *
 * @param {Array<Array>} stack - Candidate nodes per level.
 * @param {Function} [fallback] - Called (and its result returned) when the number
 *   of combinations exceeds config.threshold.
 * @returns {Array|null} Unique path, the fallback's result, or null.
 */
function findUniquePath(stack, fallback) {
  const ranked = sort(combinations(stack));
  if (ranked.length > config.threshold) {
    return fallback ? fallback() : null;
  }
  for (const candidate of ranked) {
    if (unique(candidate)) {
      return candidate;
    }
  }
  return null;
}
/**
 * Render a path of nodes as a CSS selector string.
 *
 * @param {Array<{name: string, level: number}>} path - Nodes from the target
 *   element upwards.
 * @returns {string} Selector; nodes on directly adjacent levels are joined with
 *   " > ", others with a descendant space.
 */
function selector(path) {
  let previous = path[0];
  let query = previous.name;
  for (const step of path.slice(1)) {
    const stepLevel = step.level || 0;
    const joiner = previous.level === stepLevel - 1 ? ' > ' : ' ';
    query = step.name + joiner + query;
    previous = step;
  }
  return query;
}
/**
 * Sum the penalties of all nodes in a path.
 *
 * @param {Array<{penalty: number}>} path - Path to score.
 * @returns {number} Total penalty (0 for an empty path).
 */
function penalty(path) {
  return path.reduce((total, node) => total + node.penalty, 0);
}
/**
 * Check whether a path selects exactly one element under rootDocument.
 *
 * @param {Array} path - Path to test.
 * @returns {boolean} True for exactly one match, false for several.
 * @throws {Error} When the selector matches nothing.
 */
function unique(path) {
  const css = selector(path);
  const hitCount = rootDocument.querySelectorAll(css).length;
  if (hitCount === 0) {
    throw new Error("Can't select any node with this selector: " + css);
  }
  return hitCount === 1;
}
/**
 * Build an id-based candidate for an element.
 *
 * @param {Element} input - Element to inspect.
 * @returns {{name: string, penalty: number}|null} "#<escaped id>" with penalty 0,
 *   or null when there is no id or config.idName rejects it.
 */
function id(input) {
  const elementId = input.getAttribute('id');
  const usable = Boolean(elementId) && config.idName(elementId);
  if (!usable) {
    return null;
  }
  return { name: '#' + cssesc(elementId, { isIdentifier: true }), penalty: 0 };
}
/**
 * Build attribute-based candidates for an element.
 *
 * Only attributes accepted by config.attr are used, and data-mx-id is always
 * skipped. Absolute href values are reduced to their origin-relative part.
 *
 * @param {Element} input - Element to inspect.
 * @returns {Array<{name: string, penalty: number}>} `[name="value"]` candidates
 *   with penalty 0.5.
 */
function attr(input) {
  const accepted = Array.from(input.attributes).filter(function (attribute) {
    return config.attr(attribute.name, attribute.value) && attribute.name !== 'data-mx-id';
  });
  return accepted.map(function (attribute) {
    let attrValue = attribute.value;
    if (attribute.name === 'href' && attribute.value.includes('://')) {
      try {
        const url = new URL(attribute.value);
        const siteOrigin = url.protocol + '//' + url.host;
        attrValue = attribute.value.replace(siteOrigin, '');
      } catch (e) {
        // Keep original if URL parsing fails
      }
    }
    // The value is embedded in a double-quoted attribute selector, so it must be
    // escaped for double quotes. The cssesc default ('single') leaves '"' as-is,
    // which broke the selector for values containing a double quote.
    // NOTE(review): relies on cssesc honouring `quotes`; its escaping loop lies
    // beyond the part of the file reviewed here.
    const escapedValue = cssesc(attrValue, { quotes: 'double' });
    return {
      name: '[' + cssesc(attribute.name, { isIdentifier: true }) + '="' + escapedValue + '"]',
      penalty: 0.5
    };
  });
}
/**
 * Build class-based candidates for an element.
 *
 * @param {Element} input - Element to inspect.
 * @returns {Array<{name: string, penalty: number}>} One ".<escaped class>"
 *   candidate (penalty 1) per class accepted by config.className.
 */
function classNames(input) {
  const accepted = Array.from(input.classList).filter(config.className);
  return accepted.map(cls => ({
    name: '.' + cssesc(cls, { isIdentifier: true }),
    penalty: 1
  }));
}
/**
 * Build a tag-name candidate for an element.
 *
 * @param {Element} input - Element to inspect.
 * @returns {{name: string, penalty: number}|null} Lowercase tag with penalty 2,
 *   or null when config.tagName rejects it.
 */
function tagName(input) {
  const lowered = input.tagName.toLowerCase();
  return config.tagName(lowered) ? { name: lowered, penalty: 2 } : null;
}
/**
 * Build the universal-selector candidate, used when nothing more specific exists.
 *
 * @returns {{name: string, penalty: number}} "*" with penalty 3.
 */
function any() {
  const wildcard = { name: '*', penalty: 3 };
  return wildcard;
}
/**
 * Get the 1-based position of an element among its parent's element children
 * (the value used by :nth-child).
 *
 * @param {Node} input - Node to locate.
 * @returns {number|null} Position, or null when the node has no parent or the
 *   parent has no children.
 */
function index(input) {
  const parent = input.parentNode;
  if (!parent || !parent.firstChild) {
    return null;
  }
  let position = 0;
  for (let sibling = parent.firstChild; sibling; sibling = sibling.nextSibling) {
    // Text and comment nodes do not count towards :nth-child.
    if (sibling.nodeType === Node.ELEMENT_NODE) {
      position++;
    }
    if (sibling === input) {
      break;
    }
  }
  return position;
}
/**
 * Derive an :nth-child variant of a candidate.
 *
 * @param {{name: string, penalty: number}} node - Base candidate.
 * @param {number} i - 1-based child position.
 * @returns {{name: string, penalty: number}} New candidate with penalty + 1.
 */
function nthChild(node, i) {
  const { name, penalty } = node;
  return {
    name: `${name}:nth-child(${i})`,
    penalty: penalty + 1
  };
}
/**
 * Tell whether a candidate may receive an :nth-child variant.
 *
 * @param {{name: string}} node - Candidate.
 * @returns {boolean} False for "html" and for id selectors.
 */
function dispensableNth(node) {
  const { name } = node;
  if (name === 'html') {
    return false;
  }
  return !name.startsWith('#');
}
/**
 * Keep the arguments that are neither null nor undefined.
 *
 * @param {...*} candidates - Values to filter.
 * @returns {Array|null} The remaining values, or null when none remain.
 */
function maybe(...candidates) {
  const present = candidates.filter(notEmpty);
  return present.length > 0 ? present : null;
}
/**
 * Predicate: the value is neither null nor undefined.
 *
 * @param {*} value - Value to test.
 * @returns {boolean} True for every value except null and undefined.
 */
function notEmpty(value) {
  // `!= null` is the loose comparison that covers both null and undefined.
  return value != null;
}
/**
 * Build every path that takes exactly one candidate from each level of the stack
 * (cartesian product), in level-major order.
 *
 * @param {Array<Array>} stack - Candidates per level.
 * @param {Array} [path] - Prefix prepended to every result.
 * @returns {Array<Array>} All combinations; a single entry holding the prefix
 *   when the stack is empty, nothing when some level is empty.
 */
function combinations(stack, path) {
  const seed = path || [];
  return stack.reduce(
    (prefixes, level) => {
      const extended = [];
      for (const prefix of prefixes) {
        for (let i = 0; i < level.length; i++) {
          extended.push(prefix.concat(level[i]));
        }
      }
      return extended;
    },
    [seed]
  );
}
/**
 * Return a new array with `paths` ordered by ascending `penalty(path)`.
 * The input is copied first and is not modified.
 */
function sort(paths) {
  const byPenalty = (left, right) => penalty(left) - penalty(right);
  const ordered = Array.from(paths);
  ordered.sort(byPenalty);
  return ordered;
}
/**
 * Generator that yields shortened variants of `path` which still resolve to `input`.
 *
 * It removes one interior node at a time (never the first or last node) and
 * recurses into every variant that passes `unique(...)` and `same(..., input)`.
 *
 * @param {Array} path   Selector path to shorten.
 * @param {*}     input  Element the shortened path must still resolve to.
 * @param {Object} [scope] Shared recursion state: `counter` (attempts made so far)
 *                         and `visited` (selector strings already yielded).
 *                         Created on the outermost call.
 */
function* optimize(path, input, scope) {
scope = scope || {
counter: 0,
visited: new Map()
};
// Only paths longer than two nodes and longer than config.optimizedMinLength are shortened.
if (path.length > 2 && path.length > config.optimizedMinLength) {
// i runs over interior positions only, so both end nodes are always kept.
for (let i = 1; i < path.length - 1; i++) {
// The attempt budget is shared by the whole recursion; once it is exceeded the generator ends.
if (scope.counter > config.maxNumberOfTries) {
return;
}
scope.counter += 1;
const newPath = path.slice();
newPath.splice(i, 1);
const newPathKey = selector(newPath);
// Skip variants whose selector string was already yielded.
if (scope.visited.has(newPathKey)) {
continue;
}
try {
// NOTE(review): `unique` is defined outside this view; presumably it checks that the
// selector matches exactly one element. Confirm against its definition.
if (unique(newPath) && same(newPath, input)) {
yield newPath;
// The key is recorded only after the consumer resumes the generator past the yield.
scope.visited.set(newPathKey, true);
yield* optimize(newPath, input, scope);
}
} catch (e) {
// Anything thrown inside this try block is ignored and the next position is tried.
continue;
}
}
}
}
/**
 * True when the first element matched in `rootDocument` by the selector built
 * from `path` is exactly `input`.
 */
function same(path, input) {
  const css = selector(path);
  const firstMatch = rootDocument.querySelector(css);
  return firstMatch === input;
}
// ===== CSSESC UTILITY =====
// Printable ASCII characters that get a plain backslash escape when `escapeEverything` is on.
const regexAnySingleEscape = /[ -,\.\/:-@\[-\^`\{-~]/;
// Printable ASCII characters that get a backslash escape when the output is an identifier.
const regexSingleEscape = /[ -,\.\/:-@\[\]\^`\{-~]/;
// Matches a hex escape followed by its terminating space, where the next character
// is neither a hex digit nor a space (used below to drop that space).
const regexExcessiveSpaces = /(^|\\+)?(\\[A-F0-9]{1,6})\x20(?![a-fA-F0-9\x20])/g;
const defaultCssEscOptions = {
escapeEverything: false,
isIdentifier: false,
quotes: 'single',
wrap: false
};
/**
 * Escape `string` for use inside CSS, either as an identifier or as string contents.
 *
 * @param {string} string  Text to escape.
 * @param {Object} [opt]   Overrides for `defaultCssEscOptions`:
 *   - escapeEverything: escape every printable ASCII character as well.
 *   - isIdentifier:     escape for identifier context (class names, attribute names).
 *   - quotes:           'single' or 'double'; selects which quote character is escaped
 *                       and used for wrapping. Any other value falls back to 'single'.
 *   - wrap:             wrap the result in the chosen quotes (ignored for identifiers).
 * @returns {string} The escaped text.
 */
function cssesc(string, opt) {
const options = Object.assign({}, defaultCssEscOptions, opt || {});
if (options.quotes != 'single' && options.quotes != 'double') {
options.quotes = 'single';
}
const quote = options.quotes == 'double' ? '"' : "'";
const isIdentifier = options.isIdentifier;
const firstChar = string.charAt(0);
let output = '';
let counter = 0;
const length = string.length;
while (counter < length) {
const character = string.charAt(counter++);
let codePoint = character.charCodeAt(0);
let value = undefined;
// Anything outside printable ASCII is written as a hex escape followed by a space.
if (codePoint < 0x20 || codePoint > 0x7e) {
if (codePoint >= 0xd800 && codePoint <= 0xdbff && counter < length) {
// High surrogate with a following code unit: try to combine the pair into one code point.
const extra = string.charCodeAt(counter++);
if ((extra & 0xfc00) == 0xdc00) {
codePoint = ((codePoint & 0x3ff) << 10) + (extra & 0x3ff) + 0x10000;
} else {
// Not a low surrogate: step back so the next iteration handles that code unit on its own.
counter--;
}
}
value = '\\' + codePoint.toString(16).toUpperCase() + ' ';
} else {
if (options.escapeEverything) {
if (regexAnySingleEscape.test(character)) {
value = '\\' + character;
} else {
value = '\\' + codePoint.toString(16).toUpperCase() + ' ';
}
} else if (/[\t\n\f\r\x0B]/.test(character)) {
value = '\\' + codePoint.toString(16).toUpperCase() + ' ';
} else if (
// Backslashes are always escaped; the active quote character only in string context;
// the regexSingleEscape characters only in identifier context.
character == '\\' ||
(!isIdentifier && ((character == '"' && quote == character) || (character == "'" && quote == character))) ||
(isIdentifier && regexSingleEscape.test(character))
) {
value = '\\' + character;
} else {
value = character;
}
}
output += value;
}
if (isIdentifier) {
// A leading '-' followed by '-' or a digit gets its first character escaped;
// otherwise a leading digit is rewritten as a hex escape ("\3" + digit + space).
if (/^-[-\d]/.test(output)) {
output = '\\-' + output.slice(1);
} else if (/\d/.test(firstChar)) {
output = '\\3' + firstChar + ' ' + output.slice(1);
}
}
// Drop the space after a hex escape where regexExcessiveSpaces matched, unless the
// escape's backslash is itself preceded by an odd number of backslashes.
output = output.replace(regexExcessiveSpaces, function ($0, $1, $2) {
if ($1 && $1.length % 2) {
return $0;
}
return ($1 || '') + $2;
});
if (!isIdentifier && options.wrap) {
return quote + output + quote;
}
return output;
}
// ===== ELEMENT DETECTION =====
/**
 * Resolve the element under the point (x, y) of `iframeDoc` that should be
 * treated as the hover target.
 *
 * Resolution order:
 *  1. If any hit element has role="dialog", the deepest hit element inside that
 *     dialog is returned directly.
 *  2. Otherwise, when more than one element is hit, the three top-most hits are
 *     checked: a fixed/absolute element with z-index above 50 wins, as does an
 *     <svg> element found among the first two hits.
 *  3. Otherwise the hit element with the longest ancestor chain wins.
 *  4. The result of steps 2-3 is refined through shadow roots exposed via
 *     `shadowRoot`, at most 4 levels deep.
 *
 * @param {number} x
 * @param {number} y
 * @returns {Element|null} The chosen element, or null when nothing is hit.
 */
function getDeepestElementFromPoint(x, y) {
  const elements = iframeDoc.elementsFromPoint(x, y);
  if (!elements || elements.length === 0) return null;

  // Return the candidate with the greatest depth as measured by `depthOf`;
  // ties keep the earliest (top-most) candidate.
  const pickDeepest = function (candidates, depthOf) {
    let winner = candidates[0];
    let winnerDepth = 0;
    for (let i = 0; i < candidates.length; i++) {
      const depth = depthOf(candidates[i]);
      if (depth > winnerDepth) {
        winnerDepth = depth;
        winner = candidates[i];
      }
    }
    return winner;
  };

  // Check for dialog elements first
  const dialogElement = elements.find(function (el) {
    return el.getAttribute('role') === 'dialog';
  });
  if (dialogElement) {
    const dialogElements = elements.filter(function (el) {
      return el === dialogElement || dialogElement.contains(el);
    });
    // Depth is counted relative to the dialog's parent, so only nesting inside the dialog matters.
    return pickDeepest(dialogElements, function (el) {
      let depth = 0;
      let current = el;
      while (current && current.parentElement && current !== dialogElement.parentElement) {
        depth++;
        current = current.parentElement;
      }
      return depth;
    });
  }

  // Positioned overlays and <svg> elements near the top of the hit stack take priority.
  const findPrioritized = function (elems) {
    const limit = Math.min(3, elems.length);
    for (let i = 0; i < limit; i++) {
      const element = elems[i];
      const style = window.getComputedStyle(element);
      const zIndex = parseInt(style.zIndex, 10) || 0;
      if ((style.position === 'fixed' || style.position === 'absolute') && zIndex > 50) {
        return element;
      }
      // SVG elements report a lowercase tagName ("svg"), so compare case-insensitively.
      if (i < 2 && element.tagName.toLowerCase() === 'svg') {
        return element;
      }
    }
    return null;
  };

  // Number of nodes on the chain from the element up to the root, the element included.
  const ancestorChainLength = function (el) {
    let depth = 0;
    for (let current = el; current; current = current.parentElement) {
      depth++;
    }
    return depth;
  };

  let deepestElement = elements.length === 1
    ? elements[0]
    : (findPrioritized(elements) || pickDeepest(elements, ancestorChainLength));
  if (!deepestElement) return null;

  // Handle shadow DOM: keep descending while the current element exposes a shadow root
  // that yields a different element at the same point.
  const MAX_SHADOW_DEPTH = 4;
  let current = deepestElement;
  let shadowRoot = current.shadowRoot;
  let depth = 0;
  while (shadowRoot && depth < MAX_SHADOW_DEPTH) {
    const shadowElement = shadowRoot.elementFromPoint(x, y);
    if (!shadowElement || shadowElement === current) break;
    deepestElement = shadowElement;
    current = shadowElement;
    shadowRoot = current.shadowRoot;
    depth++;
  }
  return deepestElement;
}
// ===== SELECTOR GENERATION =====
/**
 * Return the subset of `attributes` that `element` carries with a non-empty value.
 */
function genAttributeSet(element, attributes) {
  const present = new Set();
  attributes.forEach(function (attr) {
    const value = element.getAttribute(attr);
    if (value != null && value.length > 0) {
      present.add(attr);
    }
  });
  return present;
}
/**
 * True when `genAttributeSet` finds at least one of `attributes` on `element`.
 */
function isAttributesDefined(element, attributes) {
  const defined = genAttributeSet(element, attributes);
  return defined.size > 0;
}
/**
 * Build a predicate over attribute names that accepts only the names
 * `genAttributeSet` returned for `element` and `attributes`.
 */
function genValidAttributeFilter(element, attributes) {
  const allowed = genAttributeSet(element, attributes);
  return (name) => allowed.has(name);
}
/**
 * Generate a selector for `element` that relies only on the given attributes.
 *
 * - When 'rel' is requested and the element has a `rel` attribute, a plain
 *   `[rel="..."]` selector is returned immediately.
 * - Otherwise `finder` is run with ids disabled and attributes restricted to
 *   those of `attributes` that the element carries with a value.
 *
 * @param {Element} element
 * @param {string[]} attributes  Attribute names the selector may use.
 * @returns {string|null} The selector, or null when none of the attributes is
 *                        set or selector generation throws.
 */
function genSelectorForAttributes(element, attributes) {
  let selector = null;
  try {
    if (attributes.includes('rel') && element.hasAttribute('rel')) {
      const relValue = element.getAttribute('rel');
      // Escape the value for a double-quoted CSS string so quotes or backslashes
      // in the attribute cannot produce an invalid selector.
      return '[rel="' + cssesc(relValue, { quotes: 'double' }) + '"]';
    }
    selector = isAttributesDefined(element, attributes)
      ? finder(element, {
        idName: function () { return false; },
        attr: genValidAttributeFilter(element, attributes)
      })
      : null;
  } catch (e) {
    // Best effort: a failure to generate this optional selector is reported as null.
  }
  return selector;
}
/**
 * Check whether `char` is a single ASCII digit.
 * Falsy input is returned unchanged (so '' yields '' and undefined yields undefined).
 */
function isCharacterNumber(char) {
  if (!char) {
    return char;
  }
  if (char.length !== 1) {
    return false;
  }
  return /[0-9]/.test(char);
}
/**
 * Last-resort selector: tags `element` with a random `data-mx-id` attribute
 * (an integer below 10000) and returns a `tag[data-mx-id="..."]` selector for it.
 * Mutates the element.
 */
function generateMandatoryCSSFallback(element) {
  const marker = String(Math.floor(Math.random() * 10000));
  element.setAttribute('data-mx-id', marker);
  const tag = element.tagName.toLowerCase();
  return `${tag}[data-mx-id="${marker}"]`;
}
/**
 * Produce the full family of selectors for `element`.
 *
 * Each selector is generated independently. A generator that throws contributes
 * null instead of aborting the others.
 *
 * @param {Element|null|undefined} element
 * @returns {Object|null} null for a missing element, otherwise an object holding
 *   the id / general / attribute / test-id / href / accessibility / form / rel
 *   selectors, the element's `innerText` and `href`, and null placeholders for
 *   `iframeSelector` and `shadowSelector`.
 */
function genSelectors(element) {
  if (element === null || element === undefined) {
    return null;
  }

  // Run a selector generator, mapping any exception to null.
  const attempt = (build) => {
    try {
      return build();
    } catch (e) {
      return null;
    }
  };

  const href = element.getAttribute('href');
  const generalSelector = attempt(() => finder(element));
  const attrSelector = attempt(() => finder(element, {
    attr: function () { return true; }
  }));

  const relSelector = genSelectorForAttributes(element, ['rel']);
  const hrefSelector = genSelectorForAttributes(element, ['href']);
  const formSelector = genSelectorForAttributes(element, ['name', 'placeholder', 'for']);
  const accessibilitySelector = genSelectorForAttributes(element, ['aria-label', 'alt', 'title']);
  const testIdSelector = genSelectorForAttributes(element, [
    'data-testid', 'data-test-id', 'data-testing',
    'data-test', 'data-qa', 'data-cy'
  ]);

  // Ids that start with a digit are not used for the id selector.
  const idSelector = attempt(() => {
    const elementId = element.getAttribute('id');
    const firstIdChar = elementId ? elementId[0] : '';
    if (isAttributesDefined(element, ['id']) && !isCharacterNumber(firstIdChar)) {
      return finder(element, {
        attr: function (name) { return name === 'id'; }
      });
    }
    return null;
  });

  return {
    id: idSelector,
    generalSelector: generalSelector,
    attrSelector: attrSelector,
    testIdSelector: testIdSelector,
    text: element.innerText,
    href: href || undefined,
    hrefSelector: hrefSelector,
    accessibilitySelector: accessibilitySelector,
    formSelector: formSelector,
    relSelector: relSelector,
    iframeSelector: null,
    shadowSelector: null
  };
}
// Main execution
const hoveredElement = getDeepestElementFromPoint(coordinates.x, coordinates.y);
if (hoveredElement != null) {
const parentElement = hoveredElement.parentElement;
const element = (parentElement && parentElement.tagName === 'A') ? parentElement : hoveredElement;
const generatedSelectors = genSelectors(element);
return {
primary: generatedSelectors
};
}
} catch (e) {
}
return { primary: null };
}
/**
 * Check whether `element` sits close to `listContainer`:
 *  - it starts within 500px below the list's bottom edge, or
 *  - it ends within 500px above the list's top edge, or
 *  - it overlaps the list vertically and lies within 200px of it horizontally.
 * Any error while measuring is reported as "not near".
 */
function isNearList(element) {
  try {
    const list = listContainer.getBoundingClientRect();
    const box = element.getBoundingClientRect();

    const startsJustBelow = box.top >= list.bottom && box.top <= list.bottom + 500;
    if (startsJustBelow) {
      return true;
    }
    const endsJustAbove = box.bottom <= list.top && box.bottom >= list.top - 500;
    if (endsJustAbove) {
      return true;
    }

    const overlapsVertically = !(box.bottom < list.top || box.top > list.bottom);
    if (!overlapsVertically) {
      return false;
    }
    const gapToRightEdge = Math.abs(box.left - list.right);
    const gapToLeftEdge = Math.abs(box.right - list.left);
    return Math.min(gapToRightEdge, gapToLeftEdge) < 200;
  } catch (error) {
    return false;
  }
}
// Score every visible clickable element for three roles: "next", "load more" and "previous".
// For each role the first element reaching the highest score is kept.
const clickableElements = getClickableElements();

// --- "Next" button: text match (+10), near list (+5), <button> tag (+2), pagination-like class (+3).
let nextButton = null;
let nextButtonScore = 0;
// Every element with a non-zero "next" score, in document order, kept for debug output.
const nextButtonCandidates = [];
for (const element of clickableElements) {
  if (!isVisible(element)) continue;
  const text = (element.textContent || '').trim();
  const ariaLabel = element.getAttribute('aria-label') || '';
  const title = element.getAttribute('title') || '';
  const combinedText = `${text} ${ariaLabel} ${title}`;
  let score = 0;
  const reasons = [];
  if (matchesAnyPattern(combinedText, nextButtonPatterns)) {
    score += 10;
    reasons.push('text match (+10)');
  }
  if (isNearList(element)) {
    score += 5;
    reasons.push('near list (+5)');
  }
  if (element.tagName === 'BUTTON') {
    score += 2;
    reasons.push('button tag (+2)');
  }
  // Read the class attribute rather than `className`: on SVG elements `className`
  // is an SVGAnimatedString object, not a string, so the regex would never match.
  const className = element.getAttribute('class') || '';
  if (/pagination|next|forward/i.test(className)) {
    score += 3;
    reasons.push('pagination class (+3)');
  }
  if (score > 0) {
    nextButtonCandidates.push({
      element: element,
      score: score,
      text: text.substring(0, 50),
      ariaLabel: ariaLabel,
      tag: element.tagName,
      className: className,
      reasons: reasons
    });
  }
  if (score > nextButtonScore) {
    nextButtonScore = score;
    nextButton = element;
  }
}

// --- "Load more" button: text match (+10), near list (+5), <button> tag (+2).
let loadMoreButton = null;
let loadMoreScore = 0;
for (const element of clickableElements) {
  if (!isVisible(element)) continue;
  const text = (element.textContent || '').trim();
  const ariaLabel = element.getAttribute('aria-label') || '';
  const title = element.getAttribute('title') || '';
  const combinedText = `${text} ${ariaLabel} ${title}`;
  let score = 0;
  if (matchesAnyPattern(combinedText, loadMorePatterns)) {
    score += 10;
  }
  if (isNearList(element)) {
    score += 5;
  }
  if (element.tagName === 'BUTTON') {
    score += 2;
  }
  if (score > loadMoreScore) {
    loadMoreScore = score;
    loadMoreButton = element;
  }
}

// --- "Previous" button: text match (+10), near list (+5).
let prevButton = null;
let prevButtonScore = 0;
for (const element of clickableElements) {
  if (!isVisible(element)) continue;
  const text = (element.textContent || '').trim();
  const ariaLabel = element.getAttribute('aria-label') || '';
  const title = element.getAttribute('title') || '';
  const combinedText = `${text} ${ariaLabel} ${title}`;
  let score = 0;
  if (matchesAnyPattern(combinedText, prevButtonPatterns)) {
    score += 10;
  }
  if (isNearList(element)) {
    score += 5;
  }
  if (score > prevButtonScore) {
    prevButtonScore = score;
    prevButton = element;
  }
}
/**
 * Heuristic score for "this page loads more items on scroll".
 *
 * Points are added for: loading indicators (+3), sentinel/trigger elements (+4),
 * a visible scroll-to-top control (+2), page height relative to the viewport
 * (+2 or +3), number of list items (+1 or +2), known infinite-scroll/lazy-load
 * markers (+4), the list extending below the viewport (+2 or +3), a hidden
 * "load"/"more" element (+2) and the absence of pagination controls (+1).
 *
 * @returns {number} The accumulated score; 0 when the page fits in the viewport
 *                   or when anything throws.
 */
function detectInfiniteScrollScore() {
  try {
    const report = {
      indicators: [],
      score: 0,
      threshold: 5
    };
    const award = (points, note) => {
      report.score += points;
      report.indicators.push(note);
    };
    // First selector of the list that matches something in the document, if any.
    const firstPresent = (selectors) => selectors.find((candidate) => document.querySelector(candidate));

    const itemTotal = listElements.length;
    const pageHeight = document.documentElement.scrollHeight;
    const viewportHeight = window.innerHeight;
    const scrollOffset = window.scrollY;

    // A page that fits in the viewport has nothing to scroll.
    if (pageHeight <= viewportHeight) {
      return 0;
    }

    const loadingHit = firstPresent([
      '[class*="loading"]',
      '[class*="spinner"]',
      '[class*="skeleton"]',
      '[aria-busy="true"]',
      '[data-loading="true"]',
      '.loader',
      '.load-more-spinner',
      '[class*="load"]',
      '[id*="loading"]',
      '[id*="spinner"]'
    ]);
    if (loadingHit !== undefined) {
      award(3, `Loading indicator: ${loadingHit} (+3)`);
    }

    const sentinelHit = firstPresent([
      '[class*="sentinel"]',
      '[class*="trigger"]',
      '[data-infinite]',
      '[data-scroll-trigger]',
      '#infinite-scroll-trigger',
      '[class*="infinite"]',
      '[id*="infinite"]'
    ]);
    if (sentinelHit !== undefined) {
      award(4, `Sentinel element: ${sentinelHit} (+4)`);
    }

    // For each selector only its first match is inspected, and it must be visible.
    const hasVisibleScrollToTop = [
      '[class*="scroll"][class*="top"]',
      '[aria-label*="scroll to top"]',
      '[title*="back to top"]',
      '.back-to-top',
      '#back-to-top',
      '[class*="scrolltop"]',
      '[class*="backtotop"]',
      'button[class*="top"]',
      'a[href="#top"]',
      'a[href="#"]'
    ].some((candidate) => {
      const found = document.querySelector(candidate);
      return found && isVisible(found);
    });
    if (hasVisibleScrollToTop) {
      award(2, 'Scroll-to-top button (+2)');
    }

    if (pageHeight > viewportHeight * 3) {
      award(3, `Very tall page (${(pageHeight / viewportHeight).toFixed(1)}x viewport) (+3)`);
    } else if (pageHeight > viewportHeight * 2) {
      award(2, `Tall page (${(pageHeight / viewportHeight).toFixed(1)}x viewport) (+2)`);
    }

    if (itemTotal >= 20) {
      award(2, `Many list items (${itemTotal}) (+2)`);
    } else if (itemTotal >= 10) {
      award(1, `Good number of list items (${itemTotal}) (+1)`);
    }

    const libraryHit = firstPresent([
      '.infinite-scroll',
      '[data-infinite-scroll]',
      '[data-flickity]',
      '[data-slick]',
      '.masonry',
      '[data-masonry]',
      '[class*="infinite-scroll"]',
      '[class*="lazy-load"]',
      '[data-lazy]'
    ]);
    if (libraryHit !== undefined) {
      award(4, `Infinite scroll library: ${libraryHit} (+4)`);
    }

    // Compare the page-absolute bottom of the last list item with the bottom of the viewport.
    const lastItem = listElements[listElements.length - 1];
    if (lastItem) {
      const lastItemBottom = lastItem.getBoundingClientRect().bottom + scrollOffset;
      const viewportBottom = scrollOffset + viewportHeight;
      if (lastItemBottom > viewportBottom + viewportHeight) {
        award(3, 'List extends far below viewport (+3)');
      } else if (lastItemBottom > viewportBottom) {
        award(2, 'List extends below viewport (+2)');
      }
    }

    const loadLikeElements = document.querySelectorAll('[class*="load"], [class*="more"]');
    const hasHiddenTrigger = Array.from(loadLikeElements).some((candidate) => {
      const computed = window.getComputedStyle(candidate);
      return computed.opacity === '0' || computed.visibility === 'hidden';
    });
    if (hasHiddenTrigger) {
      award(2, 'Hidden load trigger element (+2)');
    }

    const pagers = document.querySelectorAll('[class*="pagination"], [class*="pager"]');
    if (pagers.length === 0) {
      award(1, 'No pagination controls found (+1)');
    }

    return report.score;
  } catch (error) {
    return 0;
  }
}
2025-12-11 17:41:39 +05:30
const infiniteScrollScore = (options && options.disableScrollDetection)
2025-12-08 18:58:13 +05:30
? 0
: detectInfiniteScrollScore();
const hasStrongInfiniteScrollSignals = infiniteScrollScore >= 8;
const hasMediumInfiniteScrollSignals = infiniteScrollScore >= 5 && infiniteScrollScore < 8;
if (hasStrongInfiniteScrollSignals) {
2025-12-08 18:58:13 +05:30
const confidence = infiniteScrollScore >= 12 ? 'high' : infiniteScrollScore >= 10 ? 'medium' : 'low';
return {
type: 'scrollDown',
2025-12-08 18:58:13 +05:30
selector: null,
confidence: confidence
};
}
if (loadMoreButton && loadMoreScore >= 15) {
const selector = generatePaginationSelector(loadMoreButton);
return {
type: 'clickLoadMore',
2025-12-08 18:58:13 +05:30
selector: selector,
confidence: 'high'
};
}
if (nextButton && nextButtonScore >= 15 && !hasMediumInfiniteScrollSignals) {
const selector = generatePaginationSelector(nextButton);
return {
type: 'clickNext',
2025-12-08 18:58:13 +05:30
selector: selector,
confidence: 'high'
};
}
if (hasMediumInfiniteScrollSignals) {
2025-12-08 18:58:13 +05:30
const confidence = infiniteScrollScore >= 7 ? 'medium' : 'low';
return {
type: 'scrollDown',
2025-12-08 18:58:13 +05:30
selector: null,
confidence: confidence
};
}
if (loadMoreButton && loadMoreScore >= 8) {
const selector = generatePaginationSelector(loadMoreButton);
2025-12-08 18:58:13 +05:30
const confidence = loadMoreScore >= 10 ? 'medium' : 'low';
return {
type: 'clickLoadMore',
2025-12-08 18:58:13 +05:30
selector: selector,
confidence: confidence
};
}
if (nextButton && nextButtonScore >= 8) {
const selector = generatePaginationSelector(nextButton);
2025-12-08 18:58:13 +05:30
const confidence = nextButtonScore >= 10 ? 'medium' : 'low';
return {
type: 'clickNext',
2025-12-08 18:58:13 +05:30
selector: selector,
confidence: confidence
};
}
if (prevButton && prevButtonScore >= 8) {
2025-12-08 18:58:13 +05:30
const confidence = prevButtonScore >= 15 ? 'high' : prevButtonScore >= 10 ? 'medium' : 'low';
return {
type: 'scrollUp',
2025-12-08 18:58:13 +05:30
selector: null,
confidence: confidence
};
}
return {
type: '',
selector: null,
2025-12-08 18:58:13 +05:30
confidence: 'low',
debug: {
clickableElementsCount: clickableElements.length,
nextCandidatesCount: nextButtonCandidates.length,
topNextCandidates: nextButtonCandidates.slice(0, 3).map(c => ({
score: c.score,
text: c.text,
tag: c.tag,
reasons: c.reasons
})),
finalScores: {
loadMore: loadMoreScore,
next: nextButtonScore,
2025-12-08 18:58:13 +05:30
prev: prevButtonScore,
infiniteScroll: infiniteScrollScore
}
}
};
} catch (error) {
return {
type: '',
selector: null,
// Detection threw before producing a result, so fall back to the lowest confidence.
confidence: 'low',
error: error.message,
debug: 'Exception thrown: ' + error.message
};
}
};
/**
* Analyze element groups on the page
* Returns grouped elements with their structural fingerprints
*/
window.analyzeElementGroups = function() {
try {
/**
 * Reduce a class list to a stable, sorted, space-separated string.
 *
 * Classes that look generated are dropped: ones containing three or more
 * consecutive digits, "uuid" or "hash", an "id-" token, a trailing "_<digits>"
 * suffix, and Angular-specific `_ngcontent-*`, `_nghost-*` and `ng-tns-c*` classes.
 *
 * The "id-" rule only applies at the start of the class or right after a '-' or '_'
 * separator, so ordinary classes such as "grid-item" are kept.
 */
const normalizeClasses = (classList) => {
  const generatedPattern = /\d{3,}|uuid|hash|(?:^|[-_])id-|_\d+$/i;
  return Array.from(classList)
    .filter((cls) => {
      return (
        !cls.match(generatedPattern) &&
        !cls.startsWith('_ngcontent-') &&
        !cls.startsWith('_nghost-') &&
        !cls.match(/^ng-tns-c\d+-\d+$/)
      );
    })
    .sort()
    .join(' ');
};
/**
 * Describe an element's structure so that similar elements can be grouped.
 *
 * Returns null for non-element nodes and for script/style/meta/link/title/head
 * (custom elements, i.e. tags containing '-', are never excluded). Otherwise
 * returns the tag, normalized classes, a JSON outline of the children, the
 * filtered attributes, the ancestor depth (capped at 20), coarse text
 * characteristics and a `signature` string combining the structural parts.
 */
const getStructuralFingerprint = (element) => {
  if (element.nodeType !== Node.ELEMENT_NODE) return null;

  const tagName = element.tagName.toLowerCase();
  const isCustomElement = tagName.includes('-');
  const excludedTags = ['script', 'style', 'meta', 'link', 'title', 'head'];
  if (!isCustomElement && excludedTags.includes(tagName)) {
    return null;
  }

  // JSON outline of a list of nodes: tag and classes, plus a text flag when requested.
  const outline = (nodes, withTextFlag) => JSON.stringify(nodes.map((node) => {
    const entry = {
      tag: node.tagName.toLowerCase(),
      classes: normalizeClasses(node.classList),
    };
    if (withTextFlag) {
      entry.hasText = (node.textContent ?? '').trim().length > 0;
    }
    return entry;
  }));

  const children = Array.from(element.children);
  let childrenStructureString;
  switch (tagName) {
    case 'table': {
      // A table is outlined by the cells of its header row, or of its first row when there is no <thead>.
      const thead = element.querySelector('thead');
      const representativeRow = thead ? thead.querySelector('tr') : element.querySelector('tr');
      childrenStructureString = representativeRow
        ? outline(Array.from(representativeRow.children), false)
        : JSON.stringify([]);
      break;
    }
    case 'tr':
      childrenStructureString = outline(children, false);
      break;
    default:
      childrenStructureString = outline(children, true);
  }

  const normalizedClasses = normalizeClasses(element.classList);

  // Attributes that never take part in the fingerprint.
  const ignoredAttributes = ['id', 'style', 'data-reactid', 'data-react-checksum'];
  const relevantAttributes = Array.from(element.attributes)
    .filter((attr) => {
      if (ignoredAttributes.includes(attr.name.toLowerCase())) return false;
      if (isCustomElement) return true;
      // On standard elements only data-type and data-role survive among data-* attributes.
      return !attr.name.startsWith('data-') || attr.name === 'data-type' || attr.name === 'data-role';
    })
    .map((attr) => `${attr.name}=${attr.value}`)
    .sort();

  let depth = 0;
  for (let ancestor = element.parentElement; ancestor && depth < 20; ancestor = ancestor.parentElement) {
    depth++;
  }

  const textContent = (element.textContent ?? '').trim();
  const textCharacteristics = {
    hasText: textContent.length > 0,
    // Text length rounded down to a multiple of 20.
    textLength: Math.floor(textContent.length / 20) * 20,
    hasLinks: element.querySelectorAll('a').length,
    hasImages: element.querySelectorAll('img').length,
    hasButtons: element.querySelectorAll('button, input[type="button"], input[type="submit"]').length,
  };

  const attributes = relevantAttributes.join('|');
  const signature = `${tagName}::${normalizedClasses}::${children.length}::${childrenStructureString}::${attributes}`;

  return {
    tagName,
    normalizedClasses,
    childrenCount: children.length,
    childrenStructure: childrenStructureString,
    attributes,
    depth,
    textCharacteristics,
    signature,
  };
};
/**
 * Similarity of two structural fingerprints, in the range 0..1.
 *
 * Different tags (or a missing fingerprint) score 0. Otherwise points are
 * awarded out of 36: tag 10, classes 8, children structure 8 (4 when only the
 * child count matches), attributes 5, depth 2 and text characteristics 3.
 * Classes and attributes that are not identical earn partial credit
 * proportional to the share of common entries.
 */
const calculateSimilarity = (fp1, fp2) => {
  if (!fp1 || !fp2) return 0;
  if (fp1.tagName !== fp2.tagName) return 0;

  // Share of entries common to two separator-joined lists, relative to the longer list.
  const sharedFraction = (left, right, separator) => {
    const leftParts = left.split(separator).filter((part) => part);
    const rightParts = right.split(separator).filter((part) => part);
    if (leftParts.length === 0 || rightParts.length === 0) return 0;
    const common = leftParts.filter((part) => rightParts.includes(part));
    return common.length / Math.max(leftParts.length, rightParts.length);
  };

  // Weights: tag 10 + classes 8 + children 8 + attributes 5 + depth 2 + text 3.
  const maxScore = 10 + 8 + 8 + 5 + 2 + 3;
  let score = 10;

  if (fp1.normalizedClasses === fp2.normalizedClasses) {
    score += 8;
  } else if (fp1.normalizedClasses && fp2.normalizedClasses) {
    score += sharedFraction(fp1.normalizedClasses, fp2.normalizedClasses, ' ') * 8;
  }

  if (fp1.childrenStructure === fp2.childrenStructure) {
    score += 8;
  } else if (fp1.childrenCount === fp2.childrenCount) {
    score += 4;
  }

  if (fp1.attributes === fp2.attributes) {
    score += 5;
  } else if (fp1.attributes && fp2.attributes) {
    score += sharedFraction(fp1.attributes, fp2.attributes, '|') * 5;
  }

  const depthGap = Math.abs(fp1.depth - fp2.depth);
  if (depthGap <= 1) {
    score += 2;
  } else if (depthGap <= 2) {
    score += 1;
  }

  const textA = fp1.textCharacteristics;
  const textB = fp2.textCharacteristics;
  if (textA.hasText === textB.hasText) score += 1;
  if (Math.abs(textA.textLength - textB.textLength) <= 40) score += 1;
  if (textA.hasLinks === textB.hasLinks && textA.hasImages === textB.hasImages) score += 1;

  return maxScore > 0 ? score / maxScore : 0;
};
/**
 * Tell whether `element` contains at least one descendant with visible content.
 *
 * A child counts as meaningful when it is an <img> with `src`, an <a> with
 * `href`, has non-blank text, or contains an <svg>. Children that are not
 * meaningful themselves are searched recursively. Children of an element's
 * shadow root are treated the same way as its regular children. The search
 * stops descending after five levels and ends at the first hit.
 *
 * @param {Element} element
 * @returns {boolean}
 */
const hasAnyMeaningfulChildren = (element) => {
  const MAX_DEPTH = 5;

  const isMeaningful = (node) => {
    const tagName = node.tagName.toLowerCase();
    if (tagName === 'img' && node.hasAttribute('src')) return true;
    if (tagName === 'a' && node.hasAttribute('href')) return true;
    const text = (node.textContent || '').trim();
    return text.length > 0 || Boolean(node.querySelector('svg'));
  };

  const search = (el, depth = 0) => {
    if (depth > MAX_DEPTH) return false;
    // Regular children first, then the children of the shadow root when one is exposed.
    const pools = [el.children];
    if (el.shadowRoot) {
      pools.push(el.shadowRoot.children);
    }
    for (const pool of pools) {
      for (const child of Array.from(pool)) {
        if (isMeaningful(child)) return true;
        if (child.children.length > 0 && search(child, depth + 1)) return true;
      }
    }
    return false;
  };

  return search(element);
};
/**
 * Collect every element of `document` that has a non-zero bounding box,
 * descending into the shadow root of each collected element.
 * Each element is returned once; shadow content follows directly after its host.
 * A container that fails to traverse is logged with console.warn and skipped.
 */
const getAllVisibleElements = () => {
  const collected = [];
  const seen = new Set();

  const hasBox = (node) => {
    const box = node.getBoundingClientRect();
    return box.width > 0 && box.height > 0;
  };

  const collectFrom = (root) => {
    try {
      const rendered = Array.from(root.querySelectorAll('*')).filter(hasBox);
      for (const node of rendered) {
        if (seen.has(node)) continue;
        seen.add(node);
        collected.push(node);
        if (node.shadowRoot) {
          collectFrom(node.shadowRoot);
        }
      }
    } catch (error) {
      console.warn('Error traversing container:', error);
    }
  };

  collectFrom(document);
  return collected;
};
const allElements = getAllVisibleElements();
const processedInTables = new Set();
const elementGroups = new Map();
const groupedElements = new Set();
// Group table rows
const tables = allElements.filter(el => el.tagName === 'TABLE');
tables.forEach(table => {
const rows = Array.from(table.querySelectorAll('tbody > tr')).filter(row => {
const parent = row.parentElement;
if (!parent || !table.contains(parent)) return false;
const rect = row.getBoundingClientRect();
return rect.width > 0 && rect.height > 0;
});
if (rows.length >= 2) {
const representativeFingerprint = getStructuralFingerprint(rows[0]);
if (!representativeFingerprint) return;
const group = {
elements: rows,
fingerprint: representativeFingerprint,
representative: rows[0],
};
rows.forEach(row => {
elementGroups.set(row, group);
groupedElements.add(row);
processedInTables.add(row);
});
}
});
// Group other elements
const remainingElements = allElements.filter(el => !processedInTables.has(el));
const elementFingerprints = new Map();
remainingElements.forEach((element) => {
const fingerprint = getStructuralFingerprint(element);
if (fingerprint) {
elementFingerprints.set(element, fingerprint);
}
});
const processedElements = new Set();
const similarityThreshold = 0.7;
const minGroupSize = 2;
const maxParentLevels = 5;
elementFingerprints.forEach((fingerprint, element) => {
if (processedElements.has(element)) return;
const currentGroup = [element];
processedElements.add(element);
elementFingerprints.forEach((otherFingerprint, otherElement) => {
if (processedElements.has(otherElement)) return;
const similarity = calculateSimilarity(fingerprint, otherFingerprint);
if (similarity >= similarityThreshold) {
currentGroup.push(otherElement);
processedElements.add(otherElement);
}
});
2025-12-11 17:41:39 +05:30
if (currentGroup.length >= minGroupSize && hasAnyMeaningfulChildren(element)) {
let grouped = false;
for (let level = 1; level <= maxParentLevels && !grouped; level++) {
let ancestor = currentGroup[0];
for (let i = 0; i < level && ancestor; i++) {
ancestor = ancestor.parentElement;
}
if (!ancestor) break;
const allShareAncestor = currentGroup.every(el => {
let elAncestor = el;
for (let i = 0; i < level && elAncestor; i++) {
elAncestor = elAncestor.parentElement;
}
return elAncestor === ancestor;
});
if (allShareAncestor) {
const group = {
elements: currentGroup,
fingerprint,
representative: element,
};
currentGroup.forEach((el) => {
elementGroups.set(el, group);
groupedElements.add(el);
});
grouped = true;
}
}
2025-12-11 17:41:39 +05:30
if (!grouped) {
currentGroup.forEach((el, idx) => {
if (idx > 0) processedElements.delete(el);
});
}
}
});
// Convert to serializable format with XPath
const uniqueGroups = new Map();
elementGroups.forEach((group) => {
const signature = group.fingerprint.signature;
if (!uniqueGroups.has(signature)) {
const tagName = group.fingerprint.tagName;
const classes = group.fingerprint.normalizedClasses.split(' ').filter(Boolean);
let xpath = `//${tagName}`;
if (classes.length > 0) {
const classConditions = classes.map(cls => `contains(@class, '${cls}')`).join(' and ');
xpath += `[${classConditions}]`;
}
// Get sample innerText from first 3 elements
const sampleTexts = group.elements.slice(0, 3).map((el) => {
return (el.textContent || '').trim().substring(0, 200);
});
// Get sample HTML structure
const sampleHTML = group.representative.outerHTML.substring(0, 500);
uniqueGroups.set(signature, {
fingerprint: group.fingerprint,
count: group.elements.length,
xpath: xpath,
sampleTexts: sampleTexts,
sampleHTML: sampleHTML,
});
}
});
return Array.from(uniqueGroups.values());
} catch (error) {
console.error('[analyzeElementGroups] Error:', error);
return [];
}
};
})();