Merge pull request #904 from getmaxun/auto-pag

feat: add auto pagination detection
This commit is contained in:
Karishma Shukla
2025-11-30 19:13:48 +05:30
committed by GitHub
6 changed files with 1149 additions and 9 deletions

View File

@@ -1242,6 +1242,29 @@ export const BrowserWindow = () => {
}
}, [browserSteps, getList, listSelector, initialAutoFieldIds, currentListActionId, manuallyAddedFieldIds]);
// Mirror the list step that belongs to the active list action into the
// locally tracked list id, list selector, fields and pagination selector.
// Each setter is only invoked when the stored value differs from the step.
useEffect(() => {
  if (!currentListActionId || browserSteps.length === 0) {
    return;
  }

  const matchingStep = browserSteps.find(
    (step) => step.type === 'list' && step.actionId === currentListActionId
  ) as ListStep | undefined;
  if (!matchingStep) {
    return;
  }

  if (matchingStep.id !== currentListId) {
    setCurrentListId(matchingStep.id);
  }

  if (matchingStep.listSelector !== listSelector) {
    setListSelector(matchingStep.listSelector);
  }

  // Fields are compared by their JSON form rather than by reference.
  const fieldsDiffer = JSON.stringify(fields) !== JSON.stringify(matchingStep.fields);
  if (fieldsDiffer) {
    setFields(matchingStep.fields);
  }

  // An empty or missing selector on the step never overwrites the local one.
  const stepPaginationSelector = matchingStep.pagination?.selector;
  if (stepPaginationSelector && stepPaginationSelector !== paginationSelector) {
    setPaginationSelector(stepPaginationSelector);
  }
}, [currentListActionId, browserSteps, currentListId, listSelector, fields, paginationSelector]);
useEffect(() => {
if (!isDOMMode) {
capturedElementHighlighter.clearHighlights();
@@ -1637,6 +1660,22 @@ export const BrowserWindow = () => {
paginationType !== "scrollUp" &&
paginationType !== "none"
) {
let targetListId = currentListId;
let targetFields = fields;
if ((!targetListId || targetListId === 0) && currentListActionId) {
const activeStep = browserSteps.find(
s => s.type === 'list' && s.actionId === currentListActionId
) as ListStep | undefined;
if (activeStep) {
targetListId = activeStep.id;
if (Object.keys(targetFields).length === 0 && Object.keys(activeStep.fields).length > 0) {
targetFields = activeStep.fields;
}
}
}
setPaginationSelector(highlighterData.selector);
notify(
`info`,
@@ -1646,8 +1685,8 @@ export const BrowserWindow = () => {
);
addListStep(
listSelector!,
fields,
currentListId || 0,
targetFields,
targetListId || 0,
currentListActionId || `list-${crypto.randomUUID()}`,
{
type: paginationType,
@@ -1812,6 +1851,8 @@ export const BrowserWindow = () => {
socket,
t,
paginationSelector,
highlighterData,
browserSteps
]
);
@@ -1864,6 +1905,22 @@ export const BrowserWindow = () => {
paginationType !== "scrollUp" &&
paginationType !== "none"
) {
let targetListId = currentListId;
let targetFields = fields;
if ((!targetListId || targetListId === 0) && currentListActionId) {
const activeStep = browserSteps.find(
s => s.type === 'list' && s.actionId === currentListActionId
) as ListStep | undefined;
if (activeStep) {
targetListId = activeStep.id;
if (Object.keys(targetFields).length === 0 && Object.keys(activeStep.fields).length > 0) {
targetFields = activeStep.fields;
}
}
}
setPaginationSelector(highlighterData.selector);
notify(
`info`,
@@ -1873,8 +1930,8 @@ export const BrowserWindow = () => {
);
addListStep(
listSelector!,
fields,
currentListId || 0,
targetFields,
targetListId || 0,
currentListActionId || `list-${crypto.randomUUID()}`,
{ type: paginationType, selector: highlighterData.selector, isShadow: highlighterData.isShadow },
undefined,
@@ -2046,6 +2103,31 @@ export const BrowserWindow = () => {
}
}, [paginationMode, resetPaginationSelector]);
// While pagination mode is active for a list action, reconcile the local
// `paginationSelector` with the pagination stored on that action's list step.
useEffect(() => {
if (paginationMode && currentListActionId) {
const currentListStep = browserSteps.find(
step => step.type === 'list' && step.actionId === currentListActionId
) as (ListStep & { type: 'list' }) | undefined;
const currentSelector = currentListStep?.pagination?.selector;
const currentType = currentListStep?.pagination?.type;
// Click-based types: clear the local selector when the step has no selector,
// or when the step's stored type differs from the currently chosen type.
if (['clickNext', 'clickLoadMore'].includes(paginationType)) {
if (!currentSelector || (currentType && currentType !== paginationType)) {
setPaginationSelector('');
}
}
// Same expression as `currentSelector` above, read again under a second name.
const stepSelector = currentListStep?.pagination?.selector;
// Adopt the step's selector when nothing is held locally; drop the local
// selector when the step has none.
// NOTE(review): when the reset above and the first branch below both fire in
// one pass, the call below is issued last — confirm this ordering is intended.
if (stepSelector && !paginationSelector) {
setPaginationSelector(stepSelector);
} else if (!stepSelector && paginationSelector) {
setPaginationSelector('');
}
}
// NOTE(review): `paginationType` is read above but is not listed in the
// dependency array — confirm that is intentional.
}, [browserSteps, paginationMode, currentListActionId, paginationSelector]);
return (
<div
onClick={handleClick}
@@ -2310,6 +2392,7 @@ export const BrowserWindow = () => {
listSelector={listSelector}
cachedChildSelectors={cachedChildSelectors}
paginationMode={paginationMode}
paginationSelector={paginationSelector}
paginationType={paginationType}
limitMode={limitMode}
isCachingChildSelectors={isCachingChildSelectors}

View File

@@ -100,6 +100,7 @@ interface RRWebDOMBrowserRendererProps {
listSelector?: string | null;
cachedChildSelectors?: string[];
paginationMode?: boolean;
paginationSelector?: string;
paginationType?: string;
limitMode?: boolean;
isCachingChildSelectors?: boolean;
@@ -153,6 +154,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
listSelector = null,
cachedChildSelectors = [],
paginationMode = false,
paginationSelector = "",
paginationType = "",
limitMode = false,
isCachingChildSelectors = false,
@@ -257,6 +259,13 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
else if (listSelector) {
if (limitMode) {
shouldHighlight = false;
} else if (
paginationMode &&
paginationSelector &&
paginationType !== "" &&
!["none", "scrollDown", "scrollUp"].includes(paginationType)
) {
shouldHighlight = false;
} else if (
paginationMode &&
paginationType !== "" &&

View File

@@ -1,4 +1,4 @@
import React, { useState, useCallback, useEffect, useMemo } from 'react';
import React, { useState, useCallback, useEffect, useRef, useMemo } from 'react';
import { Button, Paper, Box, TextField, IconButton, Tooltip } from "@mui/material";
import { WorkflowFile } from "maxun-core";
import Typography from "@mui/material/Typography";
@@ -15,9 +15,9 @@ import ActionDescriptionBox from '../action/ActionDescriptionBox';
import { useThemeMode } from '../../context/theme-provider';
import { useTranslation } from 'react-i18next';
import { useBrowserDimensionsStore } from '../../context/browserDimensions';
import { emptyWorkflow } from '../../shared/constants';
import { clientListExtractor } from '../../helpers/clientListExtractor';
import { clientSelectorGenerator } from '../../helpers/clientSelectorGenerator';
import { clientPaginationDetector } from '../../helpers/clientPaginationDetector';
const fetchWorkflow = (id: string, callback: (response: WorkflowFile) => void) => {
getActiveWorkflow(id).then(
@@ -45,6 +45,13 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
const [showCaptureText, setShowCaptureText] = useState(true);
const { panelHeight } = useBrowserDimensionsStore();
// Result of the most recent pagination auto-detection, or null when there is
// none. `selector` is null for results that carry no element selector
// (the scroll-based results stored in this file use null).
const [autoDetectedPagination, setAutoDetectedPagination] = useState<{
type: PaginationType;
selector: string | null;
confidence: 'high' | 'medium' | 'low';
} | null>(null);
// Holds the list action id for which auto-detection was last started; it is
// compared against `currentListActionId` before starting another run.
const autoDetectionRunRef = useRef<string | null>(null);
const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog, currentListActionId, setCurrentListActionId, currentTextActionId, setCurrentTextActionId, currentScreenshotActionId, setCurrentScreenshotActionId, isDOMMode, setIsDOMMode, currentSnapshot, setCurrentSnapshot, updateDOMMode, initialUrl, setRecordingUrl, currentTextGroupName } = useGlobalInfoStore();
const {
getText, startGetText, stopGetText,
@@ -62,7 +69,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
startAction, finishAction
} = useActionContext();
const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep, updateListTextFieldLabel, removeListTextField, updateListStepLimit, deleteStepsByActionId, updateListStepData, updateScreenshotStepData, emitActionForStep } = useBrowserSteps();
const { browserSteps, addScreenshotStep, updateListStepLimit, updateListStepPagination, deleteStepsByActionId, updateListStepData, updateScreenshotStepData, emitActionForStep } = useBrowserSteps();
const { id, socket } = useSocketStore();
const { t } = useTranslation();
@@ -72,6 +79,73 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
setWorkflow(data);
}, [setWorkflow]);
// React to the selected pagination type for the active list step.
// Only the click-based types ('clickNext', 'clickLoadMore') are handled:
//  - if the step already stores a different type, the highlight of the old
//    pagination element is removed, the step's pagination is reset to the new
//    type with no selector, and pagination mode is started;
//  - if the step stores neither a selector nor a type, pagination mode is
//    started so a selector can be picked.
useEffect(() => {
  if (!paginationType || !currentListActionId) return;

  const activeListStep = browserSteps.find(
    (candidate) => candidate.type === 'list' && candidate.actionId === currentListActionId
  ) as (BrowserStep & { type: 'list' }) | undefined;

  const savedSelector = activeListStep?.pagination?.selector;
  const savedType = activeListStep?.pagination?.type;

  if (!['clickNext', 'clickLoadMore'].includes(paginationType)) return;

  const typeChanged = savedType && savedType !== paginationType;
  if (!typeChanged) {
    // Nothing stored yet for this step: let the user pick an element.
    if (!savedSelector && !savedType) {
      startPaginationMode();
    }
    return;
  }

  const frame = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
  if (frame?.contentDocument && savedSelector) {
    try {
      // Resolve an XPath (leading '//' or '(//') or CSS selector to elements;
      // any evaluation error yields an empty list.
      const resolveElements = (selector: string, doc: Document): Element[] => {
        const looksLikeXPath = selector.startsWith('//') || selector.startsWith('(//');
        try {
          if (!looksLikeXPath) {
            return Array.from(doc.querySelectorAll(selector));
          }
          const snapshot = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
          const matched: Element[] = [];
          for (let index = 0; index < snapshot.snapshotLength; index += 1) {
            const item = snapshot.snapshotItem(index);
            if (item && item.nodeType === Node.ELEMENT_NODE) {
              matched.push(item as Element);
            }
          }
          return matched;
        } catch (err) {
          return [];
        }
      };

      // Reset the inline styles on every element the old selector matches.
      for (const node of resolveElements(savedSelector, frame.contentDocument)) {
        const target = node as HTMLElement;
        target.style.outline = '';
        target.style.outlineOffset = '';
        target.style.zIndex = '';
      }
    } catch (error) {
      console.error('Error removing pagination highlight:', error);
    }
  }

  if (activeListStep) {
    updateListStepPagination(activeListStep.id, {
      type: paginationType,
      selector: null,
    });
  }

  startPaginationMode();
}, [paginationType, currentListActionId, browserSteps, updateListStepPagination, startPaginationMode]);
useEffect(() => {
if (socket) {
const domModeHandler = (data: any) => {
@@ -391,7 +465,182 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
return;
}
startPaginationMode();
const currentListStepForAutoDetect = browserSteps.find(
step => step.type === 'list' && step.actionId === currentListActionId
) as (BrowserStep & { type: 'list'; listSelector?: string }) | undefined;
if (currentListStepForAutoDetect?.listSelector) {
if (autoDetectionRunRef.current !== currentListActionId) {
autoDetectionRunRef.current = currentListActionId;
notify('info', 'Detecting pagination...');
try {
socket?.emit('testPaginationScroll', {
listSelector: currentListStepForAutoDetect.listSelector
});
const handleScrollTestResult = (result: any) => {
if (result.success && result.contentLoaded) {
setAutoDetectedPagination({
type: 'scrollDown',
selector: null,
confidence: 'high'
});
updatePaginationType('scrollDown');
const latestListStep = browserSteps.find(
step => step.type === 'list' && step.actionId === currentListActionId
);
if (latestListStep) {
updateListStepPagination(latestListStep.id, {
type: 'scrollDown',
selector: null,
isShadow: false
});
}
} else if (result.success && !result.contentLoaded) {
const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
const iframeDoc = iframeElement?.contentDocument;
if (iframeDoc) {
const detectionResult = clientPaginationDetector.autoDetectPagination(
iframeDoc,
currentListStepForAutoDetect.listSelector!,
clientSelectorGenerator,
{ disableScrollDetection: true }
);
if (detectionResult.type) {
setAutoDetectedPagination({
type: detectionResult.type,
selector: detectionResult.selector,
confidence: detectionResult.confidence
});
const latestListStep = browserSteps.find(
step => step.type === 'list' && step.actionId === currentListActionId
);
if (latestListStep) {
updateListStepPagination(latestListStep.id, {
type: detectionResult.type,
selector: detectionResult.selector,
isShadow: false
});
}
updatePaginationType(detectionResult.type);
if (detectionResult.selector && (detectionResult.type === 'clickNext' || detectionResult.type === 'clickLoadMore')) {
try {
/**
 * Resolve `selector` against `doc`.
 *
 * Selectors starting with '//' or '(//' are evaluated as XPath and only
 * element nodes are returned. Anything else is treated as CSS: the whole
 * selector is tried first; if it matches nothing or is rejected, each
 * comma-separated part is tried in order and the first non-empty match wins.
 * Returns an empty array when nothing matches or evaluation fails.
 */
function evaluateSelector(selector: string, doc: Document): Element[] {
  const queryCss = (css: string): Element[] => Array.from(doc.querySelectorAll(css));

  try {
    if (selector.startsWith('//') || selector.startsWith('(//')) {
      const snapshot = doc.evaluate(
        selector,
        doc,
        null,
        XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
        null
      );
      const xpathMatches: Element[] = [];
      let index = 0;
      while (index < snapshot.snapshotLength) {
        const candidate = snapshot.snapshotItem(index);
        if (candidate && candidate.nodeType === Node.ELEMENT_NODE) {
          xpathMatches.push(candidate as Element);
        }
        index += 1;
      }
      return xpathMatches;
    }

    // First attempt: the selector exactly as given.
    try {
      const wholeMatch = queryCss(selector);
      if (wholeMatch.length > 0) {
        return wholeMatch;
      }
    } catch (err) {
      console.warn('[RightSidePanel] Full chained selector failed, trying individual selectors:', err);
    }

    // Fallback: try each comma-separated part on its own.
    for (const rawPart of selector.split(',')) {
      const part = rawPart.trim();
      try {
        const partMatch = queryCss(part);
        if (partMatch.length > 0) {
          return partMatch;
        }
      } catch (err) {
        console.warn('[RightSidePanel] Selector part failed:', part, err);
      }
    }

    return [];
  } catch (err) {
    console.error('[RightSidePanel] Selector evaluation failed:', selector, err);
    return [];
  }
}
const elements = evaluateSelector(detectionResult.selector, iframeDoc);
if (elements.length > 0) {
elements.forEach((el: Element) => {
(el as HTMLElement).style.outline = '3px dashed #ff00c3';
(el as HTMLElement).style.outlineOffset = '2px';
(el as HTMLElement).style.zIndex = '9999';
});
const firstElement = elements[0] as HTMLElement;
const elementRect = firstElement.getBoundingClientRect();
const iframeWindow = iframeElement.contentWindow;
if (iframeWindow) {
const targetY = elementRect.top + iframeWindow.scrollY - (iframeWindow.innerHeight / 2) + (elementRect.height / 2);
iframeWindow.scrollTo({ top: targetY, behavior: 'smooth' });
}
const paginationTypeLabel = detectionResult.type === 'clickNext' ? 'Next Button' : 'Load More Button';
notify('info', `${paginationTypeLabel} has been auto-detected and highlighted on the page`);
} else {
console.warn(' No elements found for selector:', detectionResult.selector);
}
} catch (error) {
console.error('Error highlighting pagination button:', error);
}
}
} else {
setAutoDetectedPagination(null);
}
}
} else {
console.error('Scroll test failed:', result.error);
setAutoDetectedPagination(null);
}
socket?.off('paginationScrollTestResult', handleScrollTestResult);
};
socket?.on('paginationScrollTestResult', handleScrollTestResult);
setTimeout(() => {
socket?.off('paginationScrollTestResult', handleScrollTestResult);
}, 5000);
} catch (error) {
console.error('Scroll test failed:', error);
setAutoDetectedPagination(null);
}
}
}
const shouldSkipPaginationMode = autoDetectedPagination && (
['scrollDown', 'scrollUp'].includes(autoDetectedPagination.type) ||
(['clickNext', 'clickLoadMore'].includes(autoDetectedPagination.type) && autoDetectedPagination.selector)
);
if (!shouldSkipPaginationMode) {
startPaginationMode();
}
setShowPaginationOptions(true);
setCaptureStage('pagination');
break;
@@ -460,6 +709,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
case 'pagination':
stopPaginationMode();
setShowPaginationOptions(false);
setAutoDetectedPagination(null);
setCaptureStage('initial');
break;
}
@@ -495,17 +745,58 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
socket.emit('removeAction', { actionId: currentListActionId });
}
}
if (autoDetectedPagination?.selector) {
const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
if (iframeElement?.contentDocument) {
try {
/**
 * Resolve `selector` against `doc`: XPath when it starts with '//' or '(//'
 * (element nodes only), CSS otherwise. Any evaluation error yields [].
 */
function evaluateSelector(selector: string, doc: Document): Element[] {
  const looksLikeXPath = selector.startsWith('//') || selector.startsWith('(//');
  try {
    if (!looksLikeXPath) {
      return Array.from(doc.querySelectorAll(selector));
    }
    const snapshot = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
    const matched: Element[] = [];
    for (let index = 0; index < snapshot.snapshotLength; index += 1) {
      const item = snapshot.snapshotItem(index);
      if (item && item.nodeType === Node.ELEMENT_NODE) {
        matched.push(item as Element);
      }
    }
    return matched;
  } catch (err) {
    return [];
  }
}
const elements = evaluateSelector(autoDetectedPagination.selector, iframeElement.contentDocument);
elements.forEach((el: Element) => {
(el as HTMLElement).style.outline = '';
(el as HTMLElement).style.outlineOffset = '';
(el as HTMLElement).style.zIndex = '';
});
} catch (error) {
console.error('Error removing pagination highlight on discard:', error);
}
}
}
resetListState();
stopPaginationMode();
stopLimitMode();
setShowPaginationOptions(false);
setShowLimitOptions(false);
setAutoDetectedPagination(null);
setCaptureStage('initial');
setCurrentListActionId('');
clientSelectorGenerator.cleanup();
notify('error', t('right_panel.errors.capture_list_discarded'));
}, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t, stopPaginationMode, stopLimitMode, socket]);
}, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t, stopPaginationMode, stopLimitMode, socket, autoDetectedPagination]);
const captureScreenshot = (fullPage: boolean) => {
const screenshotCount = browserSteps.filter(s => s.type === 'screenshot').length + 1;
@@ -615,6 +906,114 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
{showPaginationOptions && (
<Box display="flex" flexDirection="column" gap={2} style={{ margin: '13px' }}>
<Typography>{t('right_panel.pagination.title')}</Typography>
{autoDetectedPagination && autoDetectedPagination.type !== '' && (
<Box
sx={{
p: 2,
mb: 1,
borderRadius: '8px',
backgroundColor: isDarkMode ? '#1a3a1a' : '#e8f5e9',
border: `1px solid ${isDarkMode ? '#2e7d32' : '#4caf50'}`,
}}
>
<Typography
variant="body2"
sx={{
color: isDarkMode ? '#81c784' : '#2e7d32',
fontWeight: 'bold',
mb: 0.5
}}
>
Auto-detected: {
autoDetectedPagination.type === 'clickNext' ? 'Click Next' :
autoDetectedPagination.type === 'clickLoadMore' ? 'Click Load More' :
autoDetectedPagination.type === 'scrollDown' ? 'Scroll Down' :
autoDetectedPagination.type === 'scrollUp' ? 'Scroll Up' :
autoDetectedPagination.type
}
</Typography>
<Typography
variant="caption"
sx={{
color: isDarkMode ? '#a5d6a7' : '#388e3c',
display: 'block',
mb: 1
}}
>
You can continue with this or manually select a different pagination type below.
</Typography>
{autoDetectedPagination.selector && ['clickNext', 'clickLoadMore'].includes(autoDetectedPagination.type) && (
<Button
size="small"
variant="outlined"
onClick={() => {
const currentListStep = browserSteps.find(
step => step.type === 'list' && step.actionId === currentListActionId
) as (BrowserStep & { type: 'list' }) | undefined;
if (currentListStep) {
const iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement;
if (iframeElement?.contentDocument && autoDetectedPagination.selector) {
try {
/**
 * Look up the elements addressed by `selector` in `doc`.
 * A leading '//' or '(//' selects XPath evaluation (non-element nodes are
 * dropped); everything else is passed to querySelectorAll. Failures in either
 * path are swallowed and reported as an empty array.
 */
function evaluateSelector(selector: string, doc: Document): Element[] {
  const isXPathSelector = ['//', '(//'].some((prefix) => selector.startsWith(prefix));

  if (!isXPathSelector) {
    try {
      return Array.from(doc.querySelectorAll(selector));
    } catch (err) {
      return [];
    }
  }

  try {
    const snapshot = doc.evaluate(selector, doc, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
    const found: Element[] = [];
    let position = 0;
    while (position < snapshot.snapshotLength) {
      const node = snapshot.snapshotItem(position);
      if (node && node.nodeType === Node.ELEMENT_NODE) {
        found.push(node as Element);
      }
      position += 1;
    }
    return found;
  } catch (err) {
    return [];
  }
}
const elements = evaluateSelector(autoDetectedPagination.selector, iframeElement.contentDocument);
elements.forEach((el: Element) => {
(el as HTMLElement).style.outline = '';
(el as HTMLElement).style.outlineOffset = '';
(el as HTMLElement).style.zIndex = '';
});
} catch (error) {
console.error('Error removing pagination highlight:', error);
}
}
updateListStepPagination(currentListStep.id, {
type: autoDetectedPagination.type,
selector: null,
});
startPaginationMode();
notify('info', 'Please select a different pagination element');
}
}}
sx={{
color: isDarkMode ? '#81c784' : '#2e7d32',
borderColor: isDarkMode ? '#81c784' : '#2e7d32',
'&:hover': {
borderColor: isDarkMode ? '#a5d6a7' : '#4caf50',
backgroundColor: isDarkMode ? '#1a3a1a' : '#f1f8f4',
}
}}
>
Choose Different Element
</Button>
)}
</Box>
)}
<Button
variant={paginationType === 'clickNext' ? "contained" : "outlined"}
onClick={() => handlePaginationSettingSelect('clickNext')}

View File

@@ -80,6 +80,7 @@ interface BrowserStepsContextType {
newLabel: string
) => void;
updateListStepLimit: (listId: number, limit: number) => void;
updateListStepPagination: (listId: number, pagination: { type: string; selector: string | null; isShadow?: boolean }) => void;
updateListStepData: (listId: number, extractedData: any[]) => void;
updateListStepName: (listId: number, name: string) => void;
updateScreenshotStepName: (id: number, name: string) => void;
@@ -479,6 +480,26 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({
);
};
/**
 * Replace the pagination settings of the list step with id `listId`.
 * The stored pagination is the given object with a null (or empty) selector
 * normalised to the empty string. Steps of other types, or with a different
 * id, are passed through unchanged.
 */
const updateListStepPagination = (
  listId: number,
  pagination: { type: string; selector: string | null; isShadow?: boolean }
) => {
  setBrowserSteps((prevSteps) =>
    prevSteps.map((step) => {
      if (step.type !== "list" || step.id !== listId) {
        return step;
      }
      const normalizedPagination = {
        ...pagination,
        selector: pagination.selector || "",
      };
      return { ...step, pagination: normalizedPagination };
    })
  );
};
const updateListStepName = (listId: number, name: string) => {
setBrowserSteps((prevSteps) =>
prevSteps.map((step) => {
@@ -533,6 +554,7 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({
updateBrowserTextStepLabel,
updateListTextFieldLabel,
updateListStepLimit,
updateListStepPagination,
updateListStepData,
updateListStepName,
updateScreenshotStepName,

View File

@@ -0,0 +1,586 @@
/**
* Client-Side Pagination Auto-Detection
* Detects pagination type and selector for list extraction
* Operates on passed document object (works in DOM mode / iframe)
*/
import type { ClientSelectorGenerator } from './clientSelectorGenerator';
/**
 * Result returned by the pagination auto-detector.
 */
export type PaginationDetectionResult = {
// Detected pagination mechanism; '' means nothing was detected.
type: 'scrollDown' | 'scrollUp' | 'clickNext' | 'clickLoadMore' | '';
// Selector of the detected pagination control; null for scroll-based results
// and when nothing was detected.
selector: string | null;
// Heuristic confidence derived from the detector's scores.
confidence: 'high' | 'medium' | 'low';
// Optional diagnostics: the detector sets either a message string or an
// object with candidate and score details.
debug?: any;
};
class ClientPaginationDetector {
/**
 * Auto-detect how a list paginates.
 *
 * Every visible clickable element in `doc` is scored as a possible "next",
 * "load more" and "previous" control (label match, proximity to the first
 * list element, plus tag and class hints for some categories). Unless
 * disabled, an infinite-scroll score is computed as well. The result is
 * chosen in this order: strong scroll signals, strong load-more button,
 * strong next button, medium scroll signals, weaker load-more button, weaker
 * next button and finally a previous button (reported as `scrollUp`).
 *
 * @param doc - The document object to analyze (can be iframe document)
 * @param listSelector - CSS or XPath selector matching the list elements
 * @param selectorGenerator - Generator forwarded to `generateSelectorsForElement`
 *   when a selector for the detected button is built
 * @param options - Optional detection options; `disableScrollDetection`
 *   forces the infinite-scroll score to 0
 * @returns Pagination detection result; `type` is '' when nothing was
 *   detected, when the list selector matches nothing, or when an error occurred
 */
autoDetectPagination(
  doc: Document,
  listSelector: string,
  selectorGenerator: ClientSelectorGenerator,
  options?: { disableScrollDetection?: boolean }
): PaginationDetectionResult {
  try {
    const listElements = this.evaluateSelector(listSelector, doc);
    if (listElements.length === 0) {
      return { type: '', selector: null, confidence: 'low', debug: 'No list elements found' };
    }
    const listContainer = listElements[0];

    // The symbol alternatives must not contain an empty branch: an empty
    // alternative matches every string and would award the label score to
    // every clickable element. \u203A is the single right angle quote.
    const nextButtonPatterns = [
      /next/i,
      /\bnext\s+page\b/i,
      /page\s+suivante/i,
      /siguiente/i,
      /weiter/i,
      />>|\u203A|→|»|⟩/,
      /\bforward\b/i,
      /\bnewer\b/i,
      /\bolder\b/i
    ];
    const loadMorePatterns = [
      /load\s+more/i,
      /show\s+more/i,
      /view\s+more/i,
      /see\s+more/i,
      /more\s+results/i,
      /plus\s+de\s+résultats/i,
      /más\s+resultados/i,
      /weitere\s+ergebnisse/i
    ];
    // \u2039 is the single left angle quote.
    const prevButtonPatterns = [
      /prev/i,
      /previous/i,
      /<<|\u2039|←|«/,
      /\bback\b/i
    ];

    const clickableElements = this.getClickableElements(doc);

    let nextButton: HTMLElement | null = null;
    let nextButtonScore = 0;
    const nextButtonCandidates: Array<{
      score: number;
      text: string;
      tag: string;
      reasons: string[];
    }> = [];
    let loadMoreButton: HTMLElement | null = null;
    let loadMoreScore = 0;
    let prevButton: HTMLElement | null = null;
    let prevButtonScore = 0;

    // One pass over the clickable elements scores all three categories.
    // Within a category the first element reaching the highest score wins.
    for (const element of clickableElements) {
      if (!this.isVisible(element)) continue;

      const text = (element.textContent || '').trim();
      const ariaLabel = element.getAttribute('aria-label') || '';
      const title = element.getAttribute('title') || '';
      const combinedText = `${text} ${ariaLabel} ${title}`;
      const nearList = this.isNearList(element, listContainer);
      const isButtonTag = element.tagName === 'BUTTON';
      // Read the attribute instead of `className`, which is not a plain
      // string on SVG elements.
      const className = element.getAttribute('class') || '';

      // --- "next" control ---
      let nextScore = 0;
      const reasons: string[] = [];
      if (this.matchesAnyPattern(combinedText, nextButtonPatterns)) {
        nextScore += 10;
        reasons.push('text match (+10)');
      }
      if (nearList) {
        nextScore += 5;
        reasons.push('near list (+5)');
      }
      if (isButtonTag) {
        nextScore += 2;
        reasons.push('button tag (+2)');
      }
      if (/pagination|next|forward/i.test(className)) {
        nextScore += 3;
        reasons.push('pagination class (+3)');
      }
      if (nextScore > 0) {
        nextButtonCandidates.push({
          score: nextScore,
          text: text.substring(0, 50),
          tag: element.tagName,
          reasons
        });
      }
      if (nextScore > nextButtonScore) {
        nextButtonScore = nextScore;
        nextButton = element;
      }

      // --- "load more" control ---
      let moreScore = 0;
      if (this.matchesAnyPattern(combinedText, loadMorePatterns)) moreScore += 10;
      if (nearList) moreScore += 5;
      if (isButtonTag) moreScore += 2;
      if (moreScore > loadMoreScore) {
        loadMoreScore = moreScore;
        loadMoreButton = element;
      }

      // --- "previous" control ---
      let prevScore = 0;
      if (this.matchesAnyPattern(combinedText, prevButtonPatterns)) prevScore += 10;
      if (nearList) prevScore += 5;
      if (prevScore > prevButtonScore) {
        prevButtonScore = prevScore;
        prevButton = element;
      }
    }

    const infiniteScrollScore = options?.disableScrollDetection
      ? 0
      : this.detectInfiniteScrollIndicators(doc, listElements, listContainer);
    const hasStrongInfiniteScrollSignals = infiniteScrollScore >= 8;
    const hasMediumInfiniteScrollSignals = infiniteScrollScore >= 5 && infiniteScrollScore < 8;

    if (hasStrongInfiniteScrollSignals) {
      const confidence = infiniteScrollScore >= 12 ? 'high' : infiniteScrollScore >= 10 ? 'medium' : 'low';
      return { type: 'scrollDown', selector: null, confidence };
    }

    if (loadMoreButton && loadMoreScore >= 15) {
      const selector = this.generateSelectorsForElement(loadMoreButton, doc, selectorGenerator);
      return { type: 'clickLoadMore', selector, confidence: 'high' };
    }

    // A strong next button only wins when there are no medium scroll signals.
    if (nextButton && nextButtonScore >= 15 && !hasMediumInfiniteScrollSignals) {
      const selector = this.generateSelectorsForElement(nextButton, doc, selectorGenerator);
      return { type: 'clickNext', selector, confidence: 'high' };
    }

    if (hasMediumInfiniteScrollSignals) {
      const confidence = infiniteScrollScore >= 7 ? 'medium' : 'low';
      return { type: 'scrollDown', selector: null, confidence };
    }

    if (loadMoreButton && loadMoreScore >= 8) {
      const selector = this.generateSelectorsForElement(loadMoreButton, doc, selectorGenerator);
      const confidence = loadMoreScore >= 10 ? 'medium' : 'low';
      return { type: 'clickLoadMore', selector, confidence };
    }

    if (nextButton && nextButtonScore >= 8) {
      const selector = this.generateSelectorsForElement(nextButton, doc, selectorGenerator);
      const confidence = nextButtonScore >= 10 ? 'medium' : 'low';
      return { type: 'clickNext', selector, confidence };
    }

    // Only a "previous" control was found: reported as upward scrolling.
    if (prevButton && prevButtonScore >= 8) {
      const confidence = prevButtonScore >= 15 ? 'high' : prevButtonScore >= 10 ? 'medium' : 'low';
      return { type: 'scrollUp', selector: null, confidence };
    }

    // Nothing detected: return diagnostics with the best-scoring candidates.
    const topNextCandidates = [...nextButtonCandidates]
      .sort((a, b) => b.score - a.score)
      .slice(0, 3);
    return {
      type: '',
      selector: null,
      confidence: 'low',
      debug: {
        clickableElementsCount: clickableElements.length,
        nextCandidatesCount: nextButtonCandidates.length,
        topNextCandidates,
        finalScores: {
          loadMore: loadMoreScore,
          next: nextButtonScore,
          prev: prevButtonScore,
          infiniteScroll: infiniteScrollScore
        }
      }
    };
  } catch (error: unknown) {
    console.error('Error:', error);
    // Thrown values are not guaranteed to be Error instances.
    const message = error instanceof Error ? error.message : String(error);
    return {
      type: '',
      selector: null,
      confidence: 'low',
      debug: 'Exception: ' + message
    };
  }
}
/**
 * Resolve a selector against `doc`.
 * Selectors beginning with '//' or '(//' are evaluated as XPath and only
 * element nodes are kept; all others go through querySelectorAll.
 * Evaluation errors are logged and produce an empty array.
 */
private evaluateSelector(selector: string, doc: Document): HTMLElement[] {
  try {
    const treatAsXPath = ['//', '(//'].some((prefix) => selector.startsWith(prefix));
    if (!treatAsXPath) {
      return Array.from(doc.querySelectorAll<HTMLElement>(selector));
    }

    const snapshot = doc.evaluate(
      selector,
      doc,
      null,
      XPathResult.ORDERED_NODE_SNAPSHOT_TYPE,
      null
    );
    const matched: HTMLElement[] = [];
    let index = 0;
    while (index < snapshot.snapshotLength) {
      const item = snapshot.snapshotItem(index);
      if (item && item.nodeType === Node.ELEMENT_NODE) {
        matched.push(item as HTMLElement);
      }
      index += 1;
    }
    return matched;
  } catch (err) {
    console.error('Selector evaluation failed:', selector, err);
    return [];
  }
}
/**
 * Collect the elements of `doc` that match any of the clickable selectors.
 * Each element appears once, at the position of its first match; the
 * selectors are queried in the listed order.
 */
private getClickableElements(doc: Document): HTMLElement[] {
  const clickableSelectors = ['button', 'a', '[role="button"]', '[onclick]', '.btn', '.button'];
  const unique = new Set<HTMLElement>();
  for (const css of clickableSelectors) {
    doc.querySelectorAll<HTMLElement>(css).forEach((node) => {
      unique.add(node);
    });
  }
  return Array.from(unique);
}
/**
 * Report whether `element` is rendered: its computed style is not
 * display:none, visibility:hidden or opacity 0, and it has a non-zero
 * offset width and height. Any error while reading styles counts as hidden.
 */
private isVisible(element: HTMLElement): boolean {
  try {
    const { display, visibility, opacity } = window.getComputedStyle(element);
    if (display === 'none' || visibility === 'hidden' || opacity === '0') {
      return false;
    }
    return element.offsetWidth > 0 && element.offsetHeight > 0;
  } catch {
    return false;
  }
}
/**
 * Return true as soon as one of `patterns` matches `text`;
 * false when none does (including an empty pattern list).
 */
private matchesAnyPattern(text: string, patterns: RegExp[]): boolean {
  for (const candidate of patterns) {
    if (candidate.test(text)) {
      return true;
    }
  }
  return false;
}
/**
 * Decide whether `element` sits close to `listContainer`, based on their
 * bounding rectangles:
 *  - its top edge lies within 500px below the list's bottom edge, or
 *  - its bottom edge lies within 500px above the list's top edge, or
 *  - it overlaps the list vertically and one of its side edges is less than
 *    200px from the opposite side edge of the list.
 * Any error while measuring is treated as "not near".
 */
private isNearList(element: HTMLElement, listContainer: HTMLElement): boolean {
  try {
    const list = listContainer.getBoundingClientRect();
    const target = element.getBoundingClientRect();

    const startsJustBelow = target.top >= list.bottom && target.top <= list.bottom + 500;
    const endsJustAbove = target.bottom <= list.top && target.bottom >= list.top - 500;
    if (startsJustBelow || endsJustAbove) {
      return true;
    }

    const overlapsVertically = !(target.bottom < list.top || target.top > list.bottom);
    if (!overlapsVertically) {
      return false;
    }

    const sideGap = Math.min(
      Math.abs(target.left - list.right),
      Math.abs(target.right - list.left)
    );
    return sideGap < 200;
  } catch (error) {
    return false;
  }
}
/**
 * Score how strongly the page looks like it loads more list items on scroll.
 *
 * Each heuristic contributes at most once:
 * - a loading / spinner / skeleton element exists: +3
 * - a sentinel / scroll-trigger / "infinite" element exists: +4
 * - a visible scroll-to-top control exists: +2
 * - page height is more than 3x the viewport: +3, more than 2x: +2
 * - 20 or more list items: +2, 10 or more: +1
 * - infinite-scroll / masonry / lazy-load markers exist: +4
 * - the last list item ends more than one viewport below the viewport
 *   bottom: +3, or merely below it: +2
 * - a "load"/"more" element with opacity 0 or hidden visibility exists: +2
 * - no pagination / pager elements exist: +1
 *
 * Viewport height, scroll offset and computed styles are read from the
 * document's own window (`doc.defaultView`), falling back to the global
 * `window`. The same `doc` is handed to the selector generator as an iframe
 * document, so the global window's metrics would describe the wrong viewport
 * in that case.
 *
 * @param doc - Document to inspect.
 * @param listElements - The detected list items, in document order
 *                       (the last entry is treated as the last item).
 * @param listContainer - Not used by the current heuristics; kept so the
 *                        signature stays stable for callers.
 * @returns The accumulated score; `0` when the page is not taller than the
 *          viewport or when detection throws.
 */
private detectInfiniteScrollIndicators(doc: Document, listElements: HTMLElement[], listContainer: HTMLElement): number {
  try {
    const view = doc.defaultView ?? window;
    const viewportHeight = view.innerHeight;
    const pageHeight = doc.documentElement.scrollHeight;

    // Nothing to scroll: infinite scroll is not possible.
    if (pageHeight <= viewportHeight) {
      return 0;
    }

    // True when at least one selector matches an element in the document.
    const matchesAny = (selectors: readonly string[]): boolean =>
      selectors.some(selector => doc.querySelector(selector) !== null);

    let score = 0;

    // Loading indicator (+3)
    if (matchesAny([
      '[class*="loading"]',
      '[class*="spinner"]',
      '[class*="skeleton"]',
      '[aria-busy="true"]',
      '[data-loading="true"]',
      '.loader',
      '.load-more-spinner',
      '[class*="load"]',
      '[id*="loading"]',
      '[id*="spinner"]'
    ])) {
      score += 3;
    }

    // Sentinel element (+4)
    if (matchesAny([
      '[class*="sentinel"]',
      '[class*="trigger"]',
      '[data-infinite]',
      '[data-scroll-trigger]',
      '#infinite-scroll-trigger',
      '[class*="infinite"]',
      '[id*="infinite"]'
    ])) {
      score += 4;
    }

    // Visible scroll-to-top button (+2); only the first match per selector is checked.
    const scrollToTopSelectors = [
      '[class*="scroll"][class*="top"]',
      '[aria-label*="scroll to top"]',
      '[title*="back to top"]',
      '.back-to-top',
      '#back-to-top',
      '[class*="scrolltop"]',
      '[class*="backtotop"]',
      'button[class*="top"]',
      'a[href="#top"]',
      'a[href="#"]'
    ];
    const hasVisibleScrollToTop = scrollToTopSelectors.some(selector => {
      const candidate = doc.querySelector<HTMLElement>(selector);
      return candidate !== null && this.isVisible(candidate);
    });
    if (hasVisibleScrollToTop) {
      score += 2;
    }

    // Page height relative to the viewport (+3 very tall, +2 tall)
    if (pageHeight > viewportHeight * 3) {
      score += 3;
    } else if (pageHeight > viewportHeight * 2) {
      score += 2;
    }

    // Number of list items already present (+2 many, +1 a good number)
    const itemCount = listElements.length;
    if (itemCount >= 20) {
      score += 2;
    } else if (itemCount >= 10) {
      score += 1;
    }

    // Infinite scroll library markers (+4)
    if (matchesAny([
      '.infinite-scroll',
      '[data-infinite-scroll]',
      '[data-flickity]',
      '[data-slick]',
      '.masonry',
      '[data-masonry]',
      '[class*="infinite-scroll"]',
      '[class*="lazy-load"]',
      '[data-lazy]'
    ])) {
      score += 4;
    }

    // How far the list extends below the current viewport (+3 far, +2 below)
    const lastListItem = listElements[listElements.length - 1];
    if (lastListItem) {
      const lastItemBottom = lastListItem.getBoundingClientRect().bottom + view.scrollY;
      const viewportBottom = view.scrollY + viewportHeight;
      if (lastItemBottom > viewportBottom + viewportHeight) {
        score += 3;
      } else if (lastItemBottom > viewportBottom) {
        score += 2;
      }
    }

    // Hidden load trigger element (+2)
    const loadMoreCandidates = Array.from(
      doc.querySelectorAll<HTMLElement>('[class*="load"], [class*="more"]')
    );
    const hasHiddenTrigger = loadMoreCandidates.some(candidate => {
      const style = view.getComputedStyle(candidate);
      return style.opacity === '0' || style.visibility === 'hidden';
    });
    if (hasHiddenTrigger) {
      score += 2;
    }

    // No pagination controls found (+1)
    if (doc.querySelectorAll('[class*="pagination"], [class*="pager"]').length === 0) {
      score += 1;
    }

    return score;
  } catch (error) {
    console.error('Infinite scroll detection error:', error);
    return 0;
  }
}
/**
 * Build a comma-separated selector chain for an element from the result of
 * `selectorGenerator.generateSelectorsFromElement`.
 *
 * Selectors are taken from the result in this order, skipping any that are
 * absent, `null`, `undefined` or empty: iframe (`.full`), shadow (`.full`),
 * test id, id, href, rel, accessibility, attribute, general.
 *
 * @param element - Element to generate selectors for.
 * @param doc - Document the element belongs to; forwarded to the generator.
 * @param selectorGenerator - Generator used to produce the selector set.
 * @returns The joined selector chain, or `null` when the generator returns
 *          nothing, no usable selector remains, or an error is thrown.
 */
private generateSelectorsForElement(
  element: HTMLElement,
  doc: Document,
  selectorGenerator: ClientSelectorGenerator
): string | null {
  try {
    const generated = selectorGenerator.generateSelectorsFromElement(element, doc);
    if (!generated) {
      console.warn('Could not generate selectors for element');
      return null;
    }

    // Entries that carry the usable selector on a nested `full` property.
    const nestedKeys = ['iframeSelector', 'shadowSelector'];
    // Entries whose value is used directly.
    const flatKeys = [
      'testIdSelector',
      'id',
      'hrefSelector',
      'relSelector',
      'accessibilitySelector',
      'attrSelector',
      'generalSelector',
    ];

    const candidates = [
      ...nestedKeys.map(key =>
        key in generated && generated[key]?.full ? generated[key].full : null
      ),
      ...flatKeys.map(key => (key in generated ? generated[key] : null)),
    ];

    const usable = candidates.filter(
      candidate => candidate !== null && candidate !== undefined && candidate !== ''
    );
    const chain = usable.join(',');
    return chain || null;
  } catch (error) {
    console.error('Error generating selectors:', error);
    return null;
  }
}
}
// Module-level ClientPaginationDetector instance, constructed when this module
// is evaluated and exported for use by importers.
export const clientPaginationDetector = new ClientPaginationDetector();

View File

@@ -2476,6 +2476,46 @@ class ClientSelectorGenerator {
return null;
};
/**
 * Generate selectors directly from an element.
 *
 * First tries to scroll the element's own window (`iframeDoc.defaultView`) so
 * the element is vertically centred; a failure here is logged and ignored.
 * The element is then re-measured and its centre point is passed to
 * `getSelectors`.
 *
 * NOTE(review): the scroll uses `behavior: 'auto'`, which per the CSSOM View
 * specification defers to the page's CSS `scroll-behavior`; on a page that
 * sets `scroll-behavior: smooth` the re-measurement may happen before the
 * scroll settles — confirm whether a forced instant scroll is needed.
 *
 * @param element - Element to generate selectors for.
 * @param iframeDoc - Document that contains the element.
 * @returns Whatever `getSelectors` returns for the element's centre point,
 *          or `null` if an error is thrown.
 */
public generateSelectorsFromElement = (
  element: HTMLElement,
  iframeDoc: Document
): any | null => {
  try {
    try {
      const { top, height } = element.getBoundingClientRect();
      const frameWindow = iframeDoc.defaultView;
      if (frameWindow) {
        // Scroll offset that puts the element's vertical centre mid-viewport.
        const centredTop = top + frameWindow.scrollY - (frameWindow.innerHeight / 2) + (height / 2);
        frameWindow.scrollTo({ top: centredTop, behavior: 'auto' });
      }
    } catch (scrollError) {
      console.warn('[ClientSelectorGenerator] Could not scroll element into view:', scrollError);
    }

    // Measure again: the scroll above may have moved the element.
    const box = element.getBoundingClientRect();
    const centre = {
      x: box.left + box.width / 2,
      y: box.top + box.height / 2
    };
    return this.getSelectors(iframeDoc, centre);
  } catch (e) {
    const failure = e as Error;
    console.warn(`Error generating selectors from element: ${failure.message}`);
    console.warn(`Stack: ${failure.stack}`);
    return null;
  }
};
public getChildSelectors = (
iframeDoc: Document,
parentSelector: string
@@ -4297,4 +4337,5 @@ class ClientSelectorGenerator {
}
}
// Export the class itself, plus an instance constructed when this module is
// evaluated.
export { ClientSelectorGenerator };
export const clientSelectorGenerator = new ClientSelectorGenerator();