feat: add support for SPA and Ajax pagination

This commit is contained in:
Rohit
2025-03-17 16:03:14 +05:30
parent a047733e37
commit b172636eb1

View File

@@ -704,13 +704,13 @@ export default class Interpreter extends EventEmitter {
await scrapeCurrentPage(); await scrapeCurrentPage();
if (checkLimit()) return allResults; if (checkLimit()) return allResults;
const { button, workingSelector, updatedSelectors } = await findWorkingButton(availableSelectors); const { button, workingSelector, updatedSelectors } = await findWorkingButton(availableSelectors);
availableSelectors = updatedSelectors; availableSelectors = updatedSelectors;
if (!button || !workingSelector) { if (!button || !workingSelector) {
// Final retry for navigation when no selectors work // Final retry for navigation when no selectors work
const success = await retryOperation(async () => { const success = await retryOperation(async () => {
try { try {
await page.evaluate(() => window.history.forward()); await page.evaluate(() => window.history.forward());
@@ -724,70 +724,102 @@ export default class Interpreter extends EventEmitter {
if (!success) return allResults; if (!success) return allResults;
break; break;
} }
let retryCount = 0; let retryCount = 0;
let navigationSuccess = false; let paginationSuccess = false;
while (retryCount < MAX_RETRIES && !navigationSuccess) { // Capture basic content signature before click
// Takes a lightweight snapshot of the list as currently rendered in the page:
// the page URL, how many elements match config.listSelector, and the text of
// the first three matches joined with '|'. Two snapshots can be compared to
// tell whether a click changed the URL, the item count, or the leading items.
const captureContentSignature = async () => {
  const signature = await page.evaluate((listSelector) => {
    const matches = [...document.querySelectorAll(listSelector)];
    // Only the first three items are sampled; a missing textContent counts as ''.
    const leadingText = matches
      .slice(0, 3)
      .map((node) => node.textContent || '')
      .join('|');
    return {
      url: window.location.href,
      itemCount: matches.length,
      firstItems: leadingText,
    };
  }, config.listSelector);
  return signature;
};
const beforeSignature = await captureContentSignature();
debugLog(`Before click: ${beforeSignature.itemCount} items`);
while (retryCount < MAX_RETRIES && !paginationSuccess) {
try { try {
try { try {
await Promise.all([ await Promise.all([
page.waitForNavigation({ page.waitForNavigation({
waitUntil: 'networkidle', waitUntil: 'networkidle',
timeout: 15000 timeout: 15000
}).catch(e => {
throw e;
}), }),
button.click() button.click()
]); ]);
navigationSuccess = true; debugLog("Navigation successful after regular click");
} catch (error) { paginationSuccess = true;
debugLog(`Regular click failed on attempt ${retryCount + 1}. Trying DispatchEvent`); } catch (navError) {
debugLog("Regular click with navigation failed, trying dispatch event with navigation");
// If regular click fails, try dispatchEvent try {
if (page.url() === currentUrl) { await Promise.all([
page.waitForNavigation({
waitUntil: 'networkidle',
timeout: 15000
}).catch(e => {
throw e;
}),
button.dispatchEvent('click')
]);
debugLog("Navigation successful after dispatch event");
paginationSuccess = true;
} catch (dispatchNavError) {
try { try {
await Promise.all([ await button.click();
page.waitForNavigation({ await page.waitForTimeout(2000);
waitUntil: 'networkidle', } catch (clickError) {
timeout: 15000 await button.dispatchEvent('click');
}), await page.waitForTimeout(2000);
button.dispatchEvent('click')
]);
navigationSuccess = true;
} catch (dispatchError) {
debugLog(`DispatchEvent failed on attempt ${retryCount + 1}.`);
} }
} else {
navigationSuccess = true;
} }
} }
const newUrl = page.url(); await page.waitForLoadState('networkidle', { timeout: 5000 }).catch(() => {});
if (visitedUrls.has(newUrl)) {
debugLog(`Detected navigation to previously visited URL ${newUrl} on attempt ${retryCount + 1}`); if (!paginationSuccess) {
navigationSuccess = false; const newUrl = page.url();
} const afterSignature = await captureContentSignature();
if (navigationSuccess) { if (newUrl !== currentUrl) {
await page.waitForTimeout(1000); debugLog(`URL changed to ${newUrl}`);
visitedUrls.add(newUrl);
paginationSuccess = true;
}
else if (afterSignature.firstItems !== beforeSignature.firstItems) {
debugLog("Content changed without URL change");
paginationSuccess = true;
}
else if (afterSignature.itemCount !== beforeSignature.itemCount) {
debugLog(`Item count changed from ${beforeSignature.itemCount} to ${afterSignature.itemCount}`);
paginationSuccess = true;
}
} }
} catch (error) { } catch (error) {
debugLog(`Navigation attempt ${retryCount + 1} failed completely.`); debugLog(`Pagination attempt ${retryCount + 1} failed: ${error.message}`);
navigationSuccess = false;
} }
if (!navigationSuccess) { if (!paginationSuccess) {
retryCount++; retryCount++;
if (retryCount < MAX_RETRIES) { if (retryCount < MAX_RETRIES) {
debugLog(`Retrying navigation - attempt ${retryCount + 1} of ${MAX_RETRIES}`); debugLog(`Retrying pagination - attempt ${retryCount + 1} of ${MAX_RETRIES}`);
await page.waitForTimeout(RETRY_DELAY); await page.waitForTimeout(RETRY_DELAY);
} }
} }
} }
if (!navigationSuccess) { if (!paginationSuccess) {
debugLog(`Navigation failed after ${MAX_RETRIES} attempts`); debugLog(`Pagination failed after ${MAX_RETRIES} attempts`);
return allResults; return allResults;
} }
break; break;
} }