feat: revert load more pagination logic

This commit is contained in:
Rohit
2025-02-03 23:34:28 +05:30
parent c37fe42a2f
commit 56bb09ef5a

View File

@@ -759,55 +759,76 @@ export default class Interpreter extends EventEmitter {
break; break;
} }
case 'clickLoadMore': { case 'clickLoadMore':
while (true) { while (true) {
const { button, workingSelector } = await findWorkingButton(availableSelectors); let checkButton = null;
if (!button || !workingSelector) { let workingSelector = null;
// Final retry for load more when no selectors work
const success = await retryOperation(async () => {
await scrapeCurrentPage();
return allResults.length > 0;
});
if (!success) return allResults; for (const selector of availableSelectors) {
try {
checkButton = await page.waitForSelector(selector, {
state: 'attached',
timeout: 30000
});
if (checkButton) {
workingSelector = selector;
debugLog('Found working selector:', selector);
break; break;
} }
} catch (error) {
debugLog(`Load More selector failed: ${selector}`);
}
}
availableSelectors = availableSelectors.slice( if (!workingSelector) {
availableSelectors.indexOf(workingSelector) debugLog('No working Load More selector found');
); const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults);
return allResults;
}
const loadMoreButton = await page.$(workingSelector);
if (!loadMoreButton) {
debugLog('Load More button not found');
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults);
return allResults;
}
const selectorIndex = availableSelectors.indexOf(workingSelector);
availableSelectors = availableSelectors.slice(selectorIndex);
const loadMoreSuccess = await retryOperation(async () => {
try { try {
await button.click().catch(() => button.dispatchEvent('click')); try {
await page.waitForTimeout(1000); await loadMoreButton.click();
} catch (error) {
await loadMoreButton.dispatchEvent('click');
}
} catch (error) {
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults);
return allResults;
}
await page.evaluate(() => await page.waitForTimeout(2000);
window.scrollTo(0, document.body.scrollHeight) await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
); await page.waitForTimeout(2000);
await page.waitForTimeout(1000);
const currentHeight = await page.evaluate(() =>
document.body.scrollHeight
);
const currentHeight = await page.evaluate(() => document.body.scrollHeight);
if (currentHeight === previousHeight) { if (currentHeight === previousHeight) {
await scrapeCurrentPage(); debugLog('No more items loaded after Load More');
return false; const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults);
return allResults;
} }
previousHeight = currentHeight; previousHeight = currentHeight;
return true; if (config.limit && allResults.length >= config.limit) {
} catch (error) { allResults = allResults.slice(0, config.limit);
await scrapeCurrentPage();
return false;
}
});
if (!loadMoreSuccess || checkLimit()) return allResults;
}
break; break;
} }
}
break;
default: default:
await scrapeCurrentPage(); await scrapeCurrentPage();