feat: revert load more pagination logic

This commit is contained in:
Rohit
2025-02-03 23:34:28 +05:30
parent c37fe42a2f
commit 56bb09ef5a

View File

@@ -759,55 +759,76 @@ export default class Interpreter extends EventEmitter {
break; break;
} }
case 'clickLoadMore': { case 'clickLoadMore':
while (true) { while (true) {
const { button, workingSelector } = await findWorkingButton(availableSelectors); let checkButton = null;
if (!button || !workingSelector) { let workingSelector = null;
// Final retry for load more when no selectors work
const success = await retryOperation(async () => { for (const selector of availableSelectors) {
await scrapeCurrentPage(); try {
return allResults.length > 0; checkButton = await page.waitForSelector(selector, {
}); state: 'attached',
timeout: 30000
if (!success) return allResults; });
break; if (checkButton) {
workingSelector = selector;
debugLog('Found working selector:', selector);
break;
}
} catch (error) {
debugLog(`Load More selector failed: ${selector}`);
}
} }
availableSelectors = availableSelectors.slice( if (!workingSelector) {
availableSelectors.indexOf(workingSelector) debugLog('No working Load More selector found');
); const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults);
return allResults;
}
const loadMoreSuccess = await retryOperation(async () => { const loadMoreButton = await page.$(workingSelector);
if (!loadMoreButton) {
debugLog('Load More button not found');
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults);
return allResults;
}
const selectorIndex = availableSelectors.indexOf(workingSelector);
availableSelectors = availableSelectors.slice(selectorIndex);
try {
try { try {
await button.click().catch(() => button.dispatchEvent('click')); await loadMoreButton.click();
await page.waitForTimeout(1000);
await page.evaluate(() =>
window.scrollTo(0, document.body.scrollHeight)
);
await page.waitForTimeout(1000);
const currentHeight = await page.evaluate(() =>
document.body.scrollHeight
);
if (currentHeight === previousHeight) {
await scrapeCurrentPage();
return false;
}
previousHeight = currentHeight;
return true;
} catch (error) { } catch (error) {
await scrapeCurrentPage(); await loadMoreButton.dispatchEvent('click');
return false;
} }
}); } catch (error) {
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults);
return allResults;
}
if (!loadMoreSuccess || checkLimit()) return allResults; await page.waitForTimeout(2000);
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
await page.waitForTimeout(2000);
const currentHeight = await page.evaluate(() => document.body.scrollHeight);
if (currentHeight === previousHeight) {
debugLog('No more items loaded after Load More');
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults);
return allResults;
}
previousHeight = currentHeight;
if (config.limit && allResults.length >= config.limit) {
allResults = allResults.slice(0, config.limit);
break;
}
} }
break; break;
}
default: default:
await scrapeCurrentPage(); await scrapeCurrentPage();