feat: better load more pagination handling

This commit is contained in:
Rohit
2025-02-03 23:50:29 +05:30
parent dc44f6a9cd
commit 7e2d1ea4cd

View File

@@ -763,55 +763,65 @@ export default class Interpreter extends EventEmitter {
case 'clickLoadMore': { case 'clickLoadMore': {
while (true) { while (true) {
let checkButton = null; // Find working button with retry mechanism, consistent with clickNext
let workingSelector = null; const { button: loadMoreButton, workingSelector } = await findWorkingButton(availableSelectors);
for (const selector of availableSelectors) { if (!workingSelector || !loadMoreButton) {
try { debugLog('No working Load More selector found after retries');
checkButton = await page.waitForSelector(selector, {
state: 'attached',
timeout: 30000
});
if (checkButton) {
workingSelector = selector;
debugLog('Found working selector:', selector);
break;
}
} catch (error) {
debugLog(`Load More selector failed: ${selector}`);
}
}
if (!workingSelector) {
debugLog('No working Load More selector found');
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults); allResults = allResults.concat(finalResults);
return allResults; return allResults;
} }
const loadMoreButton = await page.$(workingSelector); // Update available selectors to start from the working one
if (!loadMoreButton) { availableSelectors = availableSelectors.slice(
debugLog('Load More button not found'); availableSelectors.indexOf(workingSelector)
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); );
allResults = allResults.concat(finalResults);
return allResults;
}
const selectorIndex = availableSelectors.indexOf(workingSelector); // Implement retry mechanism for clicking the button
availableSelectors = availableSelectors.slice(selectorIndex); let retryCount = 0;
let clickSuccess = false;
while (retryCount < MAX_RETRIES && !clickSuccess) {
try { try {
try { try {
await loadMoreButton.click(); await loadMoreButton.click();
clickSuccess = true;
} catch (error) { } catch (error) {
debugLog(`Regular click failed on attempt ${retryCount + 1}. Trying DispatchEvent`);
// If regular click fails, try dispatchEvent
try {
await loadMoreButton.dispatchEvent('click'); await loadMoreButton.dispatchEvent('click');
clickSuccess = true;
} catch (dispatchError) {
debugLog(`DispatchEvent failed on attempt ${retryCount + 1}.`);
throw dispatchError; // Propagate error to trigger retry
}
}
if (clickSuccess) {
await page.waitForTimeout(1000);
} }
} catch (error) { } catch (error) {
debugLog(`Click attempt ${retryCount + 1} failed completely.`);
retryCount++;
if (retryCount < MAX_RETRIES) {
debugLog(`Retrying click - attempt ${retryCount + 1} of ${MAX_RETRIES}`);
await page.waitForTimeout(RETRY_DELAY);
}
}
}
if (!clickSuccess) {
debugLog(`Load More clicking failed after ${MAX_RETRIES} attempts`);
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults); allResults = allResults.concat(finalResults);
return allResults; return allResults;
} }
// Wait for content to load and check scroll height
await page.waitForTimeout(2000); await page.waitForTimeout(2000);
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
await page.waitForTimeout(2000); await page.waitForTimeout(2000);