feat: better load more pagination handling

This commit is contained in:
Rohit
2025-02-03 23:50:29 +05:30
parent dc44f6a9cd
commit 7e2d1ea4cd

View File

@@ -763,55 +763,65 @@ export default class Interpreter extends EventEmitter {
case 'clickLoadMore': { case 'clickLoadMore': {
while (true) { while (true) {
let checkButton = null; // Find working button with retry mechanism, consistent with clickNext
let workingSelector = null; const { button: loadMoreButton, workingSelector } = await findWorkingButton(availableSelectors);
for (const selector of availableSelectors) { if (!workingSelector || !loadMoreButton) {
debugLog('No working Load More selector found after retries');
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults);
return allResults;
}
// Update available selectors to start from the working one
availableSelectors = availableSelectors.slice(
availableSelectors.indexOf(workingSelector)
);
// Implement retry mechanism for clicking the button
let retryCount = 0;
let clickSuccess = false;
while (retryCount < MAX_RETRIES && !clickSuccess) {
try { try {
checkButton = await page.waitForSelector(selector, { try {
state: 'attached', await loadMoreButton.click();
timeout: 30000 clickSuccess = true;
}); } catch (error) {
if (checkButton) { debugLog(`Regular click failed on attempt ${retryCount + 1}. Trying DispatchEvent`);
workingSelector = selector;
debugLog('Found working selector:', selector); // If regular click fails, try dispatchEvent
break; try {
await loadMoreButton.dispatchEvent('click');
clickSuccess = true;
} catch (dispatchError) {
debugLog(`DispatchEvent failed on attempt ${retryCount + 1}.`);
throw dispatchError; // Propagate error to trigger retry
}
}
if (clickSuccess) {
await page.waitForTimeout(1000);
} }
} catch (error) { } catch (error) {
debugLog(`Load More selector failed: ${selector}`); debugLog(`Click attempt ${retryCount + 1} failed completely.`);
retryCount++;
if (retryCount < MAX_RETRIES) {
debugLog(`Retrying click - attempt ${retryCount + 1} of ${MAX_RETRIES}`);
await page.waitForTimeout(RETRY_DELAY);
}
} }
} }
if (!workingSelector) { if (!clickSuccess) {
debugLog('No working Load More selector found'); debugLog(`Load More clicking failed after ${MAX_RETRIES} attempts`);
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults);
return allResults;
}
const loadMoreButton = await page.$(workingSelector);
if (!loadMoreButton) {
debugLog('Load More button not found');
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults);
return allResults;
}
const selectorIndex = availableSelectors.indexOf(workingSelector);
availableSelectors = availableSelectors.slice(selectorIndex);
try {
try {
await loadMoreButton.click();
} catch (error) {
await loadMoreButton.dispatchEvent('click');
}
} catch (error) {
const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
allResults = allResults.concat(finalResults); allResults = allResults.concat(finalResults);
return allResults; return allResults;
} }
// Wait for content to load and check scroll height
await page.waitForTimeout(2000); await page.waitForTimeout(2000);
await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight));
await page.waitForTimeout(2000); await page.waitForTimeout(2000);