diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 09076a4d..b7bc3e10 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -372,7 +372,8 @@ export default class Interpreter extends EventEmitter { private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) { let allResults: Record[] = []; let previousHeight = 0; - let scrapedItems: Set = new Set(); // Track unique items to avoid re-scraping + // track unique items to avoid re-scraping + let scrapedItems: Set = new Set(); while (true) { switch (config.pagination.type) { @@ -393,34 +394,26 @@ export default class Interpreter extends EventEmitter { break; case 'clickNext': const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - - // Filter out items that have already been scraped + // filter out items that have already been scraped const newResults = pageResults.filter(item => { const uniqueKey = JSON.stringify(item); if (scrapedItems.has(uniqueKey)) return false; scrapedItems.add(uniqueKey); return true; }); - allResults = allResults.concat(newResults); - - // If the limit is reached, return the required number of items + // if the limit is reached, return the required number of items if (config.limit && allResults.length >= config.limit) { return allResults.slice(0, config.limit); } - - // Check if there's a next page button const nextButton = await page.$(config.pagination.selector); if (!nextButton) { - return allResults; // No more pages to navigate + return allResults; } - - // Click the next button and wait for the navigation to complete await Promise.all([ nextButton.click(), page.waitForNavigation({ waitUntil: 'networkidle' }) ]); - break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector);