chore: add comments

This commit is contained in:
karishmas6
2024-08-19 01:39:55 +05:30
parent b6bcc1d516
commit bdf68fb66d

View File

@@ -372,7 +372,8 @@ export default class Interpreter extends EventEmitter {
private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) { private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) {
let allResults: Record<string, any>[] = []; let allResults: Record<string, any>[] = [];
let previousHeight = 0; let previousHeight = 0;
let scrapedItems: Set<string> = new Set(); // Track unique items to avoid re-scraping // track unique items to avoid re-scraping
let scrapedItems: Set<string> = new Set();
while (true) { while (true) {
switch (config.pagination.type) { switch (config.pagination.type) {
@@ -393,34 +394,26 @@ export default class Interpreter extends EventEmitter {
break; break;
case 'clickNext': case 'clickNext':
const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
// filter out items that have already been scraped
// Filter out items that have already been scraped
const newResults = pageResults.filter(item => { const newResults = pageResults.filter(item => {
const uniqueKey = JSON.stringify(item); const uniqueKey = JSON.stringify(item);
if (scrapedItems.has(uniqueKey)) return false; if (scrapedItems.has(uniqueKey)) return false;
scrapedItems.add(uniqueKey); scrapedItems.add(uniqueKey);
return true; return true;
}); });
allResults = allResults.concat(newResults); allResults = allResults.concat(newResults);
// if the limit is reached, return the required number of items
// If the limit is reached, return the required number of items
if (config.limit && allResults.length >= config.limit) { if (config.limit && allResults.length >= config.limit) {
return allResults.slice(0, config.limit); return allResults.slice(0, config.limit);
} }
// Check if there's a next page button
const nextButton = await page.$(config.pagination.selector); const nextButton = await page.$(config.pagination.selector);
if (!nextButton) { if (!nextButton) {
return allResults; // No more pages to navigate return allResults;
} }
// Click the next button and wait for the navigation to complete
await Promise.all([ await Promise.all([
nextButton.click(), nextButton.click(),
page.waitForNavigation({ waitUntil: 'networkidle' }) page.waitForNavigation({ waitUntil: 'networkidle' })
]); ]);
break; break;
case 'clickLoadMore': case 'clickLoadMore':
const loadMoreButton = await page.$(config.pagination.selector); const loadMoreButton = await page.$(config.pagination.selector);