feat: handle chained selectors for click next pagination

This commit is contained in:
RohitR311
2025-01-09 17:01:08 +05:30
parent 7136beeeba
commit 2ad0b2ac8c

View File

@@ -546,6 +546,9 @@ export default class Interpreter extends EventEmitter {
// track unique items per page to avoid re-scraping // track unique items per page to avoid re-scraping
let scrapedItems: Set<string> = new Set<string>(); let scrapedItems: Set<string> = new Set<string>();
let availableSelectors = config.pagination.selector.split(',');
console.log("Initial selectors:", availableSelectors);
while (true) { while (true) {
switch (config.pagination.type) { switch (config.pagination.type) {
case 'scrollDown': case 'scrollDown':
@@ -575,6 +578,7 @@ export default class Interpreter extends EventEmitter {
previousHeight = currentTopHeight; previousHeight = currentTopHeight;
break; break;
case 'clickNext': case 'clickNext':
console.log("PAGE URL:", page.url());
const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
// console.log("Page results:", pageResults); // console.log("Page results:", pageResults);
@@ -593,16 +597,46 @@ export default class Interpreter extends EventEmitter {
return allResults.slice(0, config.limit); return allResults.slice(0, config.limit);
} }
const nextButton = await page.$(config.pagination.selector); let checkButton = null;
let workingSelector = null;
for (let i = 0; i < availableSelectors.length; i++) {
const selector = availableSelectors[i];
try {
// Wait up to 10 seconds (timeout: 10000 ms) for this selector to be attached to the DOM
checkButton = await page.waitForSelector(selector, { state: 'attached', timeout: 10000 });
if (checkButton) {
workingSelector = selector;
break;
}
} catch (error) {
console.log(`Selector failed: ${selector}`);
continue;
}
}
const nextButton = await page.$(workingSelector);
if (!nextButton) { if (!nextButton) {
return allResults; // No more pages to scrape return allResults; // No more pages to scrape
} }
await Promise.all([
nextButton.dispatchEvent('click'),
page.waitForNavigation({ waitUntil: 'networkidle' })
]);
await page.waitForTimeout(1000); const selectorIndex = availableSelectors.indexOf(workingSelector!);
availableSelectors = availableSelectors.slice(selectorIndex);
console.log("Updated selectors:", availableSelectors);
try {
await Promise.all([
nextButton.click(),
page.waitForNavigation({ waitUntil: 'networkidle' })
]);
await page.waitForTimeout(1000);
} catch (navigationError) {
console.log(`Navigation failed with selector ${workingSelector}:`, navigationError);
availableSelectors.shift();
console.log("Updated selectors:", availableSelectors);
continue
}
break; break;
case 'clickLoadMore': case 'clickLoadMore':
while (true) { while (true) {