feat: handle chained selectors for click next pagination
This commit is contained in:
@@ -546,6 +546,9 @@ export default class Interpreter extends EventEmitter {
|
|||||||
// track unique items per page to avoid re-scraping
|
// track unique items per page to avoid re-scraping
|
||||||
let scrapedItems: Set<string> = new Set<string>();
|
let scrapedItems: Set<string> = new Set<string>();
|
||||||
|
|
||||||
|
let availableSelectors = config.pagination.selector.split(',');
|
||||||
|
console.log("Initial selectors:", availableSelectors);
|
||||||
|
|
||||||
while (true) {
|
while (true) {
|
||||||
switch (config.pagination.type) {
|
switch (config.pagination.type) {
|
||||||
case 'scrollDown':
|
case 'scrollDown':
|
||||||
@@ -575,6 +578,7 @@ export default class Interpreter extends EventEmitter {
|
|||||||
previousHeight = currentTopHeight;
|
previousHeight = currentTopHeight;
|
||||||
break;
|
break;
|
||||||
case 'clickNext':
|
case 'clickNext':
|
||||||
|
console.log("PAGE URL:", page.url());
|
||||||
const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
|
const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config);
|
||||||
|
|
||||||
// console.log("Page results:", pageResults);
|
// console.log("Page results:", pageResults);
|
||||||
@@ -593,16 +597,46 @@ export default class Interpreter extends EventEmitter {
|
|||||||
return allResults.slice(0, config.limit);
|
return allResults.slice(0, config.limit);
|
||||||
}
|
}
|
||||||
|
|
||||||
const nextButton = await page.$(config.pagination.selector);
|
let checkButton = null;
|
||||||
|
let workingSelector = null;
|
||||||
|
|
||||||
|
for (let i = 0; i < availableSelectors.length; i++) {
|
||||||
|
const selector = availableSelectors[i];
|
||||||
|
try {
|
||||||
|
// Wait for selector with a short timeout
|
||||||
|
checkButton = await page.waitForSelector(selector, { state: 'attached', timeout: 10000 });
|
||||||
|
if (checkButton) {
|
||||||
|
workingSelector = selector;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
} catch (error) {
|
||||||
|
console.log(`Selector failed: ${selector}`);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
const nextButton = await page.$(workingSelector);
|
||||||
if (!nextButton) {
|
if (!nextButton) {
|
||||||
return allResults; // No more pages to scrape
|
return allResults; // No more pages to scrape
|
||||||
}
|
}
|
||||||
await Promise.all([
|
|
||||||
nextButton.dispatchEvent('click'),
|
|
||||||
page.waitForNavigation({ waitUntil: 'networkidle' })
|
|
||||||
]);
|
|
||||||
|
|
||||||
await page.waitForTimeout(1000);
|
const selectorIndex = availableSelectors.indexOf(workingSelector!);
|
||||||
|
availableSelectors = availableSelectors.slice(selectorIndex);
|
||||||
|
console.log("Updated selectors:", availableSelectors);
|
||||||
|
|
||||||
|
try {
|
||||||
|
await Promise.all([
|
||||||
|
nextButton.click(),
|
||||||
|
page.waitForNavigation({ waitUntil: 'networkidle' })
|
||||||
|
]);
|
||||||
|
|
||||||
|
await page.waitForTimeout(1000);
|
||||||
|
} catch (navigationError) {
|
||||||
|
console.log(`Navigation failed with selector ${workingSelector}:`, navigationError);
|
||||||
|
availableSelectors.shift();
|
||||||
|
console.log("Updated selectors:", availableSelectors);
|
||||||
|
continue
|
||||||
|
}
|
||||||
break;
|
break;
|
||||||
case 'clickLoadMore':
|
case 'clickLoadMore':
|
||||||
while (true) {
|
while (true) {
|
||||||
|
|||||||
Reference in New Issue
Block a user