feat: check url visit and navigate
This commit is contained in:
@@ -545,6 +545,7 @@ export default class Interpreter extends EventEmitter {
|
|||||||
let previousHeight = 0;
|
let previousHeight = 0;
|
||||||
// track unique items per page to avoid re-scraping
|
// track unique items per page to avoid re-scraping
|
||||||
let scrapedItems: Set<string> = new Set<string>();
|
let scrapedItems: Set<string> = new Set<string>();
|
||||||
|
let visitedUrls: string[] = [];
|
||||||
|
|
||||||
let availableSelectors = config.pagination.selector.split(',');
|
let availableSelectors = config.pagination.selector.split(',');
|
||||||
|
|
||||||
@@ -633,6 +634,7 @@ export default class Interpreter extends EventEmitter {
|
|||||||
// ]);
|
// ]);
|
||||||
|
|
||||||
const previousUrl = page.url();
|
const previousUrl = page.url();
|
||||||
|
visitedUrls.push(previousUrl);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// Try both click methods simultaneously
|
// Try both click methods simultaneously
|
||||||
@@ -651,10 +653,31 @@ export default class Interpreter extends EventEmitter {
|
|||||||
const currentUrl = page.url();
|
const currentUrl = page.url();
|
||||||
if (currentUrl === previousUrl) {
|
if (currentUrl === previousUrl) {
|
||||||
console.log("Previous URL same as current URL. Navigation failed.");
|
console.log("Previous URL same as current URL. Navigation failed.");
|
||||||
continue;
|
|
||||||
}
|
}
|
||||||
// Otherwise, log and continue
|
}
|
||||||
console.log('Navigation succeeded despite click error');
|
|
||||||
|
const currentUrl = page.url();
|
||||||
|
if (visitedUrls.includes(currentUrl)) {
|
||||||
|
console.log(`Detected navigation to a previously visited URL: ${currentUrl}`);
|
||||||
|
|
||||||
|
// Extract the current page number from the URL
|
||||||
|
const match = currentUrl.match(/\d+/);
|
||||||
|
if (match) {
|
||||||
|
const currentNumber = match[0];
|
||||||
|
// Use visitedUrls.length + 1 as the next page number
|
||||||
|
const nextNumber = visitedUrls.length + 1;
|
||||||
|
|
||||||
|
// Create new URL by replacing the current number with the next number
|
||||||
|
const nextUrl = currentUrl.replace(currentNumber, nextNumber.toString());
|
||||||
|
|
||||||
|
console.log(`Navigating to constructed URL: ${nextUrl}`);
|
||||||
|
|
||||||
|
// Navigate to the next page
|
||||||
|
await Promise.all([
|
||||||
|
page.waitForNavigation({ waitUntil: 'networkidle' }),
|
||||||
|
page.goto(nextUrl)
|
||||||
|
]);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Give the page a moment to stabilize after navigation
|
// Give the page a moment to stabilize after navigation
|
||||||
|
|||||||
Reference in New Issue
Block a user