From 54f7deb3ede1b78754fe76669a16e9486fdf0e40 Mon Sep 17 00:00:00 2001 From: Rohit Date: Mon, 27 Jan 2025 15:00:19 +0530 Subject: [PATCH 1/7] feat: add retry mechanism, modularization --- maxun-core/src/interpret.ts | 420 ++++++++++++++++++++---------------- 1 file changed, 231 insertions(+), 189 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index d87e2477..251dda6d 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -1,5 +1,5 @@ /* eslint-disable no-await-in-loop, no-restricted-syntax */ -import { Page, PageScreenshotOptions } from 'playwright'; +import { ElementHandle, Page, PageScreenshotOptions } from 'playwright'; import { PlaywrightBlocker } from '@cliqz/adblocker-playwright'; import fetch from 'cross-fetch'; import path from 'path'; @@ -548,232 +548,274 @@ export default class Interpreter extends EventEmitter { } } - private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) { + private async handlePagination(page: Page, config: { + listSelector: string, + fields: any, + limit?: number, + pagination: any +}) { let allResults: Record[] = []; let previousHeight = 0; - // track unique items per page to avoid re-scraping let scrapedItems: Set = new Set(); - let visitedUrls: string[] = []; + let visitedUrls: Set = new Set(); + const MAX_RETRIES = 3; + const RETRY_DELAY = 1000; // 1 second delay between retries - // Debug logging helper const debugLog = (message: string, ...args: any[]) => { - console.log(`[Page ${visitedUrls.length + 1}] ${message}`, ...args); + console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args); + }; + + const scrapeCurrentPage = async () => { + const results = await page.evaluate((cfg) => window.scrapeList(cfg), config); + const newResults = results.filter(item => { + const uniqueKey = JSON.stringify(item); + if (scrapedItems.has(uniqueKey)) return false; + scrapedItems.add(uniqueKey); + return true; + }); + allResults = allResults.concat(newResults); + debugLog("Results collected:", allResults.length); + }; + + const checkLimit = () => { + if (config.limit && allResults.length >= config.limit) { + allResults = allResults.slice(0, config.limit); + return true; + } + return false; + }; + + // Enhanced button finder with retry mechanism + const findWorkingButton = async (selectors: string[], retryCount = 0): Promise<{ + button: ElementHandle | null, + workingSelector: string | null + }> => { + for (const selector of selectors) { + try { + const button = await page.waitForSelector(selector, { + state: 'attached', + timeout: 10000 // Reduced timeout for faster checks + }); + if (button) { + debugLog('Found working selector:', selector); + return { button, workingSelector: selector }; + } + } catch (error) { + debugLog(`Selector failed: ${selector}`); + } + } + + // Implement retry mechanism when no selectors work + if (selectors.length > 0 && retryCount < MAX_RETRIES) { + debugLog(`Retry attempt ${retryCount + 1} of ${MAX_RETRIES}`); + await page.waitForTimeout(RETRY_DELAY); + return findWorkingButton(selectors, retryCount + 1); + } + + return { button: null, workingSelector: null }; + }; + + const retryOperation = async (operation: () => Promise, retryCount = 0): Promise => { + try { + return await operation(); + } catch (error) { + if (retryCount < MAX_RETRIES) { + debugLog(`Retrying operation. Attempt ${retryCount + 1} of ${MAX_RETRIES}`); + await page.waitForTimeout(RETRY_DELAY); + return retryOperation(operation, retryCount + 1); + } + debugLog(`Operation failed after ${MAX_RETRIES} retries`); + return false; + } }; let availableSelectors = config.pagination.selector.split(','); - while (true) { + try { + while (true) { + // Reduced timeout for faster performance + await page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => {}); + switch (config.pagination.type) { - case 'scrollDown': - await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); - await page.waitForTimeout(2000); + case 'scrollDown': + await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); + await page.waitForTimeout(2000); - const currentHeight = await page.evaluate(() => document.body.scrollHeight); - if (currentHeight === previousHeight) { - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); - return allResults; - } + const currentHeight = await page.evaluate(() => document.body.scrollHeight); + if (currentHeight === previousHeight) { + const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(finalResults); + return allResults; + } - previousHeight = currentHeight; - break; + previousHeight = currentHeight; + break; - case 'scrollUp': - await page.evaluate(() => window.scrollTo(0, 0)); - await page.waitForTimeout(2000); + case 'scrollUp': + await page.evaluate(() => window.scrollTo(0, 0)); + await page.waitForTimeout(2000); - const currentTopHeight = await page.evaluate(() => document.documentElement.scrollTop); - if (currentTopHeight === 0) { - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); - return allResults; - } + const currentTopHeight = await page.evaluate(() => document.documentElement.scrollTop); + if (currentTopHeight === 0) { + const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(finalResults); + return allResults; + } - previousHeight = currentTopHeight; - break; + previousHeight = currentTopHeight; + break; - case 'clickNext': - debugLog("Current URL:", page.url()); - const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - - // Filter out already scraped items - const newResults = pageResults.filter(item => { - const uniqueKey = JSON.stringify(item); - if (scrapedItems.has(uniqueKey)) return false; - scrapedItems.add(uniqueKey); - return true; - }); - - allResults = allResults.concat(newResults); - debugLog("Results collected so far:", allResults.length); - - if (config.limit && allResults.length >= config.limit) { - return allResults.slice(0, config.limit); - } + case 'clickNext': { + const currentUrl = page.url(); + visitedUrls.add(currentUrl); + + await scrapeCurrentPage(); + if (checkLimit()) return allResults; - await page.waitForLoadState('networkidle', { timeout: 30000 }); - await page.waitForTimeout(2000); - - let checkButton = null; - let workingSelector = null; - - // Try each selector with explicit waiting - for (const selector of availableSelectors) { + const { button, workingSelector } = await findWorkingButton(availableSelectors); + if (!button || !workingSelector) { + // Final retry for navigation when no selectors work + const success = await retryOperation(async () => { try { - checkButton = await page.waitForSelector(selector, { - state: 'attached', - timeout: 30000 - }); - if (checkButton) { - workingSelector = selector; - debugLog('Found working selector:', selector); - break; - } - } catch (error) { - debugLog(`Selector failed: ${selector} - ${error.message}`); + await page.evaluate(() => window.history.forward()); + const newUrl = page.url(); + return !visitedUrls.has(newUrl); + } catch { + return false; } - } + }); + + if (!success) return allResults; + break; + } - if (!workingSelector) { - debugLog('No working selector found after trying all options'); - return allResults; - } + availableSelectors = availableSelectors.slice( + availableSelectors.indexOf(workingSelector) + ); - const nextButton = await page.$(workingSelector); - if (!nextButton) { - debugLog('Next button not found'); - return allResults; - } - - const selectorIndex = availableSelectors.indexOf(workingSelector); - availableSelectors = availableSelectors.slice(selectorIndex); + let retryCount = 0; + let navigationSuccess = false; + while (retryCount < MAX_RETRIES && !navigationSuccess) { try { - // Store current URL to check if navigation succeeded - const previousUrl = page.url(); - visitedUrls.push(previousUrl); - - // Try both click methods in sequence try { await Promise.all([ page.waitForNavigation({ - waitUntil: 'networkidle', - timeout: 15000 + waitUntil: 'networkidle', + timeout: 15000 }), - nextButton.click() + button.click() ]); - } catch (error) { - // If we're still on the same URL, try dispatch event - if (page.url() === previousUrl) { - await Promise.all([ - page.waitForNavigation({ - waitUntil: 'networkidle', - timeout: 15000 - }), - nextButton.dispatchEvent('click') - ]); - } - } - - await page.waitForLoadState('domcontentloaded'); - await page.waitForLoadState('networkidle', { timeout: 30000 }); - - const currentUrl = page.url(); - if (visitedUrls.includes(currentUrl)) { - debugLog(`Navigation failed/Detected navigation to previously visited URL: ${currentUrl}`); - return allResults; - } - - // Give the page a moment to stabilize after navigation - await page.waitForTimeout(1000); - - } catch (error) { - debugLog(`Navigation failed completely: ${error.message}`); - return allResults; - } - break; - - case 'clickLoadMore': - while (true) { - let checkButton = null; - let workingSelector = null; - - for (const selector of availableSelectors) { - try { - checkButton = await page.waitForSelector(selector, { - state: 'attached', - timeout: 30000 - }); - if (checkButton) { - workingSelector = selector; - debugLog('Found working selector:', selector); - break; - } - } catch (error) { - debugLog(`Load More selector failed: ${selector}`); - } - } - - if (!workingSelector) { - debugLog('No working Load More selector found'); - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); - return allResults; - } - - const loadMoreButton = await page.$(workingSelector); - if (!loadMoreButton) { - debugLog('Load More button not found'); - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); - return allResults; - } - - const selectorIndex = availableSelectors.indexOf(workingSelector); - availableSelectors = availableSelectors.slice(selectorIndex); - - try { - try { - await loadMoreButton.click(); - } catch (error) { - await loadMoreButton.dispatchEvent('click'); - } + navigationSuccess = true; } catch (error) { - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); - return allResults; + debugLog(`Regular click failed on attempt ${retryCount + 1}. Trying DispatchEvent`); + + // If regular click fails, try dispatchEvent + if (page.url() === currentUrl) { + try { + await Promise.all([ + page.waitForNavigation({ + waitUntil: 'networkidle', + timeout: 15000 + }), + button.dispatchEvent('click') + ]); + navigationSuccess = true; + } catch (dispatchError) { + debugLog(`DispatchEvent failed on attempt ${retryCount + 1}.`); + } + } } - await page.waitForTimeout(2000); - await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); - await page.waitForTimeout(2000); - - const currentHeight = await page.evaluate(() => document.body.scrollHeight); - if (currentHeight === previousHeight) { - debugLog('No more items loaded after Load More'); - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); - return allResults; + const newUrl = page.url(); + if (visitedUrls.has(newUrl)) { + debugLog(`Detected navigation to previously visited URL ${newUrl} on attempt ${retryCount + 1}`); + navigationSuccess = false; } - previousHeight = currentHeight; - - if (config.limit && allResults.length >= config.limit) { - allResults = allResults.slice(0, config.limit); - break; + + if (navigationSuccess) { + await page.waitForTimeout(1000); + } + } catch (error) { + debugLog(`Navigation attempt ${retryCount + 1} failed completely.`); + navigationSuccess = false; + } + + if (!navigationSuccess) { + retryCount++; + if (retryCount < MAX_RETRIES) { + debugLog(`Retrying navigation - attempt ${retryCount + 1} of ${MAX_RETRIES}`); + await page.waitForTimeout(RETRY_DELAY); } } - break; + } - default: - const results = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(results); + if (!navigationSuccess) { + debugLog(`Navigation failed after ${MAX_RETRIES} attempts`); return allResults; + } + break; + } + + case 'clickLoadMore': { + while (true) { + const { button, workingSelector } = await findWorkingButton(availableSelectors); + if (!button || !workingSelector) { + // Final retry for load more when no selectors work + const success = await retryOperation(async () => { + await scrapeCurrentPage(); + return allResults.length > 0; + }); + + if (!success) return allResults; + break; + } + + availableSelectors = availableSelectors.slice( + availableSelectors.indexOf(workingSelector) + ); + + const loadMoreSuccess = await retryOperation(async () => { + try { + await button.click().catch(() => button.dispatchEvent('click')); + await page.waitForTimeout(1000); + + await page.evaluate(() => + window.scrollTo(0, document.body.scrollHeight) + ); + await page.waitForTimeout(1000); + + const currentHeight = await page.evaluate(() => + document.body.scrollHeight + ); + + if (currentHeight === previousHeight) { + await scrapeCurrentPage(); + return false; + } + previousHeight = currentHeight; + + return true; + } catch (error) { + await scrapeCurrentPage(); + return false; + } + }); + + if (!loadMoreSuccess || checkLimit()) return allResults; + } + } + + default: + await scrapeCurrentPage(); + return allResults; } - if (config.limit && allResults.length >= config.limit) { - allResults = allResults.slice(0, config.limit); - break; - } + if (checkLimit()) break; + } + } catch (error) { + debugLog(`Fatal error: ${error.message}`); + return allResults; } return allResults; From a07413df93fbbf5f6db37400d2868981abb8a98f Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 28 Jan 2025 14:52:27 +0530 Subject: [PATCH 2/7] feat: rm general selector from chained selectors --- server/src/workflow-management/classes/Generator.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 6e36f287..509c0177 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -759,8 +759,7 @@ export class WorkflowGenerator { selectors?.id, selectors?.hrefSelector, selectors?.accessibilitySelector, - selectors?.attrSelector, - selectors?.generalSelector + selectors?.attrSelector ] .filter(selector => selector !== null && selector !== undefined) .join(','); From 02268bc694caff65a28d43921fbcd1827d0f4299 Mon Sep 17 00:00:00 2001 From: Rohit Date: Mon, 3 Feb 2025 22:48:13 +0530 Subject: [PATCH 3/7] feat: add navigation true if url different --- maxun-core/src/interpret.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 251dda6d..4bdfad73 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -724,6 +724,8 @@ export default class Interpreter extends EventEmitter { } catch (dispatchError) { debugLog(`DispatchEvent failed on attempt ${retryCount + 1}.`); } + } else { + navigationSuccess = true; } } From c37fe42a2fa89b4f00c3b3f806a101c36c675937 Mon Sep 17 00:00:00 2001 From: Rohit Date: Mon, 3 Feb 2025 23:26:06 +0530 Subject: [PATCH 4/7] feat: add break for loadmore pagination --- maxun-core/src/interpret.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 4bdfad73..ecb267c9 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -806,6 +806,7 @@ export default class Interpreter extends EventEmitter { if (!loadMoreSuccess || checkLimit()) return allResults; } + break; } default: From 56bb09ef5a542c49fa392310e17d46fcc4e51250 Mon Sep 17 00:00:00 2001 From: Rohit Date: Mon, 3 Feb 2025 23:34:28 +0530 Subject: [PATCH 5/7] feat: revert load more pagination logic --- maxun-core/src/interpret.ts | 99 ++++++++++++++++++++++--------------- 1 file changed, 60 insertions(+), 39 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index ecb267c9..6e2e5311 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -759,55 +759,76 @@ export default class Interpreter extends EventEmitter { break; } - case 'clickLoadMore': { + case 'clickLoadMore': while (true) { - const { button, workingSelector } = await findWorkingButton(availableSelectors); - if (!button || !workingSelector) { - // Final retry for load more when no selectors work - const success = await retryOperation(async () => { - await scrapeCurrentPage(); - return allResults.length > 0; - }); - - if (!success) return allResults; - break; + let checkButton = null; + let workingSelector = null; + + for (const selector of availableSelectors) { + try { + checkButton = await page.waitForSelector(selector, { + state: 'attached', + timeout: 30000 + }); + if (checkButton) { + workingSelector = selector; + debugLog('Found working selector:', selector); + break; + } + } catch (error) { + debugLog(`Load More selector failed: ${selector}`); + } } - availableSelectors = availableSelectors.slice( - availableSelectors.indexOf(workingSelector) - ); + if (!workingSelector) { + debugLog('No working Load More selector found'); + const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(finalResults); + return allResults; + } - const loadMoreSuccess = await retryOperation(async () => { + const loadMoreButton = await page.$(workingSelector); + if (!loadMoreButton) { + debugLog('Load More button not found'); + const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(finalResults); + return allResults; + } + + const selectorIndex = availableSelectors.indexOf(workingSelector); + availableSelectors = availableSelectors.slice(selectorIndex); + + try { try { - await button.click().catch(() => button.dispatchEvent('click')); - await page.waitForTimeout(1000); - - await page.evaluate(() => - window.scrollTo(0, document.body.scrollHeight) - ); - await page.waitForTimeout(1000); - - const currentHeight = await page.evaluate(() => - document.body.scrollHeight - ); - - if (currentHeight === previousHeight) { - await scrapeCurrentPage(); - return false; - } - previousHeight = currentHeight; - - return true; + await loadMoreButton.click(); } catch (error) { - await scrapeCurrentPage(); - return false; + await loadMoreButton.dispatchEvent('click'); } - }); + } catch (error) { + const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(finalResults); + return allResults; + } - if (!loadMoreSuccess || checkLimit()) return allResults; + await page.waitForTimeout(2000); + await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); + await page.waitForTimeout(2000); + + const currentHeight = await page.evaluate(() => document.body.scrollHeight); + if (currentHeight === previousHeight) { + debugLog('No more items loaded after Load More'); + const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(finalResults); + return allResults; + } + previousHeight = currentHeight; + + if (config.limit && allResults.length >= config.limit) { + allResults = allResults.slice(0, config.limit); + break; + } } break; - } default: await scrapeCurrentPage(); From dc44f6a9cd653d78a5bf02132f97b447a58fde92 Mon Sep 17 00:00:00 2001 From: Rohit Date: Mon, 3 Feb 2025 23:36:57 +0530 Subject: [PATCH 6/7] feat: add block scope --- maxun-core/src/interpret.ts | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 6e2e5311..d65cd4ac 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -637,7 +637,7 @@ export default class Interpreter extends EventEmitter { await page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => {}); switch (config.pagination.type) { - case 'scrollDown': + case 'scrollDown': { await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); await page.waitForTimeout(2000); @@ -650,8 +650,9 @@ export default class Interpreter extends EventEmitter { previousHeight = currentHeight; break; + } - case 'scrollUp': + case 'scrollUp': { await page.evaluate(() => window.scrollTo(0, 0)); await page.waitForTimeout(2000); @@ -664,6 +665,7 @@ export default class Interpreter extends EventEmitter { previousHeight = currentTopHeight; break; + } case 'clickNext': { const currentUrl = page.url(); @@ -759,7 +761,7 @@ export default class Interpreter extends EventEmitter { break; } - case 'clickLoadMore': + case 'clickLoadMore': { while (true) { let checkButton = null; let workingSelector = null; @@ -829,10 +831,12 @@ export default class Interpreter extends EventEmitter { } } break; + } - default: + default: { await scrapeCurrentPage(); return allResults; + } } if (checkLimit()) break; From 7e2d1ea4cd71d60bd159c5a76b495d961d51e217 Mon Sep 17 00:00:00 2001 From: Rohit Date: Mon, 3 Feb 2025 23:50:29 +0530 Subject: [PATCH 7/7] feat: better load more pagination handling --- maxun-core/src/interpret.ts | 92 ++++++++++++++++++++----------------- 1 file changed, 51 insertions(+), 41 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index d65cd4ac..7fc4d17a 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -763,59 +763,69 @@ export default class Interpreter extends EventEmitter { case 'clickLoadMore': { while (true) { - let checkButton = null; - let workingSelector = null; - - for (const selector of availableSelectors) { + // Find working button with retry mechanism, consistent with clickNext + const { button: loadMoreButton, workingSelector } = await findWorkingButton(availableSelectors); + + if (!workingSelector || !loadMoreButton) { + debugLog('No working Load More selector found after retries'); + const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(finalResults); + return allResults; + } + + // Update available selectors to start from the working one + availableSelectors = availableSelectors.slice( + availableSelectors.indexOf(workingSelector) + ); + + // Implement retry mechanism for clicking the button + let retryCount = 0; + let clickSuccess = false; + + while (retryCount < MAX_RETRIES && !clickSuccess) { try { - checkButton = await page.waitForSelector(selector, { - state: 'attached', - timeout: 30000 - }); - if (checkButton) { - workingSelector = selector; - debugLog('Found working selector:', selector); - break; + try { + await loadMoreButton.click(); + clickSuccess = true; + } catch (error) { + debugLog(`Regular click failed on attempt ${retryCount + 1}. Trying DispatchEvent`); + + // If regular click fails, try dispatchEvent + try { + await loadMoreButton.dispatchEvent('click'); + clickSuccess = true; + } catch (dispatchError) { + debugLog(`DispatchEvent failed on attempt ${retryCount + 1}.`); + throw dispatchError; // Propagate error to trigger retry + } + } + + if (clickSuccess) { + await page.waitForTimeout(1000); } } catch (error) { - debugLog(`Load More selector failed: ${selector}`); + debugLog(`Click attempt ${retryCount + 1} failed completely.`); + retryCount++; + + if (retryCount < MAX_RETRIES) { + debugLog(`Retrying click - attempt ${retryCount + 1} of ${MAX_RETRIES}`); + await page.waitForTimeout(RETRY_DELAY); + } } } - - if (!workingSelector) { - debugLog('No working Load More selector found'); + + if (!clickSuccess) { + debugLog(`Load More clicking failed after ${MAX_RETRIES} attempts`); const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); allResults = allResults.concat(finalResults); return allResults; } - - const loadMoreButton = await page.$(workingSelector); - if (!loadMoreButton) { - debugLog('Load More button not found'); - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); - return allResults; - } - - const selectorIndex = availableSelectors.indexOf(workingSelector); - availableSelectors = availableSelectors.slice(selectorIndex); - - try { - try { - await loadMoreButton.click(); - } catch (error) { - await loadMoreButton.dispatchEvent('click'); - } - } catch (error) { - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); - return allResults; - } - + + // Wait for content to load and check scroll height await page.waitForTimeout(2000); await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); await page.waitForTimeout(2000); - + const currentHeight = await page.evaluate(() => document.body.scrollHeight); if (currentHeight === previousHeight) { debugLog('No more items loaded after Load More');