From 96454861d42408bcd0afeec8275b9072650b6727 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Thu, 9 Jan 2025 16:56:00 +0530 Subject: [PATCH 01/14] feat: emit socket event to stop pagination mode --- src/components/organisms/BrowserWindow.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 5a49a89f..84fac079 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -260,6 +260,7 @@ export const BrowserWindow = () => { setPaginationSelector(highlighterData.selector); notify(`info`, t('browser_window.attribute_modal.notifications.pagination_select_success')); addListStep(listSelector!, fields, currentListId || 0, { type: paginationType, selector: highlighterData.selector }); + socket?.emit('setPaginationMode', { pagination: false }); } return; } From c6e998a34a1e7ad88506068969f6ef4fff49c303 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Thu, 9 Jan 2025 16:57:23 +0530 Subject: [PATCH 02/14] feat: emit socket event to start pagination mode --- src/context/browserActions.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/context/browserActions.tsx b/src/context/browserActions.tsx index fd951f92..f8699864 100644 --- a/src/context/browserActions.tsx +++ b/src/context/browserActions.tsx @@ -54,6 +54,7 @@ export const ActionProvider = ({ children }: { children: ReactNode }) => { setPaginationMode(true); setCaptureStage('pagination'); socket?.emit('setGetList', { getList: false }); + socket?.emit('setPaginationMode', { pagination: true }); }; const stopPaginationMode = () => setPaginationMode(false); From 7136beeeba0ca062a3532ebc7686c1b2c0254529 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Thu, 9 Jan 2025 16:59:13 +0530 Subject: [PATCH 03/14] feat: chain and store selectors in pagination mode --- .../workflow-management/classes/Generator.ts | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index d1bccbe4..354f43da 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -64,6 +64,8 @@ export class WorkflowGenerator { private listSelector: string = ''; + private paginationMode: boolean = false; + /** * The public constructor of the WorkflowGenerator. * Takes socket for communication as a parameter and registers some important events on it. @@ -120,6 +122,9 @@ export class WorkflowGenerator { this.socket.on('listSelector', (data: { selector: string }) => { this.listSelector = data.selector; }) + this.socket.on('setPaginationMode', (data: { pagination: boolean }) => { + this.paginationMode = data.pagination; + }) } /** @@ -702,6 +707,24 @@ export class WorkflowGenerator { const selectorBasedOnCustomAction = (this.getList === true) ? await getNonUniqueSelectors(page, coordinates, this.listSelector) : await getSelectors(page, coordinates); + + if (this.paginationMode && selectorBasedOnCustomAction) { + // Chain selectors in specific priority order + const selectors = selectorBasedOnCustomAction; + const selectorChain = [ + selectors?.testIdSelector, + selectors?.id, + selectors?.hrefSelector, + selectors?.accessibilitySelector, + selectors?.attrSelector, + selectors?.generalSelector + ] + .filter(selector => selector !== null && selector !== undefined) + .join(','); + + console.log("CHAINED PAGINATION SELECTOR:", selectorChain); + return selectorChain; + } const bestSelector = getBestSelectorForAction( { From 2ad0b2ac8c18d267695872e942ff17d7f2b2cd73 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Thu, 9 Jan 2025 17:01:08 +0530 Subject: [PATCH 04/14] feat: handle chained selectors for click next pagination --- maxun-core/src/interpret.ts | 46 ++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 6 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index e09ac5d5..eb0021a7 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -546,6 +546,9 @@ export default class Interpreter extends EventEmitter { // track unique items per page to avoid re-scraping let scrapedItems: Set = new Set(); + let availableSelectors = config.pagination.selector.split(','); + console.log("Initial selectors:", availableSelectors); + while (true) { switch (config.pagination.type) { case 'scrollDown': @@ -575,6 +578,7 @@ export default class Interpreter extends EventEmitter { previousHeight = currentTopHeight; break; case 'clickNext': + console.log("PAGE URL:", page.url()); const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); // console.log("Page results:", pageResults); @@ -593,16 +597,46 @@ export default class Interpreter extends EventEmitter { return allResults.slice(0, config.limit); } - const nextButton = await page.$(config.pagination.selector); + let checkButton = null; + let workingSelector = null; + + for (let i = 0; i < availableSelectors.length; i++) { + const selector = availableSelectors[i]; + try { + // Wait for selector with a short timeout + checkButton = await page.waitForSelector(selector, { state: 'attached', timeout: 10000 }); + if (checkButton) { + workingSelector = selector; + break; + } + } catch (error) { + console.log(`Selector failed: ${selector}`); + continue; + } + } + + const nextButton = await page.$(workingSelector); if (!nextButton) { return allResults; // No more pages to scrape } - await Promise.all([ - nextButton.dispatchEvent('click'), - page.waitForNavigation({ waitUntil: 'networkidle' }) - ]); - await page.waitForTimeout(1000); + const selectorIndex = availableSelectors.indexOf(workingSelector!); + availableSelectors = availableSelectors.slice(selectorIndex); + console.log("Updated selectors:", availableSelectors); + + try { + await Promise.all([ + nextButton.click(), + page.waitForNavigation({ waitUntil: 'networkidle' }) + ]); + + await page.waitForTimeout(1000); + } catch (navigationError) { + console.log(`Navigation failed with selector ${workingSelector}:`, navigationError); + availableSelectors.shift(); + console.log("Updated selectors:", availableSelectors); + continue + } break; case 'clickLoadMore': while (true) { From 906b72fbc3d0cdac174fac6e05e847630ed21c75 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Thu, 9 Jan 2025 17:44:43 +0530 Subject: [PATCH 05/14] feat: add fallback dispatch event if click fails --- maxun-core/src/interpret.ts | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index eb0021a7..09b3d93a 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -625,17 +625,30 @@ export default class Interpreter extends EventEmitter { console.log("Updated selectors:", availableSelectors); try { + // First try with regular click await Promise.all([ nextButton.click(), page.waitForNavigation({ waitUntil: 'networkidle' }) ]); - + await page.waitForTimeout(1000); - } catch (navigationError) { - console.log(`Navigation failed with selector ${workingSelector}:`, navigationError); - availableSelectors.shift(); - console.log("Updated selectors:", availableSelectors); - continue + } catch (clickError) { + console.log('Regular click failed, trying dispatchEvent:', clickError); + + try { + // Fallback to dispatchEvent + await Promise.all([ + nextButton.dispatchEvent('click'), + page.waitForNavigation({ waitUntil: 'networkidle' }) + ]); + + await page.waitForTimeout(1000); + } catch (navigationError) { + console.log(`Navigation failed with selector ${workingSelector}:`, navigationError); + availableSelectors.shift(); + console.log("Updated selectors:", availableSelectors); + continue; + } } break; case 'clickLoadMore': From c7d6dea8451d08a7f0da24d9b612f708e77b27d0 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 10 Jan 2025 12:38:50 +0530 Subject: [PATCH 06/14] feat: cleanup console logs --- maxun-core/src/interpret.ts | 8 +++----- server/src/workflow-management/classes/Generator.ts | 1 - 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 09b3d93a..9a3df8cd 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -547,7 +547,6 @@ export default class Interpreter extends EventEmitter { let scrapedItems: Set = new Set(); let availableSelectors = config.pagination.selector.split(','); - console.log("Initial selectors:", availableSelectors); while (true) { switch (config.pagination.type) { @@ -615,6 +614,7 @@ export default class Interpreter extends EventEmitter { } } + // const nextButton = await page.$(config.pagination.selector); const nextButton = await page.$(workingSelector); if (!nextButton) { return allResults; // No more pages to scrape @@ -622,7 +622,6 @@ export default class Interpreter extends EventEmitter { const selectorIndex = availableSelectors.indexOf(workingSelector!); availableSelectors = availableSelectors.slice(selectorIndex); - console.log("Updated selectors:", availableSelectors); try { // First try with regular click @@ -633,7 +632,7 @@ export default class Interpreter extends EventEmitter { await page.waitForTimeout(1000); } catch (clickError) { - console.log('Regular click failed, trying dispatchEvent:', clickError); + console.log('Regular click failed, trying dispatchEvent'); try { // Fallback to dispatchEvent @@ -644,9 +643,8 @@ export default class Interpreter extends EventEmitter { await page.waitForTimeout(1000); } catch (navigationError) { - console.log(`Navigation failed with selector ${workingSelector}:`, navigationError); + console.log(`Navigation failed with selector ${workingSelector}:`); availableSelectors.shift(); - console.log("Updated selectors:", availableSelectors); continue; } } diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 354f43da..0ec251b5 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -722,7 +722,6 @@ export class WorkflowGenerator { .filter(selector => selector !== null && selector !== undefined) .join(','); - console.log("CHAINED PAGINATION SELECTOR:", selectorChain); return selectorChain; } From 73bbdeb6bb7b9acb54d095a13756b4f968815a85 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Fri, 10 Jan 2025 12:49:14 +0530 Subject: [PATCH 07/14] feat: handle chained selectors for click load more pagination --- maxun-core/src/interpret.ts | 28 ++++++++++++++++++++++++++-- 1 file changed, 26 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 9a3df8cd..de2f88e6 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -651,7 +651,25 @@ export default class Interpreter extends EventEmitter { break; case 'clickLoadMore': while (true) { - const loadMoreButton = await page.$(config.pagination.selector); + let checkButton = null; + let workingSelector = null; + + for (let i = 0; i < availableSelectors.length; i++) { + const selector = availableSelectors[i]; + try { + // Wait for selector with a short timeout + checkButton = await page.waitForSelector(selector, { state: 'attached', timeout: 10000 }); + if (checkButton) { + workingSelector = selector; + break; + } + } catch (error) { + console.log(`Selector failed: ${selector}`); + continue; + } + } + + const loadMoreButton = await page.$(workingSelector); if (!loadMoreButton) { // No more "Load More" button, so scrape the remaining items const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); @@ -659,8 +677,14 @@ export default class Interpreter extends EventEmitter { return allResults; } // Click the 'Load More' button to load additional items - await loadMoreButton.dispatchEvent('click'); + try { + await loadMoreButton.click(); + } catch { + console.log('Regular click failed, trying dispatchEvent'); + await loadMoreButton.dispatchEvent('click') + } await page.waitForTimeout(2000); // Wait for new items to load + // After clicking 'Load More', scroll down to load more items await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); await page.waitForTimeout(2000); From 7aa7dc27b3b9ebc9e7869502eac517b86b658044 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 11 Jan 2025 17:51:31 +0530 Subject: [PATCH 08/14] feat: better handling for click next navigation --- maxun-core/src/interpret.ts | 66 ++++++++++++++++++++++++------------- 1 file changed, 44 insertions(+), 22 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index de2f88e6..e385a138 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -577,9 +577,9 @@ export default class Interpreter extends EventEmitter { previousHeight = currentTopHeight; break; case 'clickNext': - console.log("PAGE URL:", page.url()); + console.log("Page URL:", page.url()); const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - + // console.log("Page results:", pageResults); // Filter out already scraped items @@ -591,6 +591,7 @@ export default class Interpreter extends EventEmitter { }); allResults = allResults.concat(newResults); + console.log("Results so far:", allResults.length); if (config.limit && allResults.length >= config.limit) { return allResults.slice(0, config.limit); @@ -623,31 +624,52 @@ export default class Interpreter extends EventEmitter { const selectorIndex = availableSelectors.indexOf(workingSelector!); availableSelectors = availableSelectors.slice(selectorIndex); + // await Promise.all([ + // nextButton.dispatchEvent('click'), + // page.waitForNavigation({ waitUntil: 'networkidle' }) + // ]); + + const initialUrl = page.url(); + let navigationSuccessful = false; + try { - // First try with regular click - await Promise.all([ - nextButton.click(), - page.waitForNavigation({ waitUntil: 'networkidle' }) - ]); - - await page.waitForTimeout(1000); + // Start watching for navigation before clicking + const navigationPromise = page.waitForNavigation({ + waitUntil: 'networkidle', + timeout: 30000 + }); + + // Perform the click + await nextButton.click(); + + // Wait for navigation to complete + await navigationPromise; + navigationSuccessful = true; } catch (clickError) { - console.log('Regular click failed, trying dispatchEvent'); - + console.log('Initial navigation attempt failed:', clickError.message); + } + + if (!navigationSuccessful) { try { - // Fallback to dispatchEvent - await Promise.all([ - nextButton.dispatchEvent('click'), - page.waitForNavigation({ waitUntil: 'networkidle' }) - ]); - - await page.waitForTimeout(1000); - } catch (navigationError) { - console.log(`Navigation failed with selector ${workingSelector}:`); - availableSelectors.shift(); - continue; + // Start watching for navigation before the event + const navigationPromise = page.waitForNavigation({ + waitUntil: 'networkidle', + timeout: 30000 + }); + + await nextButton.dispatchEvent('click'); + await navigationPromise; + } catch (dispatchError) { + console.log(`Navigation failed with selector ${workingSelector}:`, dispatchError.message); + // Check if we actually navigated despite the error + if (page.url() === initialUrl) { + continue; // Only continue if we're still on the same page + } } } + + // Give the page a moment to stabilize after navigation + await page.waitForTimeout(1000); break; case 'clickLoadMore': while (true) { From ab073990177eb7ce7007b932dfb90b6e70801560 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 11 Jan 2025 17:56:17 +0530 Subject: [PATCH 09/14] feat: rm selector from array if not visible --- maxun-core/src/interpret.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index e385a138..b231b03b 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -698,6 +698,10 @@ export default class Interpreter extends EventEmitter { allResults = allResults.concat(finalResults); return allResults; } + + const selectorIndex = availableSelectors.indexOf(workingSelector!); + availableSelectors = availableSelectors.slice(selectorIndex); + // Click the 'Load More' button to load additional items try { await loadMoreButton.click(); @@ -719,6 +723,8 @@ export default class Interpreter extends EventEmitter { return allResults; } previousHeight = currentHeight; + + console.log("Results so far:", allResults.length); if (config.limit && allResults.length >= config.limit) { // If limit is set and reached, return the limited results allResults = allResults.slice(0, config.limit); From 3bb36ae89a80314974289aa42edef17caa0f672b Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 11 Jan 2025 19:33:28 +0530 Subject: [PATCH 10/14] feat: add race condition to handle click action --- maxun-core/src/interpret.ts | 67 ++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 39 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index b231b03b..e7c11533 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -629,43 +629,29 @@ export default class Interpreter extends EventEmitter { // page.waitForNavigation({ waitUntil: 'networkidle' }) // ]); - const initialUrl = page.url(); - let navigationSuccessful = false; + const previousUrl = page.url(); try { - // Start watching for navigation before clicking - const navigationPromise = page.waitForNavigation({ - waitUntil: 'networkidle', - timeout: 30000 - }); - - // Perform the click - await nextButton.click(); - - // Wait for navigation to complete - await navigationPromise; - navigationSuccessful = true; - } catch (clickError) { - console.log('Initial navigation attempt failed:', clickError.message); - } - - if (!navigationSuccessful) { - try { - // Start watching for navigation before the event - const navigationPromise = page.waitForNavigation({ - waitUntil: 'networkidle', - timeout: 30000 - }); - - await nextButton.dispatchEvent('click'); - await navigationPromise; - } catch (dispatchError) { - console.log(`Navigation failed with selector ${workingSelector}:`, dispatchError.message); - // Check if we actually navigated despite the error - if (page.url() === initialUrl) { - continue; // Only continue if we're still on the same page - } + // Try both click methods simultaneously + await Promise.race([ + Promise.all([ + page.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }), + nextButton.click() + ]), + Promise.all([ + page.waitForNavigation({ waitUntil: 'networkidle', timeout: 30000 }), + nextButton.dispatchEvent('click') + ]) + ]); + } catch (error) { + // Verify if navigation actually succeeded + const currentUrl = page.url(); + if (currentUrl === previousUrl) { + console.log("Previous URL same as current URL. Navigation failed."); + continue; } + // Otherwise, log and continue + console.log('Navigation succeeded despite click error'); } // Give the page a moment to stabilize after navigation @@ -704,10 +690,13 @@ export default class Interpreter extends EventEmitter { // Click the 'Load More' button to load additional items try { - await loadMoreButton.click(); - } catch { - console.log('Regular click failed, trying dispatchEvent'); - await loadMoreButton.dispatchEvent('click') + await Promise.race([ + loadMoreButton.click(), + loadMoreButton.dispatchEvent('click') + ]); + } catch (error) { + console.log('Both click attempts failed'); + continue; } await page.waitForTimeout(2000); // Wait for new items to load @@ -723,7 +712,7 @@ export default class Interpreter extends EventEmitter { return allResults; } previousHeight = currentHeight; - + console.log("Results so far:", allResults.length); if (config.limit && allResults.length >= config.limit) { // If limit is set and reached, return the limited results From 681aa8045e19aad65387a91e88e78c5bce37e5c8 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 11 Jan 2025 20:14:02 +0530 Subject: [PATCH 11/14] feat: rm timeout for wait for selector --- maxun-core/src/interpret.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index e7c11533..aaecb415 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -604,7 +604,7 @@ export default class Interpreter extends EventEmitter { const selector = availableSelectors[i]; try { // Wait for selector with a short timeout - checkButton = await page.waitForSelector(selector, { state: 'attached', timeout: 10000 }); + checkButton = await page.waitForSelector(selector, { state: 'attached' }); if (checkButton) { workingSelector = selector; break; @@ -666,7 +666,7 @@ export default class Interpreter extends EventEmitter { const selector = availableSelectors[i]; try { // Wait for selector with a short timeout - checkButton = await page.waitForSelector(selector, { state: 'attached', timeout: 10000 }); + checkButton = await page.waitForSelector(selector, { state: 'attached' }); if (checkButton) { workingSelector = selector; break; From 03e3098b587170a71225ef0a03165acd658beda2 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sat, 11 Jan 2025 22:53:39 +0530 Subject: [PATCH 12/14] feat: handle chain selector logic for click load more pagination --- maxun-core/src/interpret.ts | 22 +++++++++++++++------- 1 file changed, 15 insertions(+), 7 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index aaecb415..e5f0a162 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -611,10 +611,13 @@ export default class Interpreter extends EventEmitter { } } catch (error) { console.log(`Selector failed: ${selector}`); - continue; } } + if (!workingSelector) { + return allResults; + } + // const nextButton = await page.$(config.pagination.selector); const nextButton = await page.$(workingSelector); if (!nextButton) { @@ -673,10 +676,16 @@ export default class Interpreter extends EventEmitter { } } catch (error) { console.log(`Selector failed: ${selector}`); - continue; } } + if (!workingSelector) { + // No more working selectors available, so scrape the remaining items + const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(finalResults); + return allResults; + } + const loadMoreButton = await page.$(workingSelector); if (!loadMoreButton) { // No more "Load More" button, so scrape the remaining items @@ -687,8 +696,9 @@ export default class Interpreter extends EventEmitter { const selectorIndex = availableSelectors.indexOf(workingSelector!); availableSelectors = availableSelectors.slice(selectorIndex); - + // Click the 'Load More' button to load additional items + // await loadMoreButton.dispatchEvent('click'); try { await Promise.race([ loadMoreButton.click(), @@ -696,13 +706,12 @@ export default class Interpreter extends EventEmitter { ]); } catch (error) { console.log('Both click attempts failed'); - continue; } await page.waitForTimeout(2000); // Wait for new items to load - // After clicking 'Load More', scroll down to load more items await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); await page.waitForTimeout(2000); + // Check if more items are available const currentHeight = await page.evaluate(() => document.body.scrollHeight); if (currentHeight === previousHeight) { @@ -712,8 +721,7 @@ export default class Interpreter extends EventEmitter { return allResults; } previousHeight = currentHeight; - - console.log("Results so far:", allResults.length); + if (config.limit && allResults.length >= config.limit) { // If limit is set and reached, return the limited results allResults = allResults.slice(0, config.limit); From a2314798c1bd725501da3bc3921b76ca13559a90 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 12 Jan 2025 02:47:25 +0530 Subject: [PATCH 13/14] feat: check url visit and navigate --- maxun-core/src/interpret.ts | 29 ++++++++++++++++++++++++++--- 1 file changed, 26 insertions(+), 3 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index e5f0a162..70d425a1 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -545,6 +545,7 @@ export default class Interpreter extends EventEmitter { let previousHeight = 0; // track unique items per page to avoid re-scraping let scrapedItems: Set = new Set(); + let visitedUrls: string[] = []; let availableSelectors = config.pagination.selector.split(','); @@ -633,6 +634,7 @@ export default class Interpreter extends EventEmitter { // ]); const previousUrl = page.url(); + visitedUrls.push(previousUrl); try { // Try both click methods simultaneously @@ -651,10 +653,31 @@ export default class Interpreter extends EventEmitter { const currentUrl = page.url(); if (currentUrl === previousUrl) { console.log("Previous URL same as current URL. Navigation failed."); - continue; } - // Otherwise, log and continue - console.log('Navigation succeeded despite click error'); + } + + const currentUrl = page.url(); + if (visitedUrls.includes(currentUrl)) { + console.log(`Detected navigation to a previously visited URL: ${currentUrl}`); + + // Extract the current page number from the URL + const match = currentUrl.match(/\d+/); + if (match) { + const currentNumber = match[0]; + // Use visitedUrls.length + 1 as the next page number + const nextNumber = visitedUrls.length + 1; + + // Create new URL by replacing the current number with the next number + const nextUrl = currentUrl.replace(currentNumber, nextNumber.toString()); + + console.log(`Navigating to constructed URL: ${nextUrl}`); + + // Navigate to the next page + await Promise.all([ + page.waitForNavigation({ waitUntil: 'networkidle' }), + page.goto(nextUrl) + ]); + } } // Give the page a moment to stabilize after navigation From 78e85c78d44e0c75169a04db65ea42974ce6c387 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Sun, 12 Jan 2025 14:56:40 +0530 Subject: [PATCH 14/14] feat: add support for iframe and shadowDom selector in pagination logic --- server/src/workflow-management/classes/Generator.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 0ec251b5..7368f0cb 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -712,6 +712,8 @@ export class WorkflowGenerator { // Chain selectors in specific priority order const selectors = selectorBasedOnCustomAction; const selectorChain = [ + selectors?.iframeSelector?.full, + selectors?.shadowSelector?.full, selectors?.testIdSelector, selectors?.id, selectors?.hrefSelector,