From a5b75d8ef28c4cb077e092b70b99b5a13352690d Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 6 Aug 2024 18:38:41 +0530 Subject: [PATCH 001/172] feat: add scrapeLists to custom action --- src/shared/types.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shared/types.ts b/src/shared/types.ts index aa5f254e..a0e44a96 100644 --- a/src/shared/types.ts +++ b/src/shared/types.ts @@ -23,4 +23,4 @@ export interface ScreenshotSettings { type?: "jpeg" | "png"; }; -export declare type CustomActions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag'; +export declare type CustomActions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeLists'; From abd5f2bd1e6d76f519c5a2da1144303b8750a777 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 6 Aug 2024 21:16:43 +0530 Subject: [PATCH 002/172] feat: init scrapeList --- maxun-core/src/browserSide/scraper.js | 66 +++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 262e63ec..6942bd2a 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -250,4 +250,70 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, )); } + + /** + * Scrapes multiple lists of similar items based on a template item. + * @param {Object} config - Configuration object + * @param {string} config.listSelector - Selector for the list container(s) + * @param {Object.} config.fields - Fields to scrape + * @param {number} [config.limit] - Maximum number of items to scrape per list (optional) + * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors + * @returns {Array.>} Array of arrays of scraped items, one sub-array per list + */ +window.scrapeList = function(config) { + const { listSelector, fields, limit, flexible = false } = config; + + // Get all lists + const lists = Array.from(document.querySelectorAll(listSelector)); + + return lists.map(list => { + // Get all list items within this list + const listItems = Array.from(list.children); + + // Apply limit if specified + const itemsToScrape = limit ? listItems.slice(0, limit) : listItems; + + // Scrape each item + return itemsToScrape.map(item => { + const scrapedItem = {}; + + for (const [fieldName, fieldConfig] of Object.entries(fields)) { + let element; + + if (flexible) { + // Try multiple strategies to find the element + element = item.querySelector(fieldConfig.selector) || + item.querySelector(`[class*="${fieldConfig.selector}"]`) || + Array.from(item.querySelectorAll('*')) + .find(el => el.textContent.trim() === fieldConfig.selector); + } else { + element = item.querySelector(fieldConfig.selector); + } + + if (element) { + switch (fieldConfig.attribute) { + case 'href': + scrapedItem[fieldName] = element.getAttribute('href'); + break; + case 'src': + scrapedItem[fieldName] = element.getAttribute('src'); + break; + case 'textContent': + scrapedItem[fieldName] = element.textContent.trim(); + break; + case 'innerText': + default: + scrapedItem[fieldName] = element.innerText.trim(); + break; + } + } else { + scrapedItem[fieldName] = null; + } + } + + return scrapedItem; + }); + }); +}; + })(window); \ No newline at end of file From 9c6f21ce914cc3f05338551d0194feb06aaf899e Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 6 Aug 2024 23:12:31 +0530 Subject: [PATCH 003/172] feat: non unique css selectors --- server/src/workflow-management/selector.ts | 53 ++++++++++++++++++++++ 1 file changed, 53 insertions(+) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index b94ed2d1..7c86dfb7 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -721,6 +721,59 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return null; }; + +/** + * Returns the best non-unique css {@link Selectors} for the element on the page. + * Internally uses a finder function from https://github.com/antonmedv/finder/blob/master/finder.ts + * available as a npm package: @medv/finder + * + * The finder needs to be executed and defined inside a browser context. Meaning, + * the code needs to be available inside a page evaluate function. + * @param page The page instance. + * @param coordinates Coordinates of an element. + * @category WorkflowManagement-Selectors + * @returns {Promise} + */ +export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates) => { + try { + const selectors: any = await page.evaluate(async ({ x, y }) => { + // version @medv/finder + // https://github.com/antonmedv/finder/blob/master/finder.ts + + const genNonUniqueSelectors = (element: HTMLElement | null) => { + if (element == null) { + return null; + } + + const tagName = element.tagName.toLowerCase(); + const classNames = Array.from(element.classList).map(cls => `.${cls}`).join(''); + + const nonUniqueSelector = `${tagName}${classNames}`; + + return { + nonUniqueSelector, + text: element.innerText, + }; + }; + + const hoveredElement = document.elementFromPoint(x, y) as HTMLElement; + if (hoveredElement != null && !hoveredElement.closest('#overlay-controls')) { + const { parentElement } = hoveredElement; + const element = parentElement?.tagName === 'A' ? parentElement : hoveredElement; + const generatedSelectors = genNonUniqueSelectors(element); + return generatedSelectors; + } + }, { x: coordinates.x, y: coordinates.y }); + return selectors; + } catch (e) { + const { message, stack } = e as Error; + console.error('Error while retrieving element:', message); + console.error('Stack:', stack); + } + return null; +}; + + /** * Returns the first pair from the given workflow that contains the given selector * inside the where condition, and it is the only selector there. From ffa992f1862bf6df095715c728b10e16989b8375 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 6 Aug 2024 23:15:12 +0530 Subject: [PATCH 004/172] feat: pass scrapeList to ensureScriptsLoaded --- maxun-core/src/interpret.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 4860d2fd..d624f930 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -29,6 +29,7 @@ interface InterpreterOptions { }> } + /** * Class for running the Smart Workflows. */ @@ -410,7 +411,7 @@ export default class Interpreter extends EventEmitter { } private async ensureScriptsLoaded(page: Page) { - const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function'); + const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function' && typeof window.scrapeList === 'function'); if (!isScriptLoaded) { await page.addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); } From a3d699b7761c70faf2935c1e96f78796c0d65358 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 6 Aug 2024 23:18:58 +0530 Subject: [PATCH 005/172] feat: add scrapeList in CustomFunctions --- maxun-core/src/types/workflow.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/types/workflow.ts b/maxun-core/src/types/workflow.ts index 36c6d14d..459cfa88 100644 --- a/maxun-core/src/types/workflow.ts +++ b/maxun-core/src/types/workflow.ts @@ -28,7 +28,7 @@ type MethodNames = { [K in keyof T]: T[K] extends Function ? K : never; }[keyof T]; -export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag'; +export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList'; export type What = { action: MethodNames | CustomFunctions, From 20810aa85b28a4153923bb5eac8c0e0e811fe899 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 6 Aug 2024 23:20:10 +0530 Subject: [PATCH 006/172] feat: handle scrapeList in WAW actions --- maxun-core/src/interpret.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index d624f930..506c7bd7 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -291,6 +291,12 @@ export default class Interpreter extends EventEmitter { await this.options.serializableCallback(scrapeResult); }, + scrapeList: async (config: { listSelector: string, fields: any, limit?: number, flexible?: boolean }) => { + await this.ensureScriptsLoaded(page); + const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config); + await this.options.serializableCallback(scrapeResults); + }, + scroll: async (pages?: number) => { await page.evaluate(async (pagesInternal) => { for (let i = 1; i <= (pagesInternal ?? 1); i += 1) { From 7a14fe2c1edc4bbc149a22aeef4986908fa957f0 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 00:28:45 +0530 Subject: [PATCH 007/172] feat: WORKING getNonUniqueSelectors --- server/src/workflow-management/selector.ts | 96 ++++++++++++---------- 1 file changed, 51 insertions(+), 45 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 7c86dfb7..cfe7741d 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -98,19 +98,19 @@ export const getElementInformation = async ( { x: coordinates.x, y: coordinates.y }, ); - if (elementInfo) { - if (elementInfo.tagName === 'A') { - if (elementInfo.innerText) { - console.log(`Link text: ${elementInfo.innerText}, URL: ${elementInfo.url}`); - } else { - console.log(`URL: ${elementInfo.url}`); - } - } else if (elementInfo.tagName === 'IMG') { - console.log(`Image URL: ${elementInfo.imageUrl}`); - } else { - console.log(`Element innerText: ${elementInfo.innerText}`); - } - } + // if (elementInfo) { + // if (elementInfo.tagName === 'A') { + // if (elementInfo.innerText) { + // console.log(`Link text: ${elementInfo.innerText}, URL: ${elementInfo.url}`); + // } else { + // console.log(`URL: ${elementInfo.url}`); + // } + // } else if (elementInfo.tagName === 'IMG') { + // console.log(`Image URL: ${elementInfo.imageUrl}`); + // } else { + // console.log(`Element innerText: ${elementInfo.innerText}`); + // } + // } return elementInfo; } catch (error) { @@ -734,46 +734,52 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { * @category WorkflowManagement-Selectors * @returns {Promise} */ + export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates) => { try { - const selectors: any = await page.evaluate(async ({ x, y }) => { - // version @medv/finder - // https://github.com/antonmedv/finder/blob/master/finder.ts - - const genNonUniqueSelectors = (element: HTMLElement | null) => { - if (element == null) { - return null; + const selectors = await page.evaluate(({ x, y }) => { + function getSelector(element: Element): string { + let selector = element.tagName.toLowerCase(); + if (element.className) { + const classes = element.className.split(/\s+/).filter(Boolean); + if (classes.length > 0) { + selector += '.' + classes.join('.'); + } } - - const tagName = element.tagName.toLowerCase(); - const classNames = Array.from(element.classList).map(cls => `.${cls}`).join(''); - - const nonUniqueSelector = `${tagName}${classNames}`; - - return { - nonUniqueSelector, - text: element.innerText, - }; - }; - - const hoveredElement = document.elementFromPoint(x, y) as HTMLElement; - if (hoveredElement != null && !hoveredElement.closest('#overlay-controls')) { - const { parentElement } = hoveredElement; - const element = parentElement?.tagName === 'A' ? parentElement : hoveredElement; - const generatedSelectors = genNonUniqueSelectors(element); - return generatedSelectors; + return selector; } - }, { x: coordinates.x, y: coordinates.y }); - return selectors; - } catch (e) { - const { message, stack } = e as Error; - console.error('Error while retrieving element:', message); - console.error('Stack:', stack); + + function getSelectorPath(element: Element): string { + const path = []; + while (element && element !== document.body) { + path.unshift(getSelector(element)); + element = element.parentElement!; + } + return path.join(' > '); + } + + const element = document.elementFromPoint(x, y) as Element; + if (!element) return null; + + const generalSelector = getSelectorPath(element); + return { + generalSelector, + tagName: element.tagName.toLowerCase(), + className: element.className, + text: element.textContent?.trim() || '', + href: (element as HTMLAnchorElement).href || undefined, + }; + }, coordinates); + + return selectors || {}; + } catch (error) { + console.error('Error in getNonUniqueSelectors:', error); + return {}; } - return null; }; + /** * Returns the first pair from the given workflow that contains the given selector * inside the where condition, and it is the only selector there. From 12a62df9723438e07e0a67e7781bbfbbad932a16 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 05:19:35 +0530 Subject: [PATCH 008/172] docs: getNonUniqueSelectors --- server/src/workflow-management/selector.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index cfe7741d..418d42c1 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -724,11 +724,6 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { /** * Returns the best non-unique css {@link Selectors} for the element on the page. - * Internally uses a finder function from https://github.com/antonmedv/finder/blob/master/finder.ts - * available as a npm package: @medv/finder - * - * The finder needs to be executed and defined inside a browser context. Meaning, - * the code needs to be available inside a page evaluate function. * @param page The page instance. * @param coordinates Coordinates of an element. * @category WorkflowManagement-Selectors From 6cf7aa2ace6f650bfa0bd7b6dd543a24ebc4c089 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 05:25:00 +0530 Subject: [PATCH 009/172] feat: remove text & href --- server/src/workflow-management/selector.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 418d42c1..4ae13af0 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -761,8 +761,6 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates generalSelector, tagName: element.tagName.toLowerCase(), className: element.className, - text: element.textContent?.trim() || '', - href: (element as HTMLAnchorElement).href || undefined, }; }, coordinates); From e453b45ac966f71442c7f24823c41fa711be8294 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 05:25:28 +0530 Subject: [PATCH 010/172] feat: remove tag & class --- server/src/workflow-management/selector.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 4ae13af0..635a6318 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -759,8 +759,6 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates const generalSelector = getSelectorPath(element); return { generalSelector, - tagName: element.tagName.toLowerCase(), - className: element.className, }; }, coordinates); From 09e2fd19a0cc276ef1e0ff160044fc2e70d38dfd Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 05:26:10 +0530 Subject: [PATCH 011/172] feat: return generalSelector --- server/src/workflow-management/selector.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 635a6318..80af9d73 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -757,9 +757,9 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates if (!element) return null; const generalSelector = getSelectorPath(element); - return { - generalSelector, - }; + + return generalSelector; + }, coordinates); return selectors || {}; From b185686ff96383e59955d9c421ccae41150b7a01 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 05:31:37 +0530 Subject: [PATCH 012/172] fix: pass generalSelector in object --- server/src/workflow-management/selector.ts | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 80af9d73..6ab0a8fb 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -757,9 +757,10 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates if (!element) return null; const generalSelector = getSelectorPath(element); - - return generalSelector; - + return { + generalSelector, + }; + }, coordinates); return selectors || {}; From 3111757d82df1b2924d61c03a526e23b027bb4a6 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 05:32:00 +0530 Subject: [PATCH 013/172] chore: lint --- server/src/workflow-management/selector.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 6ab0a8fb..e73e9629 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -111,7 +111,7 @@ export const getElementInformation = async ( // console.log(`Element innerText: ${elementInfo.innerText}`); // } // } - + return elementInfo; } catch (error) { const { message, stack } = error as Error; @@ -760,7 +760,7 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates return { generalSelector, }; - + }, coordinates); return selectors || {}; From c053f968476f15762531e9da2111b94ae47e0b9f Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 05:32:16 +0530 Subject: [PATCH 014/172] chore: -rm whitespace --- server/src/workflow-management/selector.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index e73e9629..06915466 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -770,8 +770,6 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates } }; - - /** * Returns the first pair from the given workflow that contains the given selector * inside the where condition, and it is the only selector there. From 6664ca421cbb004d84b0dc48816921d41c341165 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 05:33:33 +0530 Subject: [PATCH 015/172] fix: pass scrapeList instead of scrapeLists --- src/shared/types.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shared/types.ts b/src/shared/types.ts index a0e44a96..686eaf20 100644 --- a/src/shared/types.ts +++ b/src/shared/types.ts @@ -23,4 +23,4 @@ export interface ScreenshotSettings { type?: "jpeg" | "png"; }; -export declare type CustomActions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeLists'; +export declare type CustomActions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList'; From e113799029de7a11e7ada6a9f4e02e58e719646d Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 05:57:55 +0530 Subject: [PATCH 016/172] feat: add just text & capture list buttons --- src/components/organisms/RightSidePanel.tsx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 514db44f..1aa54da8 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -104,6 +104,8 @@ export const RightSidePanel = () => { {!getText && !getScreenshot && } {getText && <> + + From a7e4200677e41c7598b8c8e68cbed3ecb6d2b149 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 17:55:36 +0530 Subject: [PATCH 017/172] feat: less brittle non unique selectors --- server/src/workflow-management/selector.ts | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 06915466..eccd9b23 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -730,37 +730,41 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { * @returns {Promise} */ -export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates) => { +export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates) => { try { const selectors = await page.evaluate(({ x, y }) => { - function getSelector(element: Element): string { + function getSelector(element: any) { let selector = element.tagName.toLowerCase(); + + // Capture a single, relevant class if present if (element.className) { const classes = element.className.split(/\s+/).filter(Boolean); if (classes.length > 0) { - selector += '.' + classes.join('.'); + // Use only the first class to avoid over-specificity + selector += '.' + classes[0]; } } + return selector; } - function getSelectorPath(element: Element): string { + function getSelectorPath(element: any) { const path = []; while (element && element !== document.body) { - path.unshift(getSelector(element)); - element = element.parentElement!; + const selector = getSelector(element); + path.unshift(selector); + element = element.parentElement; } return path.join(' > '); } - const element = document.elementFromPoint(x, y) as Element; + const element = document.elementFromPoint(x, y); if (!element) return null; const generalSelector = getSelectorPath(element); return { generalSelector, }; - }, coordinates); return selectors || {}; @@ -770,6 +774,7 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates } }; + /** * Returns the first pair from the given workflow that contains the given selector * inside the where condition, and it is the only selector there. From 3bd8a54258e66601e76ce39a78a6660dbf413988 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 17:58:47 +0530 Subject: [PATCH 018/172] chore: remove comments --- maxun-core/src/browserSide/scraper.js | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 6942bd2a..5de56ae6 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -263,17 +263,14 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, window.scrapeList = function(config) { const { listSelector, fields, limit, flexible = false } = config; - // Get all lists const lists = Array.from(document.querySelectorAll(listSelector)); return lists.map(list => { - // Get all list items within this list const listItems = Array.from(list.children); - // Apply limit if specified const itemsToScrape = limit ? listItems.slice(0, limit) : listItems; - // Scrape each item + // scrape each item return itemsToScrape.map(item => { const scrapedItem = {}; @@ -281,7 +278,7 @@ window.scrapeList = function(config) { let element; if (flexible) { - // Try multiple strategies to find the element + // try multiple strategies to find the element element = item.querySelector(fieldConfig.selector) || item.querySelector(`[class*="${fieldConfig.selector}"]`) || Array.from(item.querySelectorAll('*')) From 202ecf977b95ca9bfd3f3146ad5b20f030c43ace Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 7 Aug 2024 18:19:48 +0530 Subject: [PATCH 019/172] chore: lint --- maxun-core/src/browserSide/scraper.js | 98 +++++++++++++-------------- 1 file changed, 49 insertions(+), 49 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 5de56ae6..8528f529 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -260,57 +260,57 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ -window.scrapeList = function(config) { - const { listSelector, fields, limit, flexible = false } = config; - - const lists = Array.from(document.querySelectorAll(listSelector)); - - return lists.map(list => { - const listItems = Array.from(list.children); - - const itemsToScrape = limit ? listItems.slice(0, limit) : listItems; - - // scrape each item - return itemsToScrape.map(item => { - const scrapedItem = {}; - - for (const [fieldName, fieldConfig] of Object.entries(fields)) { - let element; - - if (flexible) { - // try multiple strategies to find the element - element = item.querySelector(fieldConfig.selector) || - item.querySelector(`[class*="${fieldConfig.selector}"]`) || - Array.from(item.querySelectorAll('*')) - .find(el => el.textContent.trim() === fieldConfig.selector); - } else { - element = item.querySelector(fieldConfig.selector); - } - - if (element) { - switch (fieldConfig.attribute) { - case 'href': - scrapedItem[fieldName] = element.getAttribute('href'); - break; - case 'src': - scrapedItem[fieldName] = element.getAttribute('src'); - break; - case 'textContent': - scrapedItem[fieldName] = element.textContent.trim(); - break; - case 'innerText': - default: - scrapedItem[fieldName] = element.innerText.trim(); - break; + window.scrapeList = function (config) { + const { listSelector, fields, limit, flexible = false } = config; + + const lists = Array.from(document.querySelectorAll(listSelector)); + + return lists.map(list => { + const listItems = Array.from(list.children); + + const itemsToScrape = limit ? listItems.slice(0, limit) : listItems; + + // scrape each item + return itemsToScrape.map(item => { + const scrapedItem = {}; + + for (const [fieldName, fieldConfig] of Object.entries(fields)) { + let element; + + if (flexible) { + // try multiple strategies to find the element + element = item.querySelector(fieldConfig.selector) || + item.querySelector(`[class*="${fieldConfig.selector}"]`) || + Array.from(item.querySelectorAll('*')) + .find(el => el.textContent.trim() === fieldConfig.selector); + } else { + element = item.querySelector(fieldConfig.selector); + } + + if (element) { + switch (fieldConfig.attribute) { + case 'href': + scrapedItem[fieldName] = element.getAttribute('href'); + break; + case 'src': + scrapedItem[fieldName] = element.getAttribute('src'); + break; + case 'textContent': + scrapedItem[fieldName] = element.textContent.trim(); + break; + case 'innerText': + default: + scrapedItem[fieldName] = element.innerText.trim(); + break; + } + } else { + scrapedItem[fieldName] = null; } - } else { - scrapedItem[fieldName] = null; } - } - - return scrapedItem; + + return scrapedItem; + }); }); - }); -}; + }; })(window); \ No newline at end of file From b9ff4b5facb847025a6abcd98b7a695fbea9ddcf Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 00:12:31 +0530 Subject: [PATCH 020/172] fix: revert to capture text & screenshot --- src/components/organisms/RightSidePanel.tsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 1aa54da8..514db44f 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -104,8 +104,6 @@ export const RightSidePanel = () => { {!getText && !getScreenshot && } {getText && <> - - From 1cea5a7ac65d415c89d8cebd774639e3046a38e8 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 00:17:47 +0530 Subject: [PATCH 021/172] feat: getList action --- src/context/browserActions.tsx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/context/browserActions.tsx b/src/context/browserActions.tsx index 50d86777..e91c1b12 100644 --- a/src/context/browserActions.tsx +++ b/src/context/browserActions.tsx @@ -2,6 +2,7 @@ import React, { createContext, useContext, useState, ReactNode } from 'react'; interface ActionContextProps { getText: boolean; + getList: boolean; getScreenshot: boolean; startGetText: () => void; stopGetText: () => void; @@ -13,6 +14,7 @@ const ActionContext = createContext(undefined); export const ActionProvider = ({ children }: { children: ReactNode }) => { const [getText, setGetText] = useState(false); + const [getList, setGetList] = useState(false); const [getScreenshot, setGetScreenshot] = useState(false); const startGetText = () => setGetText(true); @@ -22,7 +24,7 @@ export const ActionProvider = ({ children }: { children: ReactNode }) => { const stopGetScreenshot = () => setGetScreenshot(false); return ( - + {children} ); From 5caeefc83ef61b064c388416061d4c9192e954ed Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 00:24:07 +0530 Subject: [PATCH 022/172] feat: startGetList & stopGetList --- src/context/browserActions.tsx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/context/browserActions.tsx b/src/context/browserActions.tsx index e91c1b12..b05d8220 100644 --- a/src/context/browserActions.tsx +++ b/src/context/browserActions.tsx @@ -6,6 +6,8 @@ interface ActionContextProps { getScreenshot: boolean; startGetText: () => void; stopGetText: () => void; + startGetList: () => void; + stopGetList: () => void; startGetScreenshot: () => void; stopGetScreenshot: () => void; } @@ -20,11 +22,14 @@ export const ActionProvider = ({ children }: { children: ReactNode }) => { const startGetText = () => setGetText(true); const stopGetText = () => setGetText(false); + const startGetList = () => setGetList(true); + const stopGetList = () => setGetList(false); + const startGetScreenshot = () => setGetScreenshot(true); const stopGetScreenshot = () => setGetScreenshot(false); return ( - + {children} ); From d7a3f23ab2075ff86270575c831404a880a80b86 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 00:26:32 +0530 Subject: [PATCH 023/172] chore: lint --- src/context/browserActions.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/context/browserActions.tsx b/src/context/browserActions.tsx index b05d8220..5a7c12b5 100644 --- a/src/context/browserActions.tsx +++ b/src/context/browserActions.tsx @@ -41,4 +41,4 @@ export const useActionContext = () => { throw new Error('useActionContext must be used within an ActionProvider'); } return context; -}; \ No newline at end of file +}; From 64b94d0db277da66ae46b03b2c135631dec4b825 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 00:35:37 +0530 Subject: [PATCH 024/172] feat: getList --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index fd1589da..afd16c92 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -47,7 +47,7 @@ export const BrowserWindow = () => { const { socket } = useSocketStore(); const { width, height } = useBrowserDimensionsStore(); - const { getText } = useActionContext(); + const { getText, getList } = useActionContext(); const { addTextStep } = useBrowserSteps(); const onMouseMove = (e: MouseEvent) => { From b13a92a24b56a8886686eda5f975d4fb82e91c95 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 00:39:28 +0530 Subject: [PATCH 025/172] feat: use getList --- src/components/organisms/BrowserWindow.tsx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index afd16c92..08cae087 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -111,7 +111,7 @@ export const BrowserWindow = () => { clickY >= highlightRect.top && clickY <= highlightRect.bottom ) { - if (getText === true) { + if (getText === true || getList === true) { const options = getAttributeOptions(highlighterData.elementInfo?.tagName || ''); if (options.length > 1) { setAttributeOptions(options); @@ -146,7 +146,7 @@ export const BrowserWindow = () => { data = selectedElement.info?.innerText || ''; } { - if (getText === true) { + if (getText === true || getList === true) { addTextStep('', data, { selector: selectedElement.selector, tag: selectedElement.info?.tagName, @@ -161,7 +161,7 @@ export const BrowserWindow = () => { return (
{ - getText === true ? ( + getText === true || getList === true ? ( { }} @@ -179,7 +179,7 @@ export const BrowserWindow = () => { ) : null } - {(getText === true && !showAttributeModal && highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ? + {((getText === true || getList === true) && !showAttributeModal && highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ? Date: Thu, 8 Aug 2024 00:40:08 +0530 Subject: [PATCH 026/172] fix: -rm getScreenshot --- src/components/atoms/canvas.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/atoms/canvas.tsx b/src/components/atoms/canvas.tsx index f6e6fb1c..e9ca1c80 100644 --- a/src/components/atoms/canvas.tsx +++ b/src/components/atoms/canvas.tsx @@ -27,7 +27,7 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { const canvasRef = useRef(null); const { socket } = useSocketStore(); const { setLastAction, lastAction } = useGlobalInfoStore(); - const { getText, getScreenshot } = useActionContext(); + const { getText } = useActionContext(); const getTextRef = useRef(getText); const notifyLastAction = (action: string) => { From 8484e647700220085f8e246eeb2dbc06cc0c62c0 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 00:41:32 +0530 Subject: [PATCH 027/172] feat: getList & getList ref --- src/components/atoms/canvas.tsx | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/components/atoms/canvas.tsx b/src/components/atoms/canvas.tsx index e9ca1c80..cbef98c2 100644 --- a/src/components/atoms/canvas.tsx +++ b/src/components/atoms/canvas.tsx @@ -27,8 +27,9 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { const canvasRef = useRef(null); const { socket } = useSocketStore(); const { setLastAction, lastAction } = useGlobalInfoStore(); - const { getText } = useActionContext(); + const { getText, getList } = useActionContext(); const getTextRef = useRef(getText); + const getListRef = useRef(getList); const notifyLastAction = (action: string) => { if (lastAction !== action) { @@ -40,7 +41,8 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { useEffect(() => { getTextRef.current = getText; - }, [getText]); + getListRef.current = getList; + }, [getText, getList]); const onMouseEvent = useCallback((event: MouseEvent) => { if (socket) { From cf3bf0ab2933e09646cc8570e672da51ba2cfa5c Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 00:42:39 +0530 Subject: [PATCH 028/172] feat: check getListRef.current for mousedown --- src/components/atoms/canvas.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/atoms/canvas.tsx b/src/components/atoms/canvas.tsx index cbef98c2..cd0e2323 100644 --- a/src/components/atoms/canvas.tsx +++ b/src/components/atoms/canvas.tsx @@ -53,8 +53,8 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { switch (event.type) { case 'mousedown': const clickCoordinates = getMappedCoordinates(event, canvasRef.current, width, height); - if (getTextRef.current === true) { - console.log('get text') + if (getTextRef.current === true || getListRef.current === true) { + console.log('get text or get list is true'); } else { socket.emit('input:mousedown', clickCoordinates); } From 8d0b24181f62e25b8949ea9cc8722443216e80b3 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 00:43:20 +0530 Subject: [PATCH 029/172] chore: add todo --- src/components/atoms/canvas.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/components/atoms/canvas.tsx b/src/components/atoms/canvas.tsx index cd0e2323..3e9d89c0 100644 --- a/src/components/atoms/canvas.tsx +++ b/src/components/atoms/canvas.tsx @@ -54,6 +54,7 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { case 'mousedown': const clickCoordinates = getMappedCoordinates(event, canvasRef.current, width, height); if (getTextRef.current === true || getListRef.current === true) { + // todo: remove console.log and return console.log('get text or get list is true'); } else { socket.emit('input:mousedown', clickCoordinates); From 0ba4a2be34fa54e15b0173f2759aa4e3e9787f3b Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 00:44:29 +0530 Subject: [PATCH 030/172] feat: get list related actions from action context --- src/components/organisms/RightSidePanel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 514db44f..77bc7881 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -20,7 +20,7 @@ export const RightSidePanel = () => { const [confirmedTextSteps, setConfirmedTextSteps] = useState<{ [id: number]: boolean }>({}); const { lastAction, notify } = useGlobalInfoStore(); - const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot } = useActionContext(); + const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot, getList, startGetList, stopGetList } = useActionContext(); const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep } = useBrowserSteps(); const { socket } = useSocketStore(); From 1d13956da8f02156fa6c3095105c699961ff2e9e Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 02:14:39 +0530 Subject: [PATCH 031/172] feat: pass !getList for conditional rendering --- src/components/organisms/RightSidePanel.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 77bc7881..1d097ef0 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -101,7 +101,7 @@ export const RightSidePanel = () => { - {!getText && !getScreenshot && } + {!getText && !getScreenshot && !getList && } {getText && <> @@ -111,7 +111,7 @@ export const RightSidePanel = () => { } - {!getText && !getScreenshot && } + {!getText && !getScreenshot && !getList && } {getScreenshot && ( From 79861d6e32f15c820e3d3d860de5dac77f163820 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 02:18:39 +0530 Subject: [PATCH 032/172] feat: capture list button --- src/components/organisms/RightSidePanel.tsx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 1d097ef0..7a1a93ff 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -101,6 +101,8 @@ export const RightSidePanel = () => { + {!getText && !getScreenshot && !getList && } + {!getText && !getScreenshot && !getList && } {getText && <> From 384ca11a4de587d907346660f14cdad765d5bf4c Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 02:19:38 +0530 Subject: [PATCH 033/172] feat: capture list confirm discard --- src/components/organisms/RightSidePanel.tsx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 7a1a93ff..d4cef3e7 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -102,6 +102,14 @@ export const RightSidePanel = () => { {!getText && !getScreenshot && !getList && } + {getList && + <> + + + + + + } {!getText && !getScreenshot && !getList && } {getText && From f1d87f3b91434cdd402bc92f2627fefa48b06697 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 02:20:05 +0530 Subject: [PATCH 034/172] fix: add missing {} --- src/components/organisms/RightSidePanel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index d4cef3e7..5149a85c 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -105,7 +105,7 @@ export const RightSidePanel = () => { {getList && <> - + From 56bd00e6c3a893e465cbc33a69b2cd77156591f9 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 03:12:39 +0530 Subject: [PATCH 035/172] feat: red outline instead of pink --- src/components/atoms/Highlighter.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/atoms/Highlighter.tsx b/src/components/atoms/Highlighter.tsx index 34fb8f47..973f9103 100644 --- a/src/components/atoms/Highlighter.tsx +++ b/src/components/atoms/Highlighter.tsx @@ -54,7 +54,7 @@ const HighlighterOutline = styled.div` pointer-events: none !important; position: fixed !important; background: #ff5d5b26 !important; - outline: 4px solid pink !important; + outline: 4px solid red !important; //border: 4px solid #ff5d5b !important; z-index: 2147483647 !important; //border-radius: 5px; From 63dbeffd2ef6891277a15d4fbeb7218cd7d0617e Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 03:13:14 +0530 Subject: [PATCH 036/172] chore: remove console logs --- src/components/atoms/Highlighter.tsx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/components/atoms/Highlighter.tsx b/src/components/atoms/Highlighter.tsx index 973f9103..66d09327 100644 --- a/src/components/atoms/Highlighter.tsx +++ b/src/components/atoms/Highlighter.tsx @@ -24,10 +24,6 @@ export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, hei }; - //console.log('unmodifiedRect:', unmodifiedRect) - //console.log('rectangle:', rect) - //console.log('canvas rectangle:', canvasRect) - return (
Date: Thu, 8 Aug 2024 04:42:52 +0530 Subject: [PATCH 037/172] feat: use selectors as per getList value --- server/src/workflow-management/classes/Generator.ts | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index ae2d2016..c629d3bb 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -7,6 +7,7 @@ import { getElementInformation, getRect, getSelectors, + getNonUniqueSelectors, isRuleOvershadowing, selectorAlreadyInWorkflow } from "../selector"; @@ -457,15 +458,22 @@ export class WorkflowGenerator { * @private * @returns {Promise} */ - private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType) => { + private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType, getList: boolean) => { const elementInfo = await getElementInformation(page, coordinates); + const selectorInfo = await getNonUniqueSelectors(page, coordinates); + + const selectorBasedOnCustomAction = (getList === true) + ? await getNonUniqueSelectors(page, coordinates) + : await getSelectors(page, coordinates); + + console.log('Selector Info:', selectorInfo); const bestSelector = getBestSelectorForAction( { type: action, tagName: elementInfo?.tagName as TagName || '', inputType: undefined, value: undefined, - selectors: await getSelectors(page, coordinates) || {}, + selectors: selectorBasedOnCustomAction || {}, timestamp: 0, isPassword: false, hasOnlyText: elementInfo?.hasOnlyText || false, From c2285312c1d10c3fbc23c2275a45b8ea32ff8bca Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 04:43:26 +0530 Subject: [PATCH 038/172] fix: remove selectorInfo --- server/src/workflow-management/classes/Generator.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index c629d3bb..08fdcc22 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -460,13 +460,11 @@ export class WorkflowGenerator { */ private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType, getList: boolean) => { const elementInfo = await getElementInformation(page, coordinates); - const selectorInfo = await getNonUniqueSelectors(page, coordinates); - + const selectorBasedOnCustomAction = (getList === true) ? await getNonUniqueSelectors(page, coordinates) : await getSelectors(page, coordinates); - console.log('Selector Info:', selectorInfo); const bestSelector = getBestSelectorForAction( { type: action, From 9d53d033fbfdc912d85edf07bfdb9282640beb01 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 05:27:50 +0530 Subject: [PATCH 039/172] feat: socket listener for getList --- .../workflow-management/classes/Generator.ts | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 08fdcc22..5bae460b 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -47,6 +47,8 @@ export class WorkflowGenerator { */ private socket: Socket; + private getList: boolean = false; + /** * The public constructor of the WorkflowGenerator. * Takes socket for communication as a parameter and registers some important events on it. @@ -56,6 +58,7 @@ export class WorkflowGenerator { public constructor(socket: Socket) { this.socket = socket; this.registerEventHandlers(socket); + this.initializeSocketListeners(); } /** @@ -89,6 +92,13 @@ export class WorkflowGenerator { lastAction: '', } + private initializeSocketListeners() { + this.socket.on('setGetList', (data: { getList: boolean }) => { + this.getList = data.getList; + }); + } + + /** * Registers the event handlers for all generator-related events on the socket. * @param socket The socket used to communicate with the client. @@ -458,13 +468,12 @@ export class WorkflowGenerator { * @private * @returns {Promise} */ - private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType, getList: boolean) => { + private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType, getList?: boolean) => { const elementInfo = await getElementInformation(page, coordinates); - + const selectorBasedOnCustomAction = (getList === true) ? await getNonUniqueSelectors(page, coordinates) : await getSelectors(page, coordinates); - const bestSelector = getBestSelectorForAction( { type: action, @@ -487,9 +496,9 @@ export class WorkflowGenerator { * @param coordinates The coordinates of the element. * @returns {Promise} */ - public generateDataForHighlighter = async (page: Page, coordinates: Coordinates) => { + public generateDataForHighlighter = async (page: Page, coordinates: Coordinates, getList?: boolean) => { const rect = await getRect(page, coordinates); - const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click); + const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click, getList); const elementInfo = await getElementInformation(page, coordinates); if (rect) { this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); From 05ee905cd1811b0dc51f5ebbea6ff943bf0cbdab Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 05:28:50 +0530 Subject: [PATCH 040/172] feat: use getList from class --- server/src/workflow-management/classes/Generator.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 5bae460b..5194bde7 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -471,7 +471,7 @@ export class WorkflowGenerator { private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType, getList?: boolean) => { const elementInfo = await getElementInformation(page, coordinates); - const selectorBasedOnCustomAction = (getList === true) + const selectorBasedOnCustomAction = (this.getList === true) ? await getNonUniqueSelectors(page, coordinates) : await getSelectors(page, coordinates); const bestSelector = getBestSelectorForAction( From 3eb56e20b18dca533d9d3c4d6f68ec4de29913d0 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 05:29:48 +0530 Subject: [PATCH 041/172] feat: -rm getList as argument --- server/src/workflow-management/classes/Generator.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 5194bde7..03c64d4e 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -468,7 +468,7 @@ export class WorkflowGenerator { * @private * @returns {Promise} */ - private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType, getList?: boolean) => { + private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType) => { const elementInfo = await getElementInformation(page, coordinates); const selectorBasedOnCustomAction = (this.getList === true) @@ -496,9 +496,9 @@ export class WorkflowGenerator { * @param coordinates The coordinates of the element. * @returns {Promise} */ - public generateDataForHighlighter = async (page: Page, coordinates: Coordinates, getList?: boolean) => { + public generateDataForHighlighter = async (page: Page, coordinates: Coordinates) => { const rect = await getRect(page, coordinates); - const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click, getList); + const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click); const elementInfo = await getElementInformation(page, coordinates); if (rect) { this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); From 078f8ee765ea7dc4e5f53668a8d6813e509d7394 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 06:23:48 +0530 Subject: [PATCH 042/172] feat: reset getList post use --- server/src/workflow-management/classes/Generator.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 03c64d4e..80994b4c 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -503,6 +503,7 @@ export class WorkflowGenerator { if (rect) { this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); } + this.getList = false; } /** From 97ebcf9929b7009b18639ef8552393df463a4f81 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 06:24:20 +0530 Subject: [PATCH 043/172] chore: lint --- server/src/workflow-management/classes/Generator.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 80994b4c..25300e8d 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -471,9 +471,9 @@ export class WorkflowGenerator { private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType) => { const elementInfo = await getElementInformation(page, coordinates); - const selectorBasedOnCustomAction = (this.getList === true) - ? await getNonUniqueSelectors(page, coordinates) - : await getSelectors(page, coordinates); + const selectorBasedOnCustomAction = (this.getList === true) + ? await getNonUniqueSelectors(page, coordinates) + : await getSelectors(page, coordinates); const bestSelector = getBestSelectorForAction( { type: action, From 68ed6087edfbf9c329b186673abe0554f8cc6862 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 06:26:27 +0530 Subject: [PATCH 044/172] docs: getList property --- server/src/workflow-management/classes/Generator.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 25300e8d..c66cd2d9 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -47,6 +47,10 @@ export class WorkflowGenerator { */ private socket: Socket; + /** + * getList is one of the custom actions from maxun-core. + * Used to provide appropriate selectors for the getList action. + */ private getList: boolean = false; /** From d2b4f550048966edbac49a16c90e6b27844f2a45 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 06:27:02 +0530 Subject: [PATCH 045/172] docss: genertaor socket listeners --- server/src/workflow-management/classes/Generator.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index c66cd2d9..e94a8bc0 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -96,6 +96,9 @@ export class WorkflowGenerator { lastAction: '', } + /** + * Initializes the socket listeners for the generator. + */ private initializeSocketListeners() { this.socket.on('setGetList', (data: { getList: boolean }) => { this.getList = data.getList; From 7dc3ea816143344039b4d86a8a315dd632c60228 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 06:27:21 +0530 Subject: [PATCH 046/172] chore: remove whitespace --- server/src/workflow-management/classes/Generator.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index e94a8bc0..d9fd2024 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -105,7 +105,6 @@ export class WorkflowGenerator { }); } - /** * Registers the event handlers for all generator-related events on the socket. * @param socket The socket used to communicate with the client. From 5ce81b1f97ca7a5a69a14a6b00d16b0b1089f7e0 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 06:28:40 +0530 Subject: [PATCH 047/172] chore: comment for reset --- server/src/workflow-management/classes/Generator.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index d9fd2024..a7b5a641 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -509,6 +509,7 @@ export class WorkflowGenerator { if (rect) { this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); } + // reset getList after usage this.getList = false; } From 0cda60717ae18a199d60e41c11acf0dde46e3e43 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 06:29:34 +0530 Subject: [PATCH 048/172] feat: emit setGetList if getList true --- src/components/organisms/BrowserWindow.tsx | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 08cae087..0c3c655e 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -84,8 +84,11 @@ export const BrowserWindow = () => { }, [screenShot, canvasRef, socket, screencastHandler]); const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null }) => { + if (getList === true) { + socket?.emit('setGetList', { getList: true }); + } setHighlighterData(data); - }, [highlighterData]) + }, [highlighterData, getList, socket]); useEffect(() => { document.addEventListener('mousemove', onMouseMove, false); From cc17935d4b8f95f5d8ea8766f465402b44bafa5a Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 06:29:59 +0530 Subject: [PATCH 049/172] chore: lint --- src/components/organisms/BrowserWindow.tsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 0c3c655e..75e370e4 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -84,9 +84,9 @@ export const BrowserWindow = () => { }, [screenShot, canvasRef, socket, screencastHandler]); const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null }) => { - if (getList === true) { - socket?.emit('setGetList', { getList: true }); - } + if (getList === true) { + socket?.emit('setGetList', { getList: true }); + } setHighlighterData(data); }, [highlighterData, getList, socket]); From ce78b0102502f3826c451d9f507c358d73b419e3 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 21:11:02 +0530 Subject: [PATCH 050/172] feat: list step interface --- src/context/browserSteps.tsx | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index e2984e53..b951046b 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -14,8 +14,14 @@ interface ScreenshotStep { fullPage: boolean; } +interface ListStep { + id: number; + type: 'list'; + listSelector: string; + fields: { [key: string]: TextStep }; +} -type BrowserStep = TextStep | ScreenshotStep; +type BrowserStep = TextStep | ScreenshotStep | ListStep; interface SelectorObject { selector: string; From f64886227fc395e8973f2f3ceee075fb41f55031 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 23:56:00 +0530 Subject: [PATCH 051/172] feat: add lis step --- src/context/browserSteps.tsx | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index b951046b..21ff07e1 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -50,6 +50,13 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ ]); }; + const addListStep = (listSelector: string, fields: { [key: string]: TextStep }) => { + setBrowserSteps(prevSteps => [ + ...prevSteps, + { id: Date.now(), type: 'list', listSelector, fields } + ]); + }; + const addScreenshotStep = (fullPage: boolean) => { setBrowserSteps(prevSteps => [ ...prevSteps, From b3a15701b07cd4a7280f95fd3b06d6e4ecfc7834 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 8 Aug 2024 23:56:57 +0530 Subject: [PATCH 052/172] feat: pass addListStep in value --- src/context/browserSteps.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index 21ff07e1..49048422 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -80,6 +80,7 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ Date: Thu, 8 Aug 2024 23:57:26 +0530 Subject: [PATCH 053/172] feat: addList interface --- src/context/browserSteps.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index 49048422..b9d69762 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -33,6 +33,7 @@ interface SelectorObject { interface BrowserStepsContextType { browserSteps: BrowserStep[]; addTextStep: (label: string, data: string, selectorObj: SelectorObject) => void; + addListStep: (listSelector: string, fields: { [key: string]: TextStep }) => void addScreenshotStep: (fullPage: boolean) => void; deleteBrowserStep: (id: number) => void; updateBrowserTextStepLabel: (id: number, newLabel: string) => void; From 2a2f16af0a0d6d3cdb0cb76ec0cd003e1e188176 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 00:19:34 +0530 Subject: [PATCH 054/172] fix: remove getList for addTextStep --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 75e370e4..0c841f45 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -114,7 +114,7 @@ export const BrowserWindow = () => { clickY >= highlightRect.top && clickY <= highlightRect.bottom ) { - if (getText === true || getList === true) { + if (getText === true) { const options = getAttributeOptions(highlighterData.elementInfo?.tagName || ''); if (options.length > 1) { setAttributeOptions(options); From 54722d20255b240b6555e46300b9293b572bf128 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 06:23:10 +0530 Subject: [PATCH 055/172] feat: handle addListStep --- src/components/organisms/BrowserWindow.tsx | 68 +++++++++++++++++++++- 1 file changed, 65 insertions(+), 3 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 0c841f45..99f2d246 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -5,7 +5,7 @@ import { useBrowserDimensionsStore } from "../../context/browserDimensions"; import { Highlighter } from "../atoms/Highlighter"; import { GenericModal } from '../atoms/GenericModal'; import { useActionContext } from '../../context/browserActions'; -import { useBrowserSteps } from '../../context/browserSteps'; +import { useBrowserSteps, TextStep } from '../../context/browserSteps'; interface ElementInfo { tagName: string; @@ -45,10 +45,14 @@ export const BrowserWindow = () => { const [attributeOptions, setAttributeOptions] = useState([]); const [selectedElement, setSelectedElement] = useState<{ selector: string, info: ElementInfo | null } | null>(null); + const [listSelector, setListSelector] = useState(null); + const [fields, setFields] = useState>({}); + + const { socket } = useSocketStore(); const { width, height } = useBrowserDimensionsStore(); const { getText, getList } = useActionContext(); - const { addTextStep } = useBrowserSteps(); + const { addTextStep, addListStep } = useBrowserSteps(); const onMouseMove = (e: MouseEvent) => { if (canvasRef && canvasRef.current && highlighterData) { @@ -130,11 +134,69 @@ export const BrowserWindow = () => { attribute: 'innerText' }); } + } + + if (getList === true && !listSelector) { + setListSelector(highlighterData.selector); + //console.log('added list selector', highlighterData.selector); + } else if (getList === true && listSelector) { + const options = getAttributeOptions(highlighterData.elementInfo?.tagName || ''); + if (options.length > 1) { + setAttributeOptions(options); + setSelectedElement({ + selector: highlighterData.selector, + info: highlighterData.elementInfo + }); + setShowAttributeModal(true); + } else { + // When setting fields, ensure it matches the TextStep structure + const newField: TextStep = { + id: Date.now(), + type: 'text', // or another appropriate type + label: `label ${Object.keys(fields).length + 1}`, + data: highlighterData.elementInfo?.innerText || '', + selectorObj: { + selector: highlighterData.selector, + tag: highlighterData.elementInfo?.tagName, + attribute: 'innerText' + } + }; + //console.log('added new field:', newField) + + setFields(prevFields => { + const updatedFields = { + ...prevFields, + [newField.id]: newField + }; + + if (Object.keys(updatedFields).length > 0 && listSelector) { + console.log('listSelector before addListStep:', listSelector); + console.log('fields before addListStep:', updatedFields); + + addListStep(listSelector, updatedFields); + console.log('Called addListStep with:', { listSelector, updatedFields }); + } + + return updatedFields; + }); + } + } } } }; + + // useEffect(() => { + // // Save the ListStep after the fields are set + // if (Object.keys(fields).length > 0 && listSelector) { + // addListStep(listSelector, fields); + // // Reset after adding the step + // setListSelector(null); + // setFields({}); + // } + // }, [fields, listSelector, addListStep]); + const handleAttributeSelection = (attribute: string) => { if (selectedElement) { let data = ''; @@ -149,7 +211,7 @@ export const BrowserWindow = () => { data = selectedElement.info?.innerText || ''; } { - if (getText === true || getList === true) { + if (getText === true) { addTextStep('', data, { selector: selectedElement.selector, tag: selectedElement.info?.tagName, From 1a6f4b5b3cf5c23064c3d2b341f693a12e28237f Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 06:25:57 +0530 Subject: [PATCH 056/172] refactor: simplify setFields --- src/components/organisms/BrowserWindow.tsx | 26 ++++++++++------------ 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 99f2d246..7d4cf386 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -163,22 +163,20 @@ export const BrowserWindow = () => { }; //console.log('added new field:', newField) - setFields(prevFields => { - const updatedFields = { - ...prevFields, - [newField.id]: newField - }; + setFields(prevFields => ({ + ...prevFields, + [newField.id]: newField + })); + - if (Object.keys(updatedFields).length > 0 && listSelector) { - console.log('listSelector before addListStep:', listSelector); - console.log('fields before addListStep:', updatedFields); + // if (Object.keys(updatedFields).length > 0 && listSelector) { + // console.log('listSelector before addListStep:', listSelector); + // console.log('fields before addListStep:', updatedFields); - addListStep(listSelector, updatedFields); - console.log('Called addListStep with:', { listSelector, updatedFields }); - } - - return updatedFields; - }); + // addListStep(listSelector, updatedFields); + // console.log('Called addListStep with:', { listSelector, updatedFields }); + // } + } } From 99ccaed16a206d13a566f035d09a84f516214f4f Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 07:38:18 +0530 Subject: [PATCH 057/172] refactor: isolate field for state updates --- src/components/organisms/BrowserWindow.tsx | 69 ++++++++++++---------- 1 file changed, 39 insertions(+), 30 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 7d4cf386..0f6fa8bf 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -105,12 +105,12 @@ export const BrowserWindow = () => { }; }, [socket, onMouseMove]); - const handleClick = (e: React.MouseEvent) => { + const handleClick = useCallback((e: React.MouseEvent) => { if (highlighterData && canvasRef?.current) { const canvasRect = canvasRef.current.getBoundingClientRect(); const clickX = e.clientX - canvasRect.left; const clickY = e.clientY - canvasRect.top; - + const highlightRect = highlighterData.rect; if ( clickX >= highlightRect.left && @@ -134,11 +134,10 @@ export const BrowserWindow = () => { attribute: 'innerText' }); } - } - + } + if (getList === true && !listSelector) { setListSelector(highlighterData.selector); - //console.log('added list selector', highlighterData.selector); } else if (getList === true && listSelector) { const options = getAttributeOptions(highlighterData.elementInfo?.tagName || ''); if (options.length > 1) { @@ -149,10 +148,9 @@ export const BrowserWindow = () => { }); setShowAttributeModal(true); } else { - // When setting fields, ensure it matches the TextStep structure const newField: TextStep = { id: Date.now(), - type: 'text', // or another appropriate type + type: 'text', label: `label ${Object.keys(fields).length + 1}`, data: highlighterData.elementInfo?.innerText || '', selectorObj: { @@ -161,38 +159,49 @@ export const BrowserWindow = () => { attribute: 'innerText' } }; - //console.log('added new field:', newField) - - setFields(prevFields => ({ - ...prevFields, - [newField.id]: newField - })); - - - // if (Object.keys(updatedFields).length > 0 && listSelector) { - // console.log('listSelector before addListStep:', listSelector); - // console.log('fields before addListStep:', updatedFields); - // addListStep(listSelector, updatedFields); - // console.log('Called addListStep with:', { listSelector, updatedFields }); - // } - + // Use a callback to update the state after rendering + setTimeout(() => { + processFieldsUpdate(newField); + }, 0); } - } } } + }, [highlighterData, canvasRef, getText, getList, listSelector, fields, addTextStep, addListStep]); + + const processFieldsUpdate = (newField: TextStep) => { + setFields(prevFields => { + const updatedFields = { + ...prevFields, + [newField.id]: newField + }; + + if (Object.keys(updatedFields).length > 0 && listSelector) { + addListStep(listSelector, updatedFields); + console.log('Called addListStep with:', { listSelector, updatedFields }) + // Reset the state after adding the step + setListSelector(null); + setFields({}); + } + + return updatedFields; + }); }; // useEffect(() => { - // // Save the ListStep after the fields are set - // if (Object.keys(fields).length > 0 && listSelector) { - // addListStep(listSelector, fields); - // // Reset after adding the step - // setListSelector(null); - // setFields({}); - // } + // // Save the ListStep after the fields are set + // if (Object.keys(fields).length > 0 && listSelector) { + // console.log('listSelector before addListStep:', listSelector); + // console.log('fields before addListStep:', fields) + // // ; + // addListStep(listSelector, fields); + // console.log('Called addListStep with:', { listSelector, fields }) + // // Reset after adding the step + // //setListSelector(null); + // // setFields({}); + // } // }, [fields, listSelector, addListStep]); const handleAttributeSelection = (attribute: string) => { From 6d2d84c232cdae75531bb4c50124ffa8f05f4348 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 08:54:26 +0530 Subject: [PATCH 058/172] feat:revert all changes --- src/components/organisms/BrowserWindow.tsx | 73 ++++++++++------------ 1 file changed, 33 insertions(+), 40 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 0f6fa8bf..5d72cbf1 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -105,12 +105,12 @@ export const BrowserWindow = () => { }; }, [socket, onMouseMove]); - const handleClick = useCallback((e: React.MouseEvent) => { + const handleClick = (e: React.MouseEvent) => { if (highlighterData && canvasRef?.current) { const canvasRect = canvasRef.current.getBoundingClientRect(); const clickX = e.clientX - canvasRect.left; const clickY = e.clientY - canvasRect.top; - + const highlightRect = highlighterData.rect; if ( clickX >= highlightRect.left && @@ -134,10 +134,11 @@ export const BrowserWindow = () => { attribute: 'innerText' }); } - } - + } + if (getList === true && !listSelector) { setListSelector(highlighterData.selector); + //console.log('added list selector', highlighterData.selector); } else if (getList === true && listSelector) { const options = getAttributeOptions(highlighterData.elementInfo?.tagName || ''); if (options.length > 1) { @@ -148,9 +149,10 @@ export const BrowserWindow = () => { }); setShowAttributeModal(true); } else { + // When setting fields, ensure it matches the TextStep structure const newField: TextStep = { id: Date.now(), - type: 'text', + type: 'text', // or another appropriate type label: `label ${Object.keys(fields).length + 1}`, data: highlighterData.elementInfo?.innerText || '', selectorObj: { @@ -159,49 +161,40 @@ export const BrowserWindow = () => { attribute: 'innerText' } }; - - // Use a callback to update the state after rendering - setTimeout(() => { - processFieldsUpdate(newField); - }, 0); + //console.log('added new field:', newField) + + setFields(prevFields => { + const updatedFields = { + ...prevFields, + [newField.id]: newField + }; + + if (Object.keys(updatedFields).length > 0 && listSelector) { + // console.log('listSelector before addListStep:', listSelector); + // console.log('fields before addListStep:', updatedFields); + + addListStep(listSelector, updatedFields); + console.log('Called addListStep with:', { listSelector, updatedFields }); + } + + return updatedFields; + }); } + } } } - }, [highlighterData, canvasRef, getText, getList, listSelector, fields, addTextStep, addListStep]); - - const processFieldsUpdate = (newField: TextStep) => { - setFields(prevFields => { - const updatedFields = { - ...prevFields, - [newField.id]: newField - }; - - if (Object.keys(updatedFields).length > 0 && listSelector) { - addListStep(listSelector, updatedFields); - console.log('Called addListStep with:', { listSelector, updatedFields }) - // Reset the state after adding the step - setListSelector(null); - setFields({}); - } - - return updatedFields; - }); }; // useEffect(() => { - // // Save the ListStep after the fields are set - // if (Object.keys(fields).length > 0 && listSelector) { - // console.log('listSelector before addListStep:', listSelector); - // console.log('fields before addListStep:', fields) - // // ; - // addListStep(listSelector, fields); - // console.log('Called addListStep with:', { listSelector, fields }) - // // Reset after adding the step - // //setListSelector(null); - // // setFields({}); - // } + // // Save the ListStep after the fields are set + // if (Object.keys(fields).length > 0 && listSelector) { + // addListStep(listSelector, fields); + // // Reset after adding the step + // setListSelector(null); + // setFields({}); + // } // }, [fields, listSelector, addListStep]); const handleAttributeSelection = (attribute: string) => { From 336ef302f32849608212dca4859e9d4e1e4835dd Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 09:23:56 +0530 Subject: [PATCH 059/172] fix: no re-renders of addListStep!!!!!!!!!!!!!!!!!! --- src/components/organisms/BrowserWindow.tsx | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 5d72cbf1..1f7c0626 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -169,16 +169,22 @@ export const BrowserWindow = () => { [newField.id]: newField }; - if (Object.keys(updatedFields).length > 0 && listSelector) { - // console.log('listSelector before addListStep:', listSelector); - // console.log('fields before addListStep:', updatedFields); + // if (Object.keys(updatedFields).length > 0 && listSelector) { + // // console.log('listSelector before addListStep:', listSelector); + // // console.log('fields before addListStep:', updatedFields); - addListStep(listSelector, updatedFields); - console.log('Called addListStep with:', { listSelector, updatedFields }); - } + // addListStep(listSelector, updatedFields); + // console.log('Called addListStep with:', { listSelector, updatedFields }); + // } return updatedFields; }); + + // Call addListStep outside of setFields + if (listSelector) { + addListStep(listSelector, {...fields, [newField.id]: newField}); + console.log('Called addListStep with:', { listSelector, updatedFields: {...fields, [newField.id]: newField} }); + } } } From 208e149235ae70621ed2866516f73ba7c84544cf Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 09:24:18 +0530 Subject: [PATCH 060/172] fix: remove useEffect --- src/components/organisms/BrowserWindow.tsx | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 1f7c0626..01f74fbf 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -193,16 +193,6 @@ export const BrowserWindow = () => { }; - // useEffect(() => { - // // Save the ListStep after the fields are set - // if (Object.keys(fields).length > 0 && listSelector) { - // addListStep(listSelector, fields); - // // Reset after adding the step - // setListSelector(null); - // setFields({}); - // } - // }, [fields, listSelector, addListStep]); - const handleAttributeSelection = (attribute: string) => { if (selectedElement) { let data = ''; From 57cce5a2723b993e1ea23cba0cb007950d214d06 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 09:24:58 +0530 Subject: [PATCH 061/172] chore: remove unwanted code --- src/components/organisms/BrowserWindow.tsx | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 01f74fbf..8147fdd4 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -152,7 +152,7 @@ export const BrowserWindow = () => { // When setting fields, ensure it matches the TextStep structure const newField: TextStep = { id: Date.now(), - type: 'text', // or another appropriate type + type: 'text', label: `label ${Object.keys(fields).length + 1}`, data: highlighterData.elementInfo?.innerText || '', selectorObj: { @@ -169,18 +169,10 @@ export const BrowserWindow = () => { [newField.id]: newField }; - // if (Object.keys(updatedFields).length > 0 && listSelector) { - // // console.log('listSelector before addListStep:', listSelector); - // // console.log('fields before addListStep:', updatedFields); - - // addListStep(listSelector, updatedFields); - // console.log('Called addListStep with:', { listSelector, updatedFields }); - // } return updatedFields; }); - // Call addListStep outside of setFields if (listSelector) { addListStep(listSelector, {...fields, [newField.id]: newField}); console.log('Called addListStep with:', { listSelector, updatedFields: {...fields, [newField.id]: newField} }); From 4bf687db34de2a9efd17dbfc3fa9f8483eeea350 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 09:25:20 +0530 Subject: [PATCH 062/172] chore: clean console logs --- src/components/organisms/BrowserWindow.tsx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 8147fdd4..61166233 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -138,7 +138,6 @@ export const BrowserWindow = () => { if (getList === true && !listSelector) { setListSelector(highlighterData.selector); - //console.log('added list selector', highlighterData.selector); } else if (getList === true && listSelector) { const options = getAttributeOptions(highlighterData.elementInfo?.tagName || ''); if (options.length > 1) { @@ -148,8 +147,7 @@ export const BrowserWindow = () => { info: highlighterData.elementInfo }); setShowAttributeModal(true); - } else { - // When setting fields, ensure it matches the TextStep structure + } else const newField: TextStep = { id: Date.now(), type: 'text', From 07ea6327ff70ba6924f2b3a75ad0ad2724801778 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 09:25:43 +0530 Subject: [PATCH 063/172] chore: lint --- src/components/organisms/BrowserWindow.tsx | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 61166233..0116d03c 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -134,7 +134,7 @@ export const BrowserWindow = () => { attribute: 'innerText' }); } - } + } if (getList === true && !listSelector) { setListSelector(highlighterData.selector); @@ -147,10 +147,10 @@ export const BrowserWindow = () => { info: highlighterData.elementInfo }); setShowAttributeModal(true); - } else + } else { const newField: TextStep = { id: Date.now(), - type: 'text', + type: 'text', label: `label ${Object.keys(fields).length + 1}`, data: highlighterData.elementInfo?.innerText || '', selectorObj: { @@ -161,20 +161,20 @@ export const BrowserWindow = () => { }; //console.log('added new field:', newField) - setFields(prevFields => { - const updatedFields = { - ...prevFields, - [newField.id]: newField - }; - - - return updatedFields; - }); + setFields(prevFields => { + const updatedFields = { + ...prevFields, + [newField.id]: newField + }; - if (listSelector) { - addListStep(listSelector, {...fields, [newField.id]: newField}); - console.log('Called addListStep with:', { listSelector, updatedFields: {...fields, [newField.id]: newField} }); - } + + return updatedFields; + }); + + if (listSelector) { + addListStep(listSelector, { ...fields, [newField.id]: newField }); + console.log('Called addListStep with:', { listSelector, updatedFields: { ...fields, [newField.id]: newField } }); + } } } From 3efeed61baf93f1af1179caf536ea5e717f58f69 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 09:26:29 +0530 Subject: [PATCH 064/172] chore: format --- src/components/organisms/BrowserWindow.tsx | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 0116d03c..df080528 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -48,7 +48,6 @@ export const BrowserWindow = () => { const [listSelector, setListSelector] = useState(null); const [fields, setFields] = useState>({}); - const { socket } = useSocketStore(); const { width, height } = useBrowserDimensionsStore(); const { getText, getList } = useActionContext(); @@ -159,15 +158,12 @@ export const BrowserWindow = () => { attribute: 'innerText' } }; - //console.log('added new field:', newField) setFields(prevFields => { const updatedFields = { ...prevFields, [newField.id]: newField }; - - return updatedFields; }); @@ -182,7 +178,6 @@ export const BrowserWindow = () => { } }; - const handleAttributeSelection = (attribute: string) => { if (selectedElement) { let data = ''; From 28fc1486dbfe87960aa761d1ef438b23033821b5 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 09:33:05 +0530 Subject: [PATCH 065/172] fix: revert --- src/context/browserSteps.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index b9d69762..cd465cc8 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -1,6 +1,6 @@ import React, { createContext, useContext, useState } from 'react'; -interface TextStep { +export interface TextStep { id: number; type: 'text'; label: string; From 531617474ffa74500c1b2fb634811597e73a755a Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 10:32:48 +0530 Subject: [PATCH 066/172] feat(WIP): scrapeList action --- src/components/organisms/RightSidePanel.tsx | 87 ++++++++++++++++++++- 1 file changed, 85 insertions(+), 2 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 5149a85c..27e7106c 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -80,6 +80,39 @@ export const RightSidePanel = () => { } }, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps]); + + const getListSettingsObject = useCallback(() => { + const listStep = browserSteps.find(step => step.type === 'list'); + if (!listStep || listStep.type !== 'list' || Object.keys(listStep.fields).length === 0) return null; + + const firstFieldKey = Object.keys(listStep.fields)[0]; + const firstField = listStep.fields[firstFieldKey]; + + return { + listSelector: listStep.listSelector, + fields: { + [firstField.label]: { + selector: firstField.selectorObj.selector, + attribute: firstField.selectorObj.attribute || 'innerText' + } + } + }; + }, [browserSteps]); + + const stopCaptureAndEmitGetListSettings = useCallback(() => { + stopGetList(); + const settings = getListSettingsObject(); + if (settings) { + socket?.emit('action', { action: 'scrapeList', settings }); + } else { + notify('error', 'Unable to create list settings. Make sure you have defined a field for the list.'); + } + }, [stopGetList, getListSettingsObject, socket, notify]); + + // const handleListFieldChange = (stepId: number, key: 'label' | 'data', value: string) => { + // updateListStepField(stepId, key, value); + // }; + const captureScreenshot = (fullPage: boolean) => { const screenshotSettings: ScreenshotSettings = { fullPage, @@ -105,7 +138,7 @@ export const RightSidePanel = () => { {getList && <> - + @@ -176,13 +209,63 @@ export const RightSidePanel = () => { )} ) : ( - step.type === 'screenshot' && ( + step.type === 'screenshot' ? ( {`Take ${step.fullPage ? 'Fullpage' : 'Visible Part'} Screenshot`} + ) : ( + step.type === 'list' && Object.keys(step.fields).length > 0 && ( + <> + + + + ) + }} + /> + {Object.entries(step.fields).map(([key, field]) => ( + + {}} + fullWidth + margin="normal" + InputProps={{ + startAdornment: ( + + + + ) + }} + /> + + + + ) + }} + /> + + ))} + + ) ) ) } From 21a34a18828b3fe1f349afc37a6cd1e4e4c54c64 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 9 Aug 2024 19:19:44 +0530 Subject: [PATCH 067/172] chore: lint --- src/components/organisms/RightSidePanel.tsx | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 27e7106c..2efdc7bf 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -134,8 +134,8 @@ export const RightSidePanel = () => { - {!getText && !getScreenshot && !getList && } - {getList && + {!getText && !getScreenshot && !getList && } + {getList && <> @@ -237,7 +237,7 @@ export const RightSidePanel = () => { {}} + onChange={() => { }} fullWidth margin="normal" InputProps={{ From c4ebffaf6c85bc12ec97a916d7ea7229ecbfaee6 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 04:10:00 +0530 Subject: [PATCH 068/172] feat: export SelectorObject interface --- src/context/browserSteps.tsx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index cd465cc8..1e4263d7 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -23,7 +23,7 @@ interface ListStep { type BrowserStep = TextStep | ScreenshotStep | ListStep; -interface SelectorObject { +export interface SelectorObject { selector: string; tag?: string; attribute?: string; @@ -77,6 +77,14 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ ); }; + const updateListStep = (id: number, updatedListStep: ListStep) => { + setBrowserSteps(prevSteps => + prevSteps.map(step => + step.id === id && step.type === 'list' ? updatedListStep : step + ) + ); + }; + return ( Date: Sat, 10 Aug 2024 04:46:09 +0530 Subject: [PATCH 069/172] feat: get list settings THE RIGHT WAY --- src/components/organisms/RightSidePanel.tsx | 77 ++++++++++++++------- 1 file changed, 53 insertions(+), 24 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 2efdc7bf..804a877a 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -8,7 +8,7 @@ import { SimpleBox } from "../atoms/Box"; import Typography from "@mui/material/Typography"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { useActionContext } from '../../context/browserActions'; -import { useBrowserSteps } from '../../context/browserSteps'; +import { useBrowserSteps, ListStep, TextStep, SelectorObject } from '../../context/browserSteps'; import { useSocketStore } from '../../context/socket'; import { ScreenshotSettings } from '../../shared/types'; import InputAdornment from '@mui/material/InputAdornment'; @@ -62,6 +62,7 @@ export const RightSidePanel = () => { settings[step.label] = step.selectorObj; } }); + console.log(`seetings text:`, settings) return settings; }, [browserSteps]); @@ -82,32 +83,60 @@ export const RightSidePanel = () => { const getListSettingsObject = useCallback(() => { - const listStep = browserSteps.find(step => step.type === 'list'); - if (!listStep || listStep.type !== 'list' || Object.keys(listStep.fields).length === 0) return null; + const settings: Record }> = {}; - const firstFieldKey = Object.keys(listStep.fields)[0]; - const firstField = listStep.fields[firstFieldKey]; + browserSteps.forEach(step => { + if (step.type === 'list' && step.listSelector && Object.keys(step.fields).length > 0) { + const fields: Record = {}; + Object.entries(step.fields).forEach(([label, field]) => { + if (field.selectorObj?.selector) { + fields[label] = { + selector: field.selectorObj.selector, + tag: field.selectorObj.tag, + attribute: field.selectorObj.attribute + }; + } + }); - return { - listSelector: listStep.listSelector, - fields: { - [firstField.label]: { - selector: firstField.selectorObj.selector, - attribute: firstField.selectorObj.attribute || 'innerText' + // settings.listSelector = step.listSelector; + // settings.fields = fields; + + settings[step.listSelector] = { + listSelector: step.listSelector, + fields: fields + }; } - } - }; - }, [browserSteps]); + }); - const stopCaptureAndEmitGetListSettings = useCallback(() => { - stopGetList(); - const settings = getListSettingsObject(); - if (settings) { - socket?.emit('action', { action: 'scrapeList', settings }); - } else { - notify('error', 'Unable to create list settings. Make sure you have defined a field for the list.'); - } - }, [stopGetList, getListSettingsObject, socket, notify]); + console.log(`Setting LIST:`, settings) + + return settings; +}, [browserSteps]); + + + // const firstFieldKey = Object.keys(listStep.fields)[0]; + // const firstField = listStep.fields[firstFieldKey]; + + // return { + // listSelector: listStep.listSelector, + // fields: { + // [firstField.label]: { + // selector: firstField.selectorObj.selector, + // attribute: firstField.selectorObj.attribute || 'innerText' + // } + // } + // }; + // }, [browserSteps]); + + // const stopCaptureAndEmitGetListSettings = useCallback(() => { + // stopGetList(); + // const settings = getListSettingsObject(); + // if (settings) { + // socket?.emit('action', { action: 'scrapeList', settings }); + // } else { + // notify('error', 'Unable to create list settings. Make sure you have defined a field for the list.'); + // } + // }, [stopGetList, getListSettingsObject, socket, notify]); // const handleListFieldChange = (stepId: number, key: 'label' | 'data', value: string) => { // updateListStepField(stepId, key, value); @@ -138,7 +167,7 @@ export const RightSidePanel = () => { {getList && <> - + From 64342279f7ac1cb73838f4572f22ded283c58ac2 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 05:04:50 +0530 Subject: [PATCH 070/172] feat: capture & emit list step --- src/components/organisms/RightSidePanel.tsx | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 804a877a..6e1b8132 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -128,15 +128,15 @@ export const RightSidePanel = () => { // }; // }, [browserSteps]); - // const stopCaptureAndEmitGetListSettings = useCallback(() => { - // stopGetList(); - // const settings = getListSettingsObject(); - // if (settings) { - // socket?.emit('action', { action: 'scrapeList', settings }); - // } else { - // notify('error', 'Unable to create list settings. Make sure you have defined a field for the list.'); - // } - // }, [stopGetList, getListSettingsObject, socket, notify]); + const stopCaptureAndEmitGetListSettings = useCallback(() => { + stopGetList(); + const settings = getListSettingsObject(); + if (settings) { + socket?.emit('action', { action: 'scrapeList', settings }); + } else { + notify('error', 'Unable to create list settings. Make sure you have defined a field for the list.'); + } + }, [stopGetList, getListSettingsObject, socket, notify]); // const handleListFieldChange = (stepId: number, key: 'label' | 'data', value: string) => { // updateListStepField(stepId, key, value); @@ -167,7 +167,7 @@ export const RightSidePanel = () => { {getList && <> - + From 9504c7075c8100b162b08b083d2737b16a4c19c8 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 05:05:40 +0530 Subject: [PATCH 071/172] fix: -rm single listStep settings obj --- src/components/organisms/RightSidePanel.tsx | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 6e1b8132..ed5a5cbe 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -114,20 +114,6 @@ export const RightSidePanel = () => { }, [browserSteps]); - // const firstFieldKey = Object.keys(listStep.fields)[0]; - // const firstField = listStep.fields[firstFieldKey]; - - // return { - // listSelector: listStep.listSelector, - // fields: { - // [firstField.label]: { - // selector: firstField.selectorObj.selector, - // attribute: firstField.selectorObj.attribute || 'innerText' - // } - // } - // }; - // }, [browserSteps]); - const stopCaptureAndEmitGetListSettings = useCallback(() => { stopGetList(); const settings = getListSettingsObject(); From 38dbf5fba97bbc05cb6cfd19f9d2031a964fd6ac Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 05:06:09 +0530 Subject: [PATCH 072/172] chore: code cleanup --- src/components/organisms/RightSidePanel.tsx | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index ed5a5cbe..7e42b129 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -98,9 +98,6 @@ export const RightSidePanel = () => { } }); - // settings.listSelector = step.listSelector; - // settings.fields = fields; - settings[step.listSelector] = { listSelector: step.listSelector, fields: fields From c712e7305c074f6413c10fc0b1ae1de221242d1a Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 05:06:43 +0530 Subject: [PATCH 073/172] chore: lint --- src/components/organisms/RightSidePanel.tsx | 50 ++++++++++----------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 7e42b129..c2b72b6f 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -83,43 +83,43 @@ export const RightSidePanel = () => { const getListSettingsObject = useCallback(() => { - const settings: Record }> = {}; + const settings: Record }> = {}; browserSteps.forEach(step => { - if (step.type === 'list' && step.listSelector && Object.keys(step.fields).length > 0) { - const fields: Record = {}; - Object.entries(step.fields).forEach(([label, field]) => { - if (field.selectorObj?.selector) { - fields[label] = { - selector: field.selectorObj.selector, - tag: field.selectorObj.tag, - attribute: field.selectorObj.attribute - }; - } - }); + if (step.type === 'list' && step.listSelector && Object.keys(step.fields).length > 0) { + const fields: Record = {}; + Object.entries(step.fields).forEach(([label, field]) => { + if (field.selectorObj?.selector) { + fields[label] = { + selector: field.selectorObj.selector, + tag: field.selectorObj.tag, + attribute: field.selectorObj.attribute + }; + } + }); - settings[step.listSelector] = { - listSelector: step.listSelector, - fields: fields - }; - } + settings[step.listSelector] = { + listSelector: step.listSelector, + fields: fields + }; + } }); console.log(`Setting LIST:`, settings) return settings; -}, [browserSteps]); + }, [browserSteps]); const stopCaptureAndEmitGetListSettings = useCallback(() => { - stopGetList(); - const settings = getListSettingsObject(); - if (settings) { - socket?.emit('action', { action: 'scrapeList', settings }); + stopGetList(); + const settings = getListSettingsObject(); + if (settings) { + socket?.emit('action', { action: 'scrapeList', settings }); } else { - notify('error', 'Unable to create list settings. Make sure you have defined a field for the list.'); - } - }, [stopGetList, getListSettingsObject, socket, notify]); + notify('error', 'Unable to create list settings. Make sure you have defined a field for the list.'); + } + }, [stopGetList, getListSettingsObject, socket, notify]); // const handleListFieldChange = (stepId: number, key: 'label' | 'data', value: string) => { // updateListStepField(stepId, key, value); From 49f057f3773dde61de9b962bdc49f3e196a7956d Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 05:08:51 +0530 Subject: [PATCH 074/172] chore: clean console logs --- src/components/organisms/RightSidePanel.tsx | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index c2b72b6f..7913a4d0 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -62,7 +62,6 @@ export const RightSidePanel = () => { settings[step.label] = step.selectorObj; } }); - console.log(`seetings text:`, settings) return settings; }, [browserSteps]); @@ -105,8 +104,6 @@ export const RightSidePanel = () => { } }); - console.log(`Setting LIST:`, settings) - return settings; }, [browserSteps]); From 7006e1bde5e1a199894a23ef5b287f8358adb3c9 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 06:18:52 +0530 Subject: [PATCH 075/172] fix: rename to Field Data --- src/components/organisms/RightSidePanel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 7913a4d0..11f80a63 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -258,7 +258,7 @@ export const RightSidePanel = () => { }} /> Date: Sat, 10 Aug 2024 06:23:44 +0530 Subject: [PATCH 076/172] fix: pass label instead of id --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index df080528..1e566932 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -162,7 +162,7 @@ export const BrowserWindow = () => { setFields(prevFields => { const updatedFields = { ...prevFields, - [newField.id]: newField + [newField.label]: newField }; return updatedFields; }); From 943b13e1b8982a4712b148f83e6ebfca00b68973 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 06:25:51 +0530 Subject: [PATCH 077/172] feat: use capital L --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 1e566932..ef73eace 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -150,7 +150,7 @@ export const BrowserWindow = () => { const newField: TextStep = { id: Date.now(), type: 'text', - label: `label ${Object.keys(fields).length + 1}`, + label: `Label ${Object.keys(fields).length + 1}`, data: highlighterData.elementInfo?.innerText || '', selectorObj: { selector: highlighterData.selector, From 58864257d8d46616269d32dcd1900c66465e809d Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 06:27:03 +0530 Subject: [PATCH 078/172] fix: pass label instead of id --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index ef73eace..c7521852 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -168,7 +168,7 @@ export const BrowserWindow = () => { }); if (listSelector) { - addListStep(listSelector, { ...fields, [newField.id]: newField }); + addListStep(listSelector, { ...fields, [newField.label]: newField }); console.log('Called addListStep with:', { listSelector, updatedFields: { ...fields, [newField.id]: newField } }); } } From d66c3d04b8aae129000747668896d42019a40a70 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 06:29:53 +0530 Subject: [PATCH 079/172] chore: remove console logs --- src/components/organisms/BrowserWindow.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index c7521852..ee2d3322 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -169,7 +169,6 @@ export const BrowserWindow = () => { if (listSelector) { addListStep(listSelector, { ...fields, [newField.label]: newField }); - console.log('Called addListStep with:', { listSelector, updatedFields: { ...fields, [newField.id]: newField } }); } } From 1e5909f0c4ba8742517a86aefd9f8121cec7496f Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 06:35:03 +0530 Subject: [PATCH 080/172] feat: list selection ui --- src/components/organisms/RightSidePanel.tsx | 16 +++------------- 1 file changed, 3 insertions(+), 13 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 11f80a63..44794a0b 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -228,19 +228,9 @@ export const RightSidePanel = () => { ) : ( step.type === 'list' && Object.keys(step.fields).length > 0 && ( <> - - - - ) - }} - /> + + List Selected Successfully + {Object.entries(step.fields).map(([key, field]) => ( Date: Sat, 10 Aug 2024 06:36:29 +0530 Subject: [PATCH 081/172] fix: use TextFieldsIcon for Data --- src/components/organisms/RightSidePanel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 44794a0b..bf8508dd 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -256,7 +256,7 @@ export const RightSidePanel = () => { readOnly: true, startAdornment: ( - + ) }} From e37a97e45c9bcb454e688845a6b79624438bf1d7 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 06:37:18 +0530 Subject: [PATCH 082/172] feat: show list selection even if 0 fields --- src/components/organisms/RightSidePanel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index bf8508dd..51f0d082 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -226,7 +226,7 @@ export const RightSidePanel = () => { ) : ( - step.type === 'list' && Object.keys(step.fields).length > 0 && ( + step.type === 'list' && ( <> List Selected Successfully From 0ad6391d3f9e9ecd7132131608f0aa353072cec4 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 06:37:50 +0530 Subject: [PATCH 083/172] feat: check for field length --- src/components/organisms/RightSidePanel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 51f0d082..59c79476 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -231,7 +231,7 @@ export const RightSidePanel = () => { List Selected Successfully - {Object.entries(step.fields).map(([key, field]) => ( + {Object.keys(step.fields).length > 0 && Object.entries(step.fields).map(([key, field]) => ( Date: Sat, 10 Aug 2024 06:40:45 +0530 Subject: [PATCH 084/172] feat: check listSelector val --- src/components/organisms/RightSidePanel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 59c79476..ae6c6f9c 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -226,7 +226,7 @@ export const RightSidePanel = () => { ) : ( - step.type === 'list' && ( + step.type === 'list' && step.listSelector && ( <> List Selected Successfully From 680cd4d68ddd80fb1bae1ea91352ee81a2fb2f07 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 07:07:49 +0530 Subject: [PATCH 085/172] feat: better format for browser steps --- src/components/organisms/RightSidePanel.tsx | 103 +++++++++----------- 1 file changed, 47 insertions(+), 56 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index ae6c6f9c..470fd845 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -177,7 +177,7 @@ export const RightSidePanel = () => { {browserSteps.map(step => ( { - step.type === 'text' ? ( + step.type === 'text' && ( <> { )} - ) : ( - step.type === 'screenshot' ? ( - - - - {`Take ${step.fullPage ? 'Fullpage' : 'Visible Part'} Screenshot`} - + )} + {step.type === 'screenshot' && ( + + + + {`Take ${step.fullPage ? 'Fullpage' : 'Visible Part'} Screenshot`} + + + )} + {step.type === 'list' && ( + <> + List Selected Successfully + {Object.entries(step.fields).map(([key, field]) => ( + + {}} + fullWidth + margin="normal" + InputProps={{ + startAdornment: ( + + + + ) + }} + /> + + + + ) + }} + /> - ) : ( - step.type === 'list' && step.listSelector && ( - <> - - List Selected Successfully - - {Object.keys(step.fields).length > 0 && Object.entries(step.fields).map(([key, field]) => ( - - { }} - fullWidth - margin="normal" - InputProps={{ - startAdornment: ( - - - - ) - }} - /> - - - - ) - }} - /> - - ))} - - ) - ) - ) - } + ))} + + )} ))} ); -}; - -export const ActionDescription = styled.p` - margin-left: 15px; -`; +}; \ No newline at end of file From 8bb8ea2afe21dded7510610b785b97ae4cf2d450 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 07:42:41 +0530 Subject: [PATCH 086/172] chore: lint --- src/components/organisms/RightSidePanel.tsx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 470fd845..ce934a9e 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -218,15 +218,15 @@ export const RightSidePanel = () => { )} )} - {step.type === 'screenshot' && ( + {step.type === 'screenshot' && ( {`Take ${step.fullPage ? 'Fullpage' : 'Visible Part'} Screenshot`} - )} - {step.type === 'list' && ( + )} + {step.type === 'list' && ( <> List Selected Successfully {Object.entries(step.fields).map(([key, field]) => ( @@ -234,7 +234,7 @@ export const RightSidePanel = () => { {}} + onChange={() => { }} fullWidth margin="normal" InputProps={{ From 47148e1863f7ad58b2978f88300f4261291f9da5 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 21:00:27 +0530 Subject: [PATCH 087/172] fix: correct structure for list settings --- src/components/organisms/RightSidePanel.tsx | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index ce934a9e..9261085e 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -82,7 +82,7 @@ export const RightSidePanel = () => { const getListSettingsObject = useCallback(() => { - const settings: Record }> = {}; + let settings: { listSelector?: string; fields?: Record } = {}; browserSteps.forEach(step => { if (step.type === 'list' && step.listSelector && Object.keys(step.fields).length > 0) { @@ -97,10 +97,11 @@ export const RightSidePanel = () => { } }); - settings[step.listSelector] = { + settings = { listSelector: step.listSelector, fields: fields - }; + }; + } }); From 45df4655ae88a199be0827edc3f1f80f5222e107 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 21:25:17 +0530 Subject: [PATCH 088/172] chore: lint --- src/components/organisms/RightSidePanel.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 9261085e..154f1b55 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -82,7 +82,7 @@ export const RightSidePanel = () => { const getListSettingsObject = useCallback(() => { - let settings: { listSelector?: string; fields?: Record } = {}; + let settings: { listSelector?: string; fields?: Record } = {}; browserSteps.forEach(step => { if (step.type === 'list' && step.listSelector && Object.keys(step.fields).length > 0) { @@ -100,7 +100,7 @@ export const RightSidePanel = () => { settings = { listSelector: step.listSelector, fields: fields - }; + }; } }); From ac43411f5ab1fd8d197120dd2af7705cac94ab81 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 22:39:56 +0530 Subject: [PATCH 089/172] feat: attribute selection for list step --- src/components/organisms/BrowserWindow.tsx | 25 ++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index ee2d3322..ec9bf4bc 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -198,6 +198,31 @@ export const BrowserWindow = () => { attribute: attribute }); } + if (getList === true) { + const newField: TextStep = { + id: Date.now(), + type: 'text', + label: `Label ${Object.keys(fields).length + 1}`, + data: selectedElement.info?.innerText || '', + selectorObj: { + selector: selectedElement.selector, + tag: selectedElement.info?.tagName, + attribute: 'innerText' + } + }; + + setFields(prevFields => { + const updatedFields = { + ...prevFields, + [newField.label]: newField + }; + return updatedFields; + }); + + if (listSelector) { + addListStep(listSelector, { ...fields, [newField.label]: newField }); + } + } } } setShowAttributeModal(false); From 9085202746ca5653e2c66631a51096e4ffca0670 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 10 Aug 2024 23:16:09 +0530 Subject: [PATCH 090/172] fix: pass attribute instead of innerText --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index ec9bf4bc..daf95fee 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -207,7 +207,7 @@ export const BrowserWindow = () => { selectorObj: { selector: selectedElement.selector, tag: selectedElement.info?.tagName, - attribute: 'innerText' + attribute: attribute } }; From b9e45b2b7f86765768f871a905faf116324c3bd0 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 11 Aug 2024 08:14:20 +0530 Subject: [PATCH 091/172] feat: add fields to existing list step --- src/context/browserSteps.tsx | 39 +++++++++++++++++++++++------------- 1 file changed, 25 insertions(+), 14 deletions(-) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index 1e4263d7..8d855c3e 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -14,7 +14,7 @@ interface ScreenshotStep { fullPage: boolean; } -interface ListStep { +export interface ListStep { id: number; type: 'list'; listSelector: string; @@ -51,12 +51,31 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ ]); }; - const addListStep = (listSelector: string, fields: { [key: string]: TextStep }) => { - setBrowserSteps(prevSteps => [ - ...prevSteps, - { id: Date.now(), type: 'list', listSelector, fields } - ]); + const addListStep = (listSelector: string, newFields: { [key: string]: TextStep }) => { + setBrowserSteps(prevSteps => { + const existingListStepIndex = prevSteps.findIndex( + step => step.type === 'list' && step.listSelector === listSelector + ); + + if (existingListStepIndex !== -1) { + // Update the existing ListStep with new fields + const updatedSteps = [...prevSteps]; + const existingListStep = updatedSteps[existingListStepIndex] as ListStep; + updatedSteps[existingListStepIndex] = { + ...existingListStep, + fields: { ...existingListStep.fields, ...newFields } + }; + return updatedSteps; + } else { + // Create a new ListStep + return [ + ...prevSteps, + { id: Date.now(), type: 'list', listSelector, fields: newFields } + ]; + } + }); }; + const addScreenshotStep = (fullPage: boolean) => { setBrowserSteps(prevSteps => [ @@ -77,14 +96,6 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ ); }; - const updateListStep = (id: number, updatedListStep: ListStep) => { - setBrowserSteps(prevSteps => - prevSteps.map(step => - step.id === id && step.type === 'list' ? updatedListStep : step - ) - ); - }; - return ( Date: Sun, 11 Aug 2024 08:14:52 +0530 Subject: [PATCH 092/172] chore: lint --- src/context/browserSteps.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index 8d855c3e..59cdb546 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -56,7 +56,6 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ const existingListStepIndex = prevSteps.findIndex( step => step.type === 'list' && step.listSelector === listSelector ); - if (existingListStepIndex !== -1) { // Update the existing ListStep with new fields const updatedSteps = [...prevSteps]; @@ -75,7 +74,7 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ } }); }; - + const addScreenshotStep = (fullPage: boolean) => { setBrowserSteps(prevSteps => [ From bd9105ed05de333f63a8aeeaad3ebb64fedc7e22 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 11 Aug 2024 08:46:48 +0530 Subject: [PATCH 093/172] chore: add todo --- src/components/organisms/RightSidePanel.tsx | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 154f1b55..9a9fff51 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -13,6 +13,11 @@ import { useSocketStore } from '../../context/socket'; import { ScreenshotSettings } from '../../shared/types'; import InputAdornment from '@mui/material/InputAdornment'; +// TODO: +// 1. Handle field label update +// 2. Handle field deletion | confirmation +// 3. Add description for each browser step +// 4. Handle non custom action steps export const RightSidePanel = () => { const [textLabels, setTextLabels] = useState<{ [id: number]: string }>({}); From faa1d48a0850f90a978bf1c1c60468e25b3c1c37 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 12 Aug 2024 00:18:11 +0530 Subject: [PATCH 094/172] feat: scrollDown --- maxun-core/src/browserSide/scraper.js | 46 +++++++++++++++++++++++++-- 1 file changed, 43 insertions(+), 3 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 8528f529..0ee2b4a1 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -126,6 +126,25 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return out; } +async function scrollDownToLoadMore(selector, limit) { + let previousHeight = 0; + let itemsLoaded = 0; + + while (itemsLoaded < limit) { + window.scrollBy(0, window.innerHeight); + await new Promise(resolve => setTimeout(resolve, 1000)); + + const currentHeight = document.body.scrollHeight; + + if (currentHeight === previousHeight) { + break; // No more items to load + } + + previousHeight = currentHeight; + itemsLoaded += document.querySelectorAll(selector).length; + } +} + /** * Returns a "scrape" result from the current page. * @returns {Array} *Curated* array of scraped information (with sparse rows removed) @@ -250,7 +269,6 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, )); } - /** * Scrapes multiple lists of similar items based on a template item. * @param {Object} config - Configuration object @@ -260,11 +278,33 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ - window.scrapeList = function (config) { - const { listSelector, fields, limit, flexible = false } = config; + window.scrapeList = async function (config) { + const { listSelector, fields, limit, flexible = false, pagination } = config; const lists = Array.from(document.querySelectorAll(listSelector)); + if (pagination) { + const { type, selector } = pagination; + + switch (type) { + case 'scrollDown': + await scrollDownToLoadMore(pagination.selector, config.limit); + break; + // case 'scrollUp': + // await scrollUpToLoadMore(limit); + // break; + // case 'clickNext': + // if (selector) await clickNextToNavigate(selector, limit); + // break; + // case 'clickLoadMore': + // if (selector) await clickLoadMore(selector, limit); + // break; + default: + // No pagination or different handling + break; + } + } + return lists.map(list => { const listItems = Array.from(list.children); From 871d4fd48a8eb1f2cf500e515724cfc31cd478ba Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 12 Aug 2024 05:26:15 +0530 Subject: [PATCH 095/172] feat: scrapeListAuto --- maxun-core/src/browserSide/scraper.js | 71 +++++++++++++++++++++++---- 1 file changed, 61 insertions(+), 10 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 0ee2b4a1..6d42b43a 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -317,15 +317,18 @@ async function scrollDownToLoadMore(selector, limit) { for (const [fieldName, fieldConfig] of Object.entries(fields)) { let element; - if (flexible) { - // try multiple strategies to find the element - element = item.querySelector(fieldConfig.selector) || - item.querySelector(`[class*="${fieldConfig.selector}"]`) || - Array.from(item.querySelectorAll('*')) - .find(el => el.textContent.trim() === fieldConfig.selector); - } else { - element = item.querySelector(fieldConfig.selector); - } + // if (flexible) { + // // try multiple strategies to find the element + // element = item.querySelector(fieldConfig.selector) || + // item.querySelector(`[class*="${fieldConfig.selector}"]`) || + // Array.from(item.querySelectorAll('*')) + // .find(el => el.textContent.trim() === fieldConfig.selector); + // } else { + element = item.querySelector(fieldConfig.selector); + + console.debug('Element:', element); + + // } if (element) { switch (fieldConfig.attribute) { @@ -344,7 +347,8 @@ async function scrollDownToLoadMore(selector, limit) { break; } } else { - scrapedItem[fieldName] = null; + // send a message that says it failed + scrapedItem[fieldName] = `Failed to scrape ${fieldName}`; } } @@ -353,4 +357,51 @@ async function scrollDownToLoadMore(selector, limit) { }); }; + + /** + * Gets all children of the elements matching the listSelector, + * returning their CSS selectors and innerText. + * @param {string} listSelector - Selector for the list container(s) + * @returns {Array.} Array of objects, each containing the CSS selector and innerText of the children + */ +window.scrapeListAuto = function (listSelector) { + const lists = Array.from(document.querySelectorAll(listSelector)); + + const results = []; + + lists.forEach(list => { + const children = Array.from(list.children); + + children.forEach(child => { + const selectors = []; + let element = child; + + // Traverse up to gather the CSS selector for the element + while (element && element !== document) { + let selector = element.nodeName.toLowerCase(); + if (element.id) { + selector += `#${element.id}`; + selectors.push(selector); + break; + } else { + const className = element.className.trim().split(/\s+/).join('.'); + if (className) { + selector += `.${className}`; + } + selectors.push(selector); + element = element.parentElement; + } + } + + results.push({ + selector: selectors.reverse().join(' > '), + innerText: child.innerText.trim() + }); + }); + }); + + return results; +}; + + })(window); \ No newline at end of file From 196cfd4ccacb1158b5148a34dab5154028055456 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 12 Aug 2024 05:30:57 +0530 Subject: [PATCH 096/172] chore: lint --- maxun-core/src/browserSide/scraper.js | 68 +++++++++++++-------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 6d42b43a..e2f39560 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -279,13 +279,13 @@ async function scrollDownToLoadMore(selector, limit) { * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ window.scrapeList = async function (config) { - const { listSelector, fields, limit, flexible = false, pagination } = config; + const { listSelector, fields, limit, flexible = false, pagination } = config; const lists = Array.from(document.querySelectorAll(listSelector)); if (pagination) { const { type, selector } = pagination; - + switch (type) { case 'scrollDown': await scrollDownToLoadMore(pagination.selector, config.limit); @@ -364,44 +364,44 @@ async function scrollDownToLoadMore(selector, limit) { * @param {string} listSelector - Selector for the list container(s) * @returns {Array.} Array of objects, each containing the CSS selector and innerText of the children */ -window.scrapeListAuto = function (listSelector) { - const lists = Array.from(document.querySelectorAll(listSelector)); - - const results = []; + window.scrapeListAuto = function (listSelector) { + const lists = Array.from(document.querySelectorAll(listSelector)); - lists.forEach(list => { - const children = Array.from(list.children); - - children.forEach(child => { - const selectors = []; - let element = child; - - // Traverse up to gather the CSS selector for the element - while (element && element !== document) { - let selector = element.nodeName.toLowerCase(); - if (element.id) { - selector += `#${element.id}`; - selectors.push(selector); - break; - } else { - const className = element.className.trim().split(/\s+/).join('.'); - if (className) { - selector += `.${className}`; + const results = []; + + lists.forEach(list => { + const children = Array.from(list.children); + + children.forEach(child => { + const selectors = []; + let element = child; + + // Traverse up to gather the CSS selector for the element + while (element && element !== document) { + let selector = element.nodeName.toLowerCase(); + if (element.id) { + selector += `#${element.id}`; + selectors.push(selector); + break; + } else { + const className = element.className.trim().split(/\s+/).join('.'); + if (className) { + selector += `.${className}`; + } + selectors.push(selector); + element = element.parentElement; } - selectors.push(selector); - element = element.parentElement; } - } - - results.push({ - selector: selectors.reverse().join(' > '), - innerText: child.innerText.trim() + + results.push({ + selector: selectors.reverse().join(' > '), + innerText: child.innerText.trim() + }); }); }); - }); - return results; -}; + return results; + }; })(window); \ No newline at end of file From 2bd4bc9ff146674e8c9a0aa4adf200dfa8c7f008 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 12 Aug 2024 06:28:08 +0530 Subject: [PATCH 097/172] feat: use scrapeListAuto --- maxun-core/src/interpret.ts | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 506c7bd7..32885c63 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -291,12 +291,22 @@ export default class Interpreter extends EventEmitter { await this.options.serializableCallback(scrapeResult); }, - scrapeList: async (config: { listSelector: string, fields: any, limit?: number, flexible?: boolean }) => { + scrapeList: async (config: { listSelector: string, fields: any, limit?: number, flexible?: boolean, pagination:any }) => { await this.ensureScriptsLoaded(page); const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config); await this.options.serializableCallback(scrapeResults); }, + scrapeListAuto: async (config: { listSelector: string }) => { + await this.ensureScriptsLoaded(page); + + const scrapeResults: { selector: string, innerText: string }[] = await page.evaluate((listSelector) => { + return window.scrapeListAuto(listSelector); + }, config.listSelector); + + await this.options.serializableCallback(scrapeResults); + }, + scroll: async (pages?: number) => { await page.evaluate(async (pagesInternal) => { for (let i = 1; i <= (pagesInternal ?? 1); i += 1) { @@ -417,7 +427,7 @@ export default class Interpreter extends EventEmitter { } private async ensureScriptsLoaded(page: Page) { - const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function' && typeof window.scrapeList === 'function'); + const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function' && typeof window.scrapeList === 'function' && typeof window.scrapeListAuto === 'function'); if (!isScriptLoaded) { await page.addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); } From 286a39662dd51e6c3e60009b97fc6311f397fc55 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 12 Aug 2024 06:28:33 +0530 Subject: [PATCH 098/172] feat: add scrapeListAuto to custom actions --- maxun-core/src/types/workflow.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/types/workflow.ts b/maxun-core/src/types/workflow.ts index 459cfa88..f7cf180d 100644 --- a/maxun-core/src/types/workflow.ts +++ b/maxun-core/src/types/workflow.ts @@ -28,7 +28,7 @@ type MethodNames = { [K in keyof T]: T[K] extends Function ? K : never; }[keyof T]; -export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList'; +export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList' | 'scrapeListAuto'; export type What = { action: MethodNames | CustomFunctions, From 6cafc9ef2d337d7de82cba36909fae386c538e91 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 12 Aug 2024 06:35:00 +0530 Subject: [PATCH 099/172] chore: lint --- maxun-core/src/interpret.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 32885c63..1e62566e 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -286,12 +286,12 @@ export default class Interpreter extends EventEmitter { scrapeSchema: async (schema: Record) => { await this.ensureScriptsLoaded(page); - + const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema); await this.options.serializableCallback(scrapeResult); }, - scrapeList: async (config: { listSelector: string, fields: any, limit?: number, flexible?: boolean, pagination:any }) => { + scrapeList: async (config: { listSelector: string, fields: any, limit?: number, flexible?: boolean, pagination: any }) => { await this.ensureScriptsLoaded(page); const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config); await this.options.serializableCallback(scrapeResults); @@ -299,11 +299,11 @@ export default class Interpreter extends EventEmitter { scrapeListAuto: async (config: { listSelector: string }) => { await this.ensureScriptsLoaded(page); - + const scrapeResults: { selector: string, innerText: string }[] = await page.evaluate((listSelector) => { return window.scrapeListAuto(listSelector); }, config.listSelector); - + await this.options.serializableCallback(scrapeResults); }, @@ -315,6 +315,7 @@ export default class Interpreter extends EventEmitter { } }, pages ?? 1); }, + script: async (code: string) => { const AsyncFunction: FunctionConstructor = Object.getPrototypeOf( async () => { }, @@ -322,6 +323,7 @@ export default class Interpreter extends EventEmitter { const x = new AsyncFunction('page', 'log', code); await x(page, this.log); }, + flag: async () => new Promise((res) => { this.emit('flag', page, res); }), From 7c2ff6c2c913daa5b7a82cc3dab0ce6688942065 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 12 Aug 2024 18:24:27 +0530 Subject: [PATCH 100/172] feat: exclude utlitly classes + esc spc. chars --- server/src/workflow-management/selector.ts | 28 +++++++++++++--------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index eccd9b23..00a19f40 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -730,35 +730,39 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { * @returns {Promise} */ -export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates) => { +export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates) => { try { - const selectors = await page.evaluate(({ x, y }) => { - function getSelector(element: any) { + const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { + + function getNonUniqueSelector(element: HTMLElement): string { let selector = element.tagName.toLowerCase(); - // Capture a single, relevant class if present + // Avoid using IDs to maintain non-uniqueness if (element.className) { - const classes = element.className.split(/\s+/).filter(Boolean); + const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); if (classes.length > 0) { - // Use only the first class to avoid over-specificity - selector += '.' + classes[0]; + // Exclude utility classes and escape special characters + const validClasses = classes.filter((cls: string) => !cls.startsWith('!') && !cls.includes(':')); + if (validClasses.length > 0) { + selector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); + } } } return selector; } - function getSelectorPath(element: any) { - const path = []; + function getSelectorPath(element: HTMLElement | null): string { + const path: string[] = []; while (element && element !== document.body) { - const selector = getSelector(element); + const selector = getNonUniqueSelector(element); path.unshift(selector); element = element.parentElement; } return path.join(' > '); } - const element = document.elementFromPoint(x, y); + const element = document.elementFromPoint(x, y) as HTMLElement | null; if (!element) return null; const generalSelector = getSelectorPath(element); @@ -775,6 +779,8 @@ export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinate }; + + /** * Returns the first pair from the given workflow that contains the given selector * inside the where condition, and it is the only selector there. From dca706a4df34fb6306b527daee70a5c21dbdb5e9 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 12 Aug 2024 20:17:17 +0530 Subject: [PATCH 101/172] fix: remove flexible logic --- maxun-core/src/browserSide/scraper.js | 8 -------- 1 file changed, 8 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index e2f39560..789f9919 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -316,14 +316,6 @@ async function scrollDownToLoadMore(selector, limit) { for (const [fieldName, fieldConfig] of Object.entries(fields)) { let element; - - // if (flexible) { - // // try multiple strategies to find the element - // element = item.querySelector(fieldConfig.selector) || - // item.querySelector(`[class*="${fieldConfig.selector}"]`) || - // Array.from(item.querySelectorAll('*')) - // .find(el => el.textContent.trim() === fieldConfig.selector); - // } else { element = item.querySelector(fieldConfig.selector); console.debug('Element:', element); From 91983980f09285d3d8834aa256f4efeab64e8930 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 13 Aug 2024 22:19:27 +0530 Subject: [PATCH 102/172] feat: scrapeList --- maxun-core/src/browserSide/scraper.js | 92 +++++++++------------------ 1 file changed, 29 insertions(+), 63 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 789f9919..80dd3703 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -278,76 +278,42 @@ async function scrollDownToLoadMore(selector, limit) { * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ - window.scrapeList = async function (config) { - const { listSelector, fields, limit, flexible = false, pagination } = config; + window.scrapeList = function({ listSelector, fields }) { + // Get all parent elements matching the listSelector + const parentElements = Array.from(document.querySelectorAll(listSelector)); - const lists = Array.from(document.querySelectorAll(listSelector)); + const scrapedData = []; - if (pagination) { - const { type, selector } = pagination; + // Iterate through each parent element + parentElements.forEach(parent => { + const record = {}; - switch (type) { - case 'scrollDown': - await scrollDownToLoadMore(pagination.selector, config.limit); - break; - // case 'scrollUp': - // await scrollUpToLoadMore(limit); - // break; - // case 'clickNext': - // if (selector) await clickNextToNavigate(selector, limit); - // break; - // case 'clickLoadMore': - // if (selector) await clickLoadMore(selector, limit); - // break; - default: - // No pagination or different handling - break; - } - } + // For each field, select the corresponding element within the parent + for (const [label, { selector, attribute }] of Object.entries(fields)) { + const fieldElement = parent.querySelector(selector); - return lists.map(list => { - const listItems = Array.from(list.children); - - const itemsToScrape = limit ? listItems.slice(0, limit) : listItems; - - // scrape each item - return itemsToScrape.map(item => { - const scrapedItem = {}; - - for (const [fieldName, fieldConfig] of Object.entries(fields)) { - let element; - element = item.querySelector(fieldConfig.selector); - - console.debug('Element:', element); - - // } - - if (element) { - switch (fieldConfig.attribute) { - case 'href': - scrapedItem[fieldName] = element.getAttribute('href'); - break; - case 'src': - scrapedItem[fieldName] = element.getAttribute('src'); - break; - case 'textContent': - scrapedItem[fieldName] = element.textContent.trim(); - break; - case 'innerText': - default: - scrapedItem[fieldName] = element.innerText.trim(); - break; + // Depending on the attribute specified, extract the data + if (fieldElement) { + if (attribute === 'innerText') { + record[label] = fieldElement.innerText.trim(); + } else if (attribute === 'innerHTML') { + record[label] = fieldElement.innerHTML.trim(); + } else if (attribute === 'src') { + record[label] = fieldElement.src; + } else if (attribute === 'href') { + record[label] = fieldElement.href; + } else { + // Default to attribute retrieval + record[label] = fieldElement.getAttribute(attribute); + } } - } else { - // send a message that says it failed - scrapedItem[fieldName] = `Failed to scrape ${fieldName}`; - } } - - return scrapedItem; - }); + scrapedData.push(record); }); - }; + + return scrapedData; +}; + /** From 5874f297b1aefec219658034b51cdbd06d5847a5 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 13 Aug 2024 22:20:05 +0530 Subject: [PATCH 103/172] chore: remove whitespace --- maxun-core/src/browserSide/scraper.js | 3 --- 1 file changed, 3 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 80dd3703..c9215fda 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -310,12 +310,9 @@ async function scrollDownToLoadMore(selector, limit) { } scrapedData.push(record); }); - return scrapedData; }; - - /** * Gets all children of the elements matching the listSelector, * returning their CSS selectors and innerText. From 00d32571fbd3dad5e7d3fa50504ffb10e0a3bc5b Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 13 Aug 2024 22:21:17 +0530 Subject: [PATCH 104/172] chore: lint --- maxun-core/src/browserSide/scraper.js | 45 +++++++++++++-------------- 1 file changed, 22 insertions(+), 23 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index c9215fda..244613f5 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -278,7 +278,7 @@ async function scrollDownToLoadMore(selector, limit) { * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ - window.scrapeList = function({ listSelector, fields }) { + window.scrapeList = function ({ listSelector, fields }) { // Get all parent elements matching the listSelector const parentElements = Array.from(document.querySelectorAll(listSelector)); @@ -286,32 +286,32 @@ async function scrollDownToLoadMore(selector, limit) { // Iterate through each parent element parentElements.forEach(parent => { - const record = {}; + const record = {}; - // For each field, select the corresponding element within the parent - for (const [label, { selector, attribute }] of Object.entries(fields)) { - const fieldElement = parent.querySelector(selector); + // For each field, select the corresponding element within the parent + for (const [label, { selector, attribute }] of Object.entries(fields)) { + const fieldElement = parent.querySelector(selector); - // Depending on the attribute specified, extract the data - if (fieldElement) { - if (attribute === 'innerText') { - record[label] = fieldElement.innerText.trim(); - } else if (attribute === 'innerHTML') { - record[label] = fieldElement.innerHTML.trim(); - } else if (attribute === 'src') { - record[label] = fieldElement.src; - } else if (attribute === 'href') { - record[label] = fieldElement.href; - } else { - // Default to attribute retrieval - record[label] = fieldElement.getAttribute(attribute); - } - } + // Depending on the attribute specified, extract the data + if (fieldElement) { + if (attribute === 'innerText') { + record[label] = fieldElement.innerText.trim(); + } else if (attribute === 'innerHTML') { + record[label] = fieldElement.innerHTML.trim(); + } else if (attribute === 'src') { + record[label] = fieldElement.src; + } else if (attribute === 'href') { + record[label] = fieldElement.href; + } else { + // Default to attribute retrieval + record[label] = fieldElement.getAttribute(attribute); + } } - scrapedData.push(record); + } + scrapedData.push(record); }); return scrapedData; -}; + }; /** * Gets all children of the elements matching the listSelector, @@ -358,5 +358,4 @@ async function scrollDownToLoadMore(selector, limit) { return results; }; - })(window); \ No newline at end of file From 448ffbfc0565875a41c77e86060f03c086e5ac0d Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 13 Aug 2024 23:49:10 +0530 Subject: [PATCH 105/172] feat: limit no. of records scraped --- maxun-core/src/browserSide/scraper.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 244613f5..24e8f651 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -278,9 +278,9 @@ async function scrollDownToLoadMore(selector, limit) { * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ - window.scrapeList = function ({ listSelector, fields }) { + window.scrapeList = function ({ listSelector, fields, limit = 10 }) { // Get all parent elements matching the listSelector - const parentElements = Array.from(document.querySelectorAll(listSelector)); + const parentElements = Array.from(document.querySelectorAll(listSelector)).slice(0, limit); const scrapedData = []; From 375590fc0da43c3eba267721b96e90b0ae3e27bf Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 13 Aug 2024 23:56:27 +0530 Subject: [PATCH 106/172] feat: add pagination param --- maxun-core/src/browserSide/scraper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 24e8f651..e36af779 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -278,7 +278,7 @@ async function scrollDownToLoadMore(selector, limit) { * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ - window.scrapeList = function ({ listSelector, fields, limit = 10 }) { + window.scrapeList = function ({ listSelector, fields, limit = 10, pagination }) { // Get all parent elements matching the listSelector const parentElements = Array.from(document.querySelectorAll(listSelector)).slice(0, limit); From 2407c79dd2a0d385940215c1b1d23783d1771dd1 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 14 Aug 2024 05:58:57 +0530 Subject: [PATCH 107/172] feat: handle pagination types --- maxun-core/src/browserSide/scraper.js | 92 ++++++++++++++++++--------- 1 file changed, 63 insertions(+), 29 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index e36af779..17962f0e 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -278,40 +278,74 @@ async function scrollDownToLoadMore(selector, limit) { * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ - window.scrapeList = function ({ listSelector, fields, limit = 10, pagination }) { - // Get all parent elements matching the listSelector - const parentElements = Array.from(document.querySelectorAll(listSelector)).slice(0, limit); - + window.scrapeList = async function({ listSelector, fields, limit = 10, pagination = null }) { const scrapedData = []; - // Iterate through each parent element - parentElements.forEach(parent => { - const record = {}; + while (scrapedData.length < limit) { + // Get all parent elements matching the listSelector + const parentElements = Array.from(document.querySelectorAll(listSelector)); - // For each field, select the corresponding element within the parent - for (const [label, { selector, attribute }] of Object.entries(fields)) { - const fieldElement = parent.querySelector(selector); + // Iterate through each parent element + for (const parent of parentElements) { + if (scrapedData.length >= limit) break; + const record = {}; - // Depending on the attribute specified, extract the data - if (fieldElement) { - if (attribute === 'innerText') { - record[label] = fieldElement.innerText.trim(); - } else if (attribute === 'innerHTML') { - record[label] = fieldElement.innerHTML.trim(); - } else if (attribute === 'src') { - record[label] = fieldElement.src; - } else if (attribute === 'href') { - record[label] = fieldElement.href; - } else { - // Default to attribute retrieval - record[label] = fieldElement.getAttribute(attribute); - } + // For each field, select the corresponding element within the parent + for (const [label, { selector, attribute }] of Object.entries(fields)) { + const fieldElement = parent.querySelector(selector); + + // Depending on the attribute specified, extract the data + if (fieldElement) { + if (attribute === 'innerText') { + record[label] = fieldElement.innerText.trim(); + } else if (attribute === 'innerHTML') { + record[label] = fieldElement.innerHTML.trim(); + } else if (attribute === 'src') { + record[label] = fieldElement.src; + } else if (attribute === 'href') { + record[label] = fieldElement.href; + } else { + // Default to attribute retrieval + record[label] = fieldElement.getAttribute(attribute); + } + } + } + + // Add the record to the scrapedData array + scrapedData.push(record); } - } - scrapedData.push(record); - }); - return scrapedData; - }; + + // Check if we need to paginate + if (pagination && scrapedData.length < limit) { + switch (pagination.type) { + case 'scrollDown': + //await scrollDownPagination(); + break; + case 'scrollUp': + //await scrollUpPagination(); + break; + case 'clickNext': + //await clickNextPagination(pagination.selector); + break; + case 'clickLoadMore': + //await clickLoadMorePagination(pagination.selector); + break; + case 'none': + // No more items to load + break; + default: + console.warn("Unknown pagination type"); + break; + } + await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for content to load + } else { + break; // No more items to load or no pagination + } + } + + return scrapedData.slice(0, limit); // Return only the limited number of records +}; + /** * Gets all children of the elements matching the listSelector, From 960a59176516127ace222f3c90e6620b4019cdf0 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 14 Aug 2024 06:28:55 +0530 Subject: [PATCH 108/172] feat: scroll down pagination --- maxun-core/src/browserSide/scraper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 17962f0e..daf86cd0 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -319,7 +319,7 @@ async function scrollDownToLoadMore(selector, limit) { if (pagination && scrapedData.length < limit) { switch (pagination.type) { case 'scrollDown': - //await scrollDownPagination(); + await scrollDownToLoadMore(listSelector, limit); break; case 'scrollUp': //await scrollUpPagination(); From 5828d512b94fb6771989097547df91359f351eb3 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 14 Aug 2024 06:29:17 +0530 Subject: [PATCH 109/172] chore: remove comment --- maxun-core/src/browserSide/scraper.js | 1 - 1 file changed, 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index daf86cd0..d721caff 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -315,7 +315,6 @@ async function scrollDownToLoadMore(selector, limit) { scrapedData.push(record); } - // Check if we need to paginate if (pagination && scrapedData.length < limit) { switch (pagination.type) { case 'scrollDown': From 1c0b6e4db1b015ba232e3f8dd650dc051c9b475b Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 14 Aug 2024 06:32:42 +0530 Subject: [PATCH 110/172] feat: scroll up to load more items --- maxun-core/src/browserSide/scraper.js | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index d721caff..0ba8cbf8 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -145,6 +145,25 @@ async function scrollDownToLoadMore(selector, limit) { } } +async function scrollUpToLoadMore(selector, limit) { + let previousHeight = 0; + let itemsLoaded = 0; + + while (itemsLoaded < limit) { + window.scrollBy(0, -window.innerHeight); + await new Promise(resolve => setTimeout(resolve, 1000)); + + const currentHeight = document.body.scrollHeight; + + if (currentHeight === previousHeight) { + break; // No more items to load + } + + previousHeight = currentHeight; + itemsLoaded += document.querySelectorAll(selector).length; + } +} + /** * Returns a "scrape" result from the current page. * @returns {Array} *Curated* array of scraped information (with sparse rows removed) From 422b357e1c3bc1737af7a34f8ec4fb3deac8fafb Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 14 Aug 2024 06:33:26 +0530 Subject: [PATCH 111/172] feat: scroll up --- maxun-core/src/browserSide/scraper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 0ba8cbf8..7da38aa9 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -340,7 +340,7 @@ async function scrollUpToLoadMore(selector, limit) { await scrollDownToLoadMore(listSelector, limit); break; case 'scrollUp': - //await scrollUpPagination(); + await scrollUpToLoadMore(listSelector, limit); break; case 'clickNext': //await clickNextPagination(pagination.selector); From 2ef2b2e814da2bf6f46e0d92a0cbd3725965b5a8 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 14 Aug 2024 08:25:20 +0530 Subject: [PATCH 112/172] feat: click next pagination --- maxun-core/src/browserSide/scraper.js | 43 ++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 7da38aa9..731b929a 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -164,6 +164,47 @@ async function scrollUpToLoadMore(selector, limit) { } } +async function clickNextPagination(selector, scrapedData, limit) { + // Check if the limit is already met + if (scrapedData.length >= limit) { + return false; // Return false to indicate no further action is needed + } + + // Check if a single "Next" button exists + let nextButton = document.querySelector(selector); + + if (nextButton) { + nextButton.click(); + return true; // Indicate that pagination occurred + } else { + // Handle pagination with numbers + const paginationButtons = document.querySelectorAll(selector); + let clicked = false; + + // Loop through pagination buttons to find the current active page + for (let i = 0; i < paginationButtons.length - 1; i++) { + const button = paginationButtons[i]; + if (button.classList.contains('active')) { + // Click the next button if available + const nextButtonInPagination = paginationButtons[i + 1]; + if (nextButtonInPagination) { + nextButtonInPagination.click(); + clicked = true; + break; + } + } + } + + // If no next button was clicked, we might be on the last page + if (!clicked) { + throw new Error("No more items to load or pagination has ended."); + } + + //return clicked; // Indicate whether pagination occurred + } +} + + /** * Returns a "scrape" result from the current page. * @returns {Array} *Curated* array of scraped information (with sparse rows removed) @@ -343,7 +384,7 @@ async function scrollUpToLoadMore(selector, limit) { await scrollUpToLoadMore(listSelector, limit); break; case 'clickNext': - //await clickNextPagination(pagination.selector); + await clickNextPagination(pagination.selector); break; case 'clickLoadMore': //await clickLoadMorePagination(pagination.selector); From d48f0477424e2842cdbf61bd1598069efc2585aa Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 15 Aug 2024 21:30:34 +0530 Subject: [PATCH 113/172] chroe: addtodo --- maxun-core/src/browserSide/scraper.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 731b929a..3ed9cff1 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -262,6 +262,7 @@ async function clickNextPagination(selector, scrapedData, limit) { }; /** + * TODO: Simplify. * Given an object with named lists of elements, * groups the elements by their distance in the DOM tree. * @param {Object.} lists The named lists of HTML elements. @@ -384,7 +385,7 @@ async function clickNextPagination(selector, scrapedData, limit) { await scrollUpToLoadMore(listSelector, limit); break; case 'clickNext': - await clickNextPagination(pagination.selector); + await clickNextPagination(pagination.selector, scrapedData, limit); break; case 'clickLoadMore': //await clickLoadMorePagination(pagination.selector); From 87476c3e355dee2e6e1505771336142811987b72 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 16 Aug 2024 18:21:34 +0530 Subject: [PATCH 114/172] fix: remove flexible param --- maxun-core/src/interpret.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 1e62566e..44dddf8d 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -291,7 +291,7 @@ export default class Interpreter extends EventEmitter { await this.options.serializableCallback(scrapeResult); }, - scrapeList: async (config: { listSelector: string, fields: any, limit?: number, flexible?: boolean, pagination: any }) => { + scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { await this.ensureScriptsLoaded(page); const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config); await this.options.serializableCallback(scrapeResults); From b2678759dbcd3a56ad341405a220db392033b791 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Fri, 16 Aug 2024 23:43:51 +0530 Subject: [PATCH 115/172] chore: lint --- maxun-core/src/browserSide/scraper.js | 159 ++++++++++++++------------ 1 file changed, 84 insertions(+), 75 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 3ed9cff1..331904ce 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -167,40 +167,40 @@ async function scrollUpToLoadMore(selector, limit) { async function clickNextPagination(selector, scrapedData, limit) { // Check if the limit is already met if (scrapedData.length >= limit) { - return false; // Return false to indicate no further action is needed + return false; // Return false to indicate no further action is needed } // Check if a single "Next" button exists let nextButton = document.querySelector(selector); if (nextButton) { - nextButton.click(); - return true; // Indicate that pagination occurred + nextButton.click(); + return true; // Indicate that pagination occurred } else { - // Handle pagination with numbers - const paginationButtons = document.querySelectorAll(selector); - let clicked = false; + // Handle pagination with numbers + const paginationButtons = document.querySelectorAll(selector); + let clicked = false; - // Loop through pagination buttons to find the current active page - for (let i = 0; i < paginationButtons.length - 1; i++) { - const button = paginationButtons[i]; - if (button.classList.contains('active')) { - // Click the next button if available - const nextButtonInPagination = paginationButtons[i + 1]; - if (nextButtonInPagination) { - nextButtonInPagination.click(); - clicked = true; - break; - } - } + // Loop through pagination buttons to find the current active page + for (let i = 0; i < paginationButtons.length - 1; i++) { + const button = paginationButtons[i]; + if (button.classList.contains('active')) { + // Click the next button if available + const nextButtonInPagination = paginationButtons[i + 1]; + if (nextButtonInPagination) { + nextButtonInPagination.click(); + clicked = true; + break; + } } + } - // If no next button was clicked, we might be on the last page - if (!clicked) { - throw new Error("No more items to load or pagination has ended."); - } + // If no next button was clicked, we might be on the last page + if (!clicked) { + throw new Error("No more items to load or pagination has ended."); + } - //return clicked; // Indicate whether pagination occurred + return clicked; // Indicate whether pagination occurred } } @@ -339,72 +339,81 @@ async function clickNextPagination(selector, scrapedData, limit) { * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ - window.scrapeList = async function({ listSelector, fields, limit = 10, pagination = null }) { + window.scrapeList = async function ({ listSelector, fields, limit = 10, pagination = null }) { const scrapedData = []; while (scrapedData.length < limit) { - // Get all parent elements matching the listSelector - const parentElements = Array.from(document.querySelectorAll(listSelector)); + // Get all parent elements matching the listSelector + const parentElements = Array.from(document.querySelectorAll(listSelector)); - // Iterate through each parent element - for (const parent of parentElements) { - if (scrapedData.length >= limit) break; - const record = {}; + // Iterate through each parent element + for (const parent of parentElements) { + if (scrapedData.length >= limit) break; + const record = {}; - // For each field, select the corresponding element within the parent - for (const [label, { selector, attribute }] of Object.entries(fields)) { - const fieldElement = parent.querySelector(selector); + // For each field, select the corresponding element within the parent + for (const [label, { selector, attribute }] of Object.entries(fields)) { + const fieldElement = parent.querySelector(selector); - // Depending on the attribute specified, extract the data - if (fieldElement) { - if (attribute === 'innerText') { - record[label] = fieldElement.innerText.trim(); - } else if (attribute === 'innerHTML') { - record[label] = fieldElement.innerHTML.trim(); - } else if (attribute === 'src') { - record[label] = fieldElement.src; - } else if (attribute === 'href') { - record[label] = fieldElement.href; - } else { - // Default to attribute retrieval - record[label] = fieldElement.getAttribute(attribute); - } - } + if (fieldElement) { + if (attribute === 'innerText') { + record[label] = fieldElement.innerText.trim(); + } else if (attribute === 'innerHTML') { + record[label] = fieldElement.innerHTML.trim(); + } else if (attribute === 'src') { + record[label] = fieldElement.src; + } else if (attribute === 'href') { + record[label] = fieldElement.href; + } else { + record[label] = fieldElement.getAttribute(attribute); } - - // Add the record to the scrapedData array - scrapedData.push(record); + } } - if (pagination && scrapedData.length < limit) { - switch (pagination.type) { - case 'scrollDown': - await scrollDownToLoadMore(listSelector, limit); - break; - case 'scrollUp': - await scrollUpToLoadMore(listSelector, limit); - break; - case 'clickNext': - await clickNextPagination(pagination.selector, scrapedData, limit); - break; - case 'clickLoadMore': - //await clickLoadMorePagination(pagination.selector); - break; - case 'none': - // No more items to load - break; - default: - console.warn("Unknown pagination type"); - break; - } - await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for content to load + // Add the record to the scrapedData array + scrapedData.push(record); + } + + // Check if we need to paginate + if (pagination && scrapedData.length < limit) { + let paginated = false; + + switch (pagination.type) { + case 'scrollDown': + await scrollDownToLoadMore(listSelector, limit); + paginated = true; + break; + case 'scrollUp': + await scrollUpToLoadMore(listSelector, limit); + paginated = true; + break; + case 'clickNext': + paginated = await clickNextPagination(pagination.selector, scrapedData, limit); + break; + case 'clickLoadMore': + //await clickLoadMorePagination(pagination.selector); + //paginated = true; + break; + case 'none': + // No more items to load + break; + default: + console.warn("Unknown pagination type"); + break; + } + + if (paginated) { + await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for content to load } else { - break; // No more items to load or no pagination + break; // No further pagination needed } + } else { + break; // No more items to load or no pagination + } } return scrapedData.slice(0, limit); // Return only the limited number of records -}; + }; /** From 9bc94f461470a9d6c3f3d03d2df85c07b015aa96 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 17 Aug 2024 21:16:22 +0530 Subject: [PATCH 116/172] feat: move pagination from browser side --- maxun-core/src/browserSide/scraper.js | 36 +-------------------------- 1 file changed, 1 insertion(+), 35 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 331904ce..db941dee 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -373,41 +373,7 @@ async function clickNextPagination(selector, scrapedData, limit) { // Add the record to the scrapedData array scrapedData.push(record); } - - // Check if we need to paginate - if (pagination && scrapedData.length < limit) { - let paginated = false; - - switch (pagination.type) { - case 'scrollDown': - await scrollDownToLoadMore(listSelector, limit); - paginated = true; - break; - case 'scrollUp': - await scrollUpToLoadMore(listSelector, limit); - paginated = true; - break; - case 'clickNext': - paginated = await clickNextPagination(pagination.selector, scrapedData, limit); - break; - case 'clickLoadMore': - //await clickLoadMorePagination(pagination.selector); - //paginated = true; - break; - case 'none': - // No more items to load - break; - default: - console.warn("Unknown pagination type"); - break; - } - - if (paginated) { - await new Promise(resolve => setTimeout(resolve, 2000)); // Wait for content to load - } else { - break; // No further pagination needed - } - } else { + else { break; // No more items to load or no pagination } } From 94daa02806d6c32159c2241ced08083d6d265c57 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 17 Aug 2024 21:16:49 +0530 Subject: [PATCH 117/172] feat: remove pagination param --- maxun-core/src/browserSide/scraper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index db941dee..cdf5ddb2 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -339,7 +339,7 @@ async function clickNextPagination(selector, scrapedData, limit) { * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ - window.scrapeList = async function ({ listSelector, fields, limit = 10, pagination = null }) { + window.scrapeList = async function ({ listSelector, fields, limit = 10 }) { const scrapedData = []; while (scrapedData.length < limit) { From 985c1034d3bc8ed7b3b9ed13df89b3d040ca60a1 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 17 Aug 2024 21:20:03 +0530 Subject: [PATCH 118/172] chore: lint --- maxun-core/src/browserSide/scraper.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index cdf5ddb2..25c4c150 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -369,8 +369,6 @@ async function clickNextPagination(selector, scrapedData, limit) { } } } - - // Add the record to the scrapedData array scrapedData.push(record); } else { From 794d540f81d3cb19b4d59945076aeec561dbe145 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 17 Aug 2024 23:27:17 +0530 Subject: [PATCH 119/172] feat: move scrollDown & scrollUp inside window --- maxun-core/src/browserSide/scraper.js | 41 +++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 25c4c150..0ae6b31f 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -369,6 +369,8 @@ async function clickNextPagination(selector, scrapedData, limit) { } } } + + // Add the record to the scrapedData array scrapedData.push(record); } else { @@ -425,4 +427,43 @@ async function clickNextPagination(selector, scrapedData, limit) { return results; }; + + window.scrollDown = async(selector, limit) { + let previousHeight = 0; + let itemsLoaded = 0; + + while (itemsLoaded < limit) { + window.scrollBy(0, window.innerHeight); + await new Promise(resolve => setTimeout(resolve, 1000)); + + const currentHeight = document.body.scrollHeight; + + if (currentHeight === previousHeight) { + break; // No more items to load + } + + previousHeight = currentHeight; + itemsLoaded += document.querySelectorAll(selector).length; + } + } + + window.scrollUp = async(selector, limit) { + let previousHeight = 0; + let itemsLoaded = 0; + + while (itemsLoaded < limit) { + window.scrollBy(0, -window.innerHeight); + await new Promise(resolve => setTimeout(resolve, 1000)); + + const currentHeight = document.body.scrollHeight; + + if (currentHeight === previousHeight) { + break; // No more items to load + } + + previousHeight = currentHeight; + itemsLoaded += document.querySelectorAll(selector).length; + } + } + })(window); \ No newline at end of file From 2799d5055e8c7177974fe8fff7206248706f6d02 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 17 Aug 2024 23:27:28 +0530 Subject: [PATCH 120/172] chore: lint --- maxun-core/src/browserSide/scraper.js | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 0ae6b31f..13d5de92 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -428,39 +428,39 @@ async function clickNextPagination(selector, scrapedData, limit) { }; - window.scrollDown = async(selector, limit) { + window.scrollDown = async (selector, limit) { let previousHeight = 0; let itemsLoaded = 0; - + while (itemsLoaded < limit) { window.scrollBy(0, window.innerHeight); await new Promise(resolve => setTimeout(resolve, 1000)); - + const currentHeight = document.body.scrollHeight; - + if (currentHeight === previousHeight) { break; // No more items to load } - + previousHeight = currentHeight; itemsLoaded += document.querySelectorAll(selector).length; } } - - window.scrollUp = async(selector, limit) { + + window.scrollUp = async (selector, limit) { let previousHeight = 0; let itemsLoaded = 0; - + while (itemsLoaded < limit) { window.scrollBy(0, -window.innerHeight); await new Promise(resolve => setTimeout(resolve, 1000)); - + const currentHeight = document.body.scrollHeight; - + if (currentHeight === previousHeight) { break; // No more items to load } - + previousHeight = currentHeight; itemsLoaded += document.querySelectorAll(selector).length; } From 4abc2cf9ba97930ba6828c197aca9ef6f3f3c278 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 17 Aug 2024 23:31:52 +0530 Subject: [PATCH 121/172] fix: add function keyword --- maxun-core/src/browserSide/scraper.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 13d5de92..7fcdd443 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -428,7 +428,7 @@ async function clickNextPagination(selector, scrapedData, limit) { }; - window.scrollDown = async (selector, limit) { + window.scrollDown = async function(selector, limit) { let previousHeight = 0; let itemsLoaded = 0; @@ -447,7 +447,7 @@ async function clickNextPagination(selector, scrapedData, limit) { } } - window.scrollUp = async (selector, limit) { + window.scrollUp = async function(selector, limit) { let previousHeight = 0; let itemsLoaded = 0; From f4511b7abc7400517d246fb8953c63a3a1499a71 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 17 Aug 2024 23:32:55 +0530 Subject: [PATCH 122/172] fix: remove else statement --- maxun-core/src/browserSide/scraper.js | 3 --- 1 file changed, 3 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 7fcdd443..e38bfae5 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -373,9 +373,6 @@ async function clickNextPagination(selector, scrapedData, limit) { // Add the record to the scrapedData array scrapedData.push(record); } - else { - break; // No more items to load or no pagination - } } return scrapedData.slice(0, limit); // Return only the limited number of records From 9f8e8fc01a537b0a331c54e7047172001ee921c6 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 17 Aug 2024 23:33:33 +0530 Subject: [PATCH 123/172] feat: remove splice --- maxun-core/src/browserSide/scraper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index e38bfae5..7c132ff9 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -375,7 +375,7 @@ async function clickNextPagination(selector, scrapedData, limit) { } } - return scrapedData.slice(0, limit); // Return only the limited number of records + return scrapedData }; From 029a6b9e98da9be94288e53af898e7eb8c6fc099 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 17 Aug 2024 23:51:00 +0530 Subject: [PATCH 124/172] chore: lint --- maxun-core/src/browserSide/scraper.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 7c132ff9..fc526d4c 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -425,7 +425,7 @@ async function clickNextPagination(selector, scrapedData, limit) { }; - window.scrollDown = async function(selector, limit) { + window.scrollDown = async function (selector, limit) { let previousHeight = 0; let itemsLoaded = 0; @@ -444,7 +444,7 @@ async function clickNextPagination(selector, scrapedData, limit) { } } - window.scrollUp = async function(selector, limit) { + window.scrollUp = async function (selector, limit) { let previousHeight = 0; let itemsLoaded = 0; From a8e8b53d576c0446f5b0c234cd79c84bf068efac Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 17 Aug 2024 23:51:32 +0530 Subject: [PATCH 125/172] chore: remove comment --- maxun-core/src/browserSide/scraper.js | 3 --- 1 file changed, 3 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index fc526d4c..7c208b68 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -369,12 +369,9 @@ async function clickNextPagination(selector, scrapedData, limit) { } } } - - // Add the record to the scrapedData array scrapedData.push(record); } } - return scrapedData }; From 95631541b9d74f7fb97756387acb87b0b2d5790a Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 17 Aug 2024 23:54:00 +0530 Subject: [PATCH 126/172] feat: handle pagination on server side --- maxun-core/src/interpret.ts | 67 +++++++++++++++++++++++++++++++++++-- 1 file changed, 65 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 44dddf8d..4edc2131 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -291,9 +291,15 @@ export default class Interpreter extends EventEmitter { await this.options.serializableCallback(scrapeResult); }, + // scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { + // await this.ensureScriptsLoaded(page); + // const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config); + // await this.options.serializableCallback(scrapeResults); + // }, + scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { await this.ensureScriptsLoaded(page); - const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config); + const scrapeResults: Record[] = await this.handlePagination(page, config); await this.options.serializableCallback(scrapeResults); }, @@ -357,6 +363,63 @@ export default class Interpreter extends EventEmitter { } } + + private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) { + let allResults: Record[] = []; + let currentPage = 1; + + while (true) { + // Scrape current page + const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(pageResults); + + if (config.limit && allResults.length >= config.limit) { + allResults = allResults.slice(0, config.limit); + break; + } + + switch (config.pagination.type) { + case 'scrollDown': + await page.evaluate(() => window.scrollDown(config.listSelector, config.limit)); + break; + case 'scrollUp': + await page.evaluate(() => window.scrollUp(config.listSelector, config.limit)); + break; + case 'clickNext': + const nextButton = await page.$(config.pagination.selector); + if (!nextButton) { + return allResults; // No more pages + } + await nextButton.click(); + break; + case 'clickLoadMore': + const loadMoreButton = await page.$(config.pagination.selector); + if (!loadMoreButton) { + return allResults; // No more items to load + } + await loadMoreButton.click(); + break; + default: + return allResults; // No pagination or unknown type + } + + // Check if new items were loaded + const newItemsLoaded = await page.evaluate((prevCount, listSelector) => { + const currentCount = document.querySelectorAll(listSelector).length; + return currentCount > prevCount; + }, allResults.length, config.listSelector); + + if (!newItemsLoaded) { + return allResults; // No new items, end pagination + } + + currentPage++; + await page.waitForTimeout(1000); // Wait for page to load + } + + return allResults; + } + private async runLoop(p: Page, workflow: Workflow) { const usedActions: string[] = []; let lastAction = null; @@ -429,7 +492,7 @@ export default class Interpreter extends EventEmitter { } private async ensureScriptsLoaded(page: Page) { - const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function' && typeof window.scrapeList === 'function' && typeof window.scrapeListAuto === 'function'); + const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function' && typeof window.scrapeList === 'function' && typeof window.scrapeListAuto === 'function' && typeof window.scrollDown === 'function' && typeof window.scrollUp === 'function'); if (!isScriptLoaded) { await page.addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); } From 221c450546905a7f4a95158721f8700eb1295c28 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 21:56:09 +0530 Subject: [PATCH 127/172] feat: use window.scrollTo instead of scrollBy --- maxun-core/src/browserSide/scraper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 7c208b68..99c8ee33 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -427,7 +427,7 @@ async function clickNextPagination(selector, scrapedData, limit) { let itemsLoaded = 0; while (itemsLoaded < limit) { - window.scrollBy(0, window.innerHeight); + window.scrollTo(0, document.body.scrollHeight); await new Promise(resolve => setTimeout(resolve, 1000)); const currentHeight = document.body.scrollHeight; From bf292f05167cd6875fb68722f1adca1be8137a5c Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:18:03 +0530 Subject: [PATCH 128/172] feat: scroll donw on server side --- maxun-core/src/interpret.ts | 34 ++++++++++++++++++++++++++-------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 4edc2131..ca273ac1 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -380,18 +380,36 @@ export default class Interpreter extends EventEmitter { switch (config.pagination.type) { case 'scrollDown': - await page.evaluate(() => window.scrollDown(config.listSelector, config.limit)); + let previousHeight = 0 + await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); + // Wait for potential lazy-loaded content + await page.waitForTimeout(2000); + + // Check if new content was loaded + const currentHeight = await page.evaluate(() => document.body.scrollHeight); + if (currentHeight === previousHeight) { + // No new content loaded, exit loop + return allResults; + } + previousHeight = currentHeight; break; case 'scrollUp': await page.evaluate(() => window.scrollUp(config.listSelector, config.limit)); break; - case 'clickNext': - const nextButton = await page.$(config.pagination.selector); - if (!nextButton) { - return allResults; // No more pages - } - await nextButton.click(); - break; + case 'clickNext': + const nextButton = await page.$(config.pagination.selector); + if (!nextButton) { + return allResults; // No more pages + } + + // Capture the current URL to check if it changes after clicking next + const currentURL = page.url(); + + await Promise.all([ + nextButton.click(), + page.waitForNavigation({ waitUntil: 'load' }) // Wait for page navigation + ]); + break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); if (!loadMoreButton) { From 3771de92caca28e4ed95ccccfda8f7339b4a06d5 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:19:06 +0530 Subject: [PATCH 129/172] feat: make previousHeight global --- maxun-core/src/interpret.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index ca273ac1..7c3c08d4 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -367,6 +367,7 @@ export default class Interpreter extends EventEmitter { private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) { let allResults: Record[] = []; let currentPage = 1; + let previousHeight = 0 while (true) { // Scrape current page @@ -380,7 +381,6 @@ export default class Interpreter extends EventEmitter { switch (config.pagination.type) { case 'scrollDown': - let previousHeight = 0 await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); // Wait for potential lazy-loaded content await page.waitForTimeout(2000); From 1260730e563ad32d6447b146856c465c5cdc751c Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:19:24 +0530 Subject: [PATCH 130/172] chore: lint --- maxun-core/src/interpret.ts | 58 ++++++++++++++++++------------------- 1 file changed, 29 insertions(+), 29 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 7c3c08d4..ebdf09d4 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -329,7 +329,7 @@ export default class Interpreter extends EventEmitter { const x = new AsyncFunction('page', 'log', code); await x(page, this.log); }, - + flag: async () => new Promise((res) => { this.emit('flag', page, res); }), @@ -368,48 +368,48 @@ export default class Interpreter extends EventEmitter { let allResults: Record[] = []; let currentPage = 1; let previousHeight = 0 - + while (true) { // Scrape current page const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); allResults = allResults.concat(pageResults); - + if (config.limit && allResults.length >= config.limit) { allResults = allResults.slice(0, config.limit); break; } - + switch (config.pagination.type) { case 'scrollDown': await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); - // Wait for potential lazy-loaded content - await page.waitForTimeout(2000); + // Wait for potential lazy-loaded content + await page.waitForTimeout(2000); - // Check if new content was loaded - const currentHeight = await page.evaluate(() => document.body.scrollHeight); - if (currentHeight === previousHeight) { - // No new content loaded, exit loop - return allResults; - } - previousHeight = currentHeight; + // Check if new content was loaded + const currentHeight = await page.evaluate(() => document.body.scrollHeight); + if (currentHeight === previousHeight) { + // No new content loaded, exit loop + return allResults; + } + previousHeight = currentHeight; break; case 'scrollUp': await page.evaluate(() => window.scrollUp(config.listSelector, config.limit)); break; - case 'clickNext': - const nextButton = await page.$(config.pagination.selector); - if (!nextButton) { - return allResults; // No more pages - } + case 'clickNext': + const nextButton = await page.$(config.pagination.selector); + if (!nextButton) { + return allResults; // No more pages + } - // Capture the current URL to check if it changes after clicking next - const currentURL = page.url(); + // Capture the current URL to check if it changes after clicking next + const currentURL = page.url(); - await Promise.all([ - nextButton.click(), - page.waitForNavigation({ waitUntil: 'load' }) // Wait for page navigation - ]); - break; + await Promise.all([ + nextButton.click(), + page.waitForNavigation({ waitUntil: 'load' }) // Wait for page navigation + ]); + break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); if (!loadMoreButton) { @@ -420,21 +420,21 @@ export default class Interpreter extends EventEmitter { default: return allResults; // No pagination or unknown type } - + // Check if new items were loaded const newItemsLoaded = await page.evaluate((prevCount, listSelector) => { const currentCount = document.querySelectorAll(listSelector).length; return currentCount > prevCount; }, allResults.length, config.listSelector); - + if (!newItemsLoaded) { return allResults; // No new items, end pagination } - + currentPage++; await page.waitForTimeout(1000); // Wait for page to load } - + return allResults; } From 8ddee6ac10319c0e0415dc54512d59d6f7b91078 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:22:43 +0530 Subject: [PATCH 131/172] feat: call scrapeList based on pagination --- maxun-core/src/interpret.ts | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index ebdf09d4..f37b3f43 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -299,8 +299,13 @@ export default class Interpreter extends EventEmitter { scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { await this.ensureScriptsLoaded(page); - const scrapeResults: Record[] = await this.handlePagination(page, config); - await this.options.serializableCallback(scrapeResults); + if (!config.pagination) { + const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config); + await this.options.serializableCallback(scrapeResults); + } else { + const scrapeResults: Record[] = await this.handlePagination(page, config); + await this.options.serializableCallback(scrapeResults); + } }, scrapeListAuto: async (config: { listSelector: string }) => { From ccc68ceb0954585be4b6efa6a87abb28f2ef298d Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:31:37 +0530 Subject: [PATCH 132/172] feat: remove scraping before checking pagination type --- maxun-core/src/interpret.ts | 9 --------- 1 file changed, 9 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index f37b3f43..70164f78 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -375,15 +375,6 @@ export default class Interpreter extends EventEmitter { let previousHeight = 0 while (true) { - // Scrape current page - const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(pageResults); - - if (config.limit && allResults.length >= config.limit) { - allResults = allResults.slice(0, config.limit); - break; - } - switch (config.pagination.type) { case 'scrollDown': await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); From 1c93e5430a2268f53f21b4d136138b8f9bac808b Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:32:39 +0530 Subject: [PATCH 133/172] feat: scrape after scroll completion --- maxun-core/src/interpret.ts | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 70164f78..d7e05eee 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -384,7 +384,10 @@ export default class Interpreter extends EventEmitter { // Check if new content was loaded const currentHeight = await page.evaluate(() => document.body.scrollHeight); if (currentHeight === previousHeight) { - // No new content loaded, exit loop + // No new content loaded, scrape final results and exit loop + const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(finalResults); + return allResults; return allResults; } previousHeight = currentHeight; From 5daac01f1b93e88ea5b0454c2af8bfc373bac9d7 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:32:58 +0530 Subject: [PATCH 134/172] fix: remove duplicate return --- maxun-core/src/interpret.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index d7e05eee..6eadebde 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -388,7 +388,6 @@ export default class Interpreter extends EventEmitter { const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); allResults = allResults.concat(finalResults); return allResults; - return allResults; } previousHeight = currentHeight; break; From 48a5ac3b8268bb6af94ef073d176e80e6c8dbcee Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:34:46 +0530 Subject: [PATCH 135/172] feat: scrape for default case --- maxun-core/src/interpret.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 6eadebde..5e4551a4 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -416,7 +416,9 @@ export default class Interpreter extends EventEmitter { await loadMoreButton.click(); break; default: - return allResults; // No pagination or unknown type + const results = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(results); + return allResults; } // Check if new items were loaded From 5d1747c874f505a4bc08c077cc965e078c60470c Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:35:35 +0530 Subject: [PATCH 136/172] feat: wait before next iteration for content load completion --- maxun-core/src/interpret.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 5e4551a4..f0111ccf 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -421,6 +421,9 @@ export default class Interpreter extends EventEmitter { return allResults; } + // Wait a bit before next iteration to ensure content is loaded + await page.waitForTimeout(1000); + // Check if new items were loaded const newItemsLoaded = await page.evaluate((prevCount, listSelector) => { const currentCount = document.querySelectorAll(listSelector).length; From 9a18f470f9b1b0309362a28487db7b68f91bed3f Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:46:10 +0530 Subject: [PATCH 137/172] chore: lint --- maxun-core/src/interpret.ts | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index f0111ccf..54e5bdb2 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -384,7 +384,7 @@ export default class Interpreter extends EventEmitter { // Check if new content was loaded const currentHeight = await page.evaluate(() => document.body.scrollHeight); if (currentHeight === previousHeight) { - // No new content loaded, scrape final results and exit loop + // No new content loaded, scrape final results and exit loop const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); allResults = allResults.concat(finalResults); return allResults; @@ -422,20 +422,16 @@ export default class Interpreter extends EventEmitter { } // Wait a bit before next iteration to ensure content is loaded - await page.waitForTimeout(1000); + await page.waitForTimeout(1000); - // Check if new items were loaded - const newItemsLoaded = await page.evaluate((prevCount, listSelector) => { - const currentCount = document.querySelectorAll(listSelector).length; - return currentCount > prevCount; - }, allResults.length, config.listSelector); + // Scrape the current page after scrolling/clicking + const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(pageResults); - if (!newItemsLoaded) { - return allResults; // No new items, end pagination + if (config.limit && allResults.length >= config.limit) { + allResults = allResults.slice(0, config.limit); + break; } - - currentPage++; - await page.waitForTimeout(1000); // Wait for page to load } return allResults; From 997a466f305a80db114000005a6517746895af2a Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:46:44 +0530 Subject: [PATCH 138/172] chore: remove comment --- maxun-core/src/interpret.ts | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 54e5bdb2..9f8a9adc 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -378,10 +378,8 @@ export default class Interpreter extends EventEmitter { switch (config.pagination.type) { case 'scrollDown': await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); - // Wait for potential lazy-loaded content await page.waitForTimeout(2000); - // Check if new content was loaded const currentHeight = await page.evaluate(() => document.body.scrollHeight); if (currentHeight === previousHeight) { // No new content loaded, scrape final results and exit loop @@ -389,6 +387,7 @@ export default class Interpreter extends EventEmitter { allResults = allResults.concat(finalResults); return allResults; } + previousHeight = currentHeight; break; case 'scrollUp': From 47f5726619faf066982b3c04cec4f6cf5d0107fd Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 22:47:14 +0530 Subject: [PATCH 139/172] fix: dont call window.scrollUp() from browser side --- maxun-core/src/interpret.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 9f8a9adc..0699e4d7 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -387,11 +387,11 @@ export default class Interpreter extends EventEmitter { allResults = allResults.concat(finalResults); return allResults; } - + previousHeight = currentHeight; break; case 'scrollUp': - await page.evaluate(() => window.scrollUp(config.listSelector, config.limit)); + break; case 'clickNext': const nextButton = await page.$(config.pagination.selector); From 4c699adad4f03e9f49aff8d77c47c74779e7305f Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 18 Aug 2024 23:54:15 +0530 Subject: [PATCH 140/172] chore: remove comments --- maxun-core/src/interpret.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 0699e4d7..1ac624da 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -382,7 +382,6 @@ export default class Interpreter extends EventEmitter { const currentHeight = await page.evaluate(() => document.body.scrollHeight); if (currentHeight === previousHeight) { - // No new content loaded, scrape final results and exit loop const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); allResults = allResults.concat(finalResults); return allResults; @@ -391,12 +390,11 @@ export default class Interpreter extends EventEmitter { previousHeight = currentHeight; break; case 'scrollUp': - break; case 'clickNext': const nextButton = await page.$(config.pagination.selector); if (!nextButton) { - return allResults; // No more pages + return allResults; } // Capture the current URL to check if it changes after clicking next From 6a597daa427ec4f9800255b4904e7b6585364b7a Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 19 Aug 2024 00:17:38 +0530 Subject: [PATCH 141/172] feat: handle next click pagination on server --- maxun-core/src/interpret.ts | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 1ac624da..b7336c9a 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -394,17 +394,15 @@ export default class Interpreter extends EventEmitter { case 'clickNext': const nextButton = await page.$(config.pagination.selector); if (!nextButton) { - return allResults; - } - - // Capture the current URL to check if it changes after clicking next - const currentURL = page.url(); - - await Promise.all([ - nextButton.click(), - page.waitForNavigation({ waitUntil: 'load' }) // Wait for page navigation - ]); - break; + const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(finalResults); + return allResults; + } + await Promise.all([ + nextButton.click(), + page.waitForNavigation({ waitUntil: 'networkidle' }) + ]); + break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); if (!loadMoreButton) { From 0d1f83c201c13cbcbe2afe1371af8e0241c61f25 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 19 Aug 2024 00:39:24 +0530 Subject: [PATCH 142/172] fix: remove current page variable --- maxun-core/src/interpret.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index b7336c9a..2c84f64f 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -371,7 +371,6 @@ export default class Interpreter extends EventEmitter { private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) { let allResults: Record[] = []; - let currentPage = 1; let previousHeight = 0 while (true) { From b6bcc1d516ecbf27fbf4e16b749180e8635179b0 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 19 Aug 2024 01:38:41 +0530 Subject: [PATCH 143/172] feat: handle clickNext pagination by tracking already scraped items --- maxun-core/src/interpret.ts | 36 ++++++++++++++++++++++++++++-------- 1 file changed, 28 insertions(+), 8 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 2c84f64f..09076a4d 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -371,7 +371,8 @@ export default class Interpreter extends EventEmitter { private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) { let allResults: Record[] = []; - let previousHeight = 0 + let previousHeight = 0; + let scrapedItems: Set = new Set(); // Track unique items to avoid re-scraping while (true) { switch (config.pagination.type) { @@ -391,16 +392,35 @@ export default class Interpreter extends EventEmitter { case 'scrollUp': break; case 'clickNext': - const nextButton = await page.$(config.pagination.selector); - if (!nextButton) { - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); - return allResults; + const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + + // Filter out items that have already been scraped + const newResults = pageResults.filter(item => { + const uniqueKey = JSON.stringify(item); + if (scrapedItems.has(uniqueKey)) return false; + scrapedItems.add(uniqueKey); + return true; + }); + + allResults = allResults.concat(newResults); + + // If the limit is reached, return the required number of items + if (config.limit && allResults.length >= config.limit) { + return allResults.slice(0, config.limit); } + + // Check if there's a next page button + const nextButton = await page.$(config.pagination.selector); + if (!nextButton) { + return allResults; // No more pages to navigate + } + + // Click the next button and wait for the navigation to complete await Promise.all([ - nextButton.click(), - page.waitForNavigation({ waitUntil: 'networkidle' }) + nextButton.click(), + page.waitForNavigation({ waitUntil: 'networkidle' }) ]); + break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); From bdf68fb66d18dc10f1836530d774f0e08fa865aa Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 19 Aug 2024 01:39:55 +0530 Subject: [PATCH 144/172] chore: add comments --- maxun-core/src/interpret.ts | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 09076a4d..b7bc3e10 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -372,7 +372,8 @@ export default class Interpreter extends EventEmitter { private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) { let allResults: Record[] = []; let previousHeight = 0; - let scrapedItems: Set = new Set(); // Track unique items to avoid re-scraping + // track unique items to avoid re-scraping + let scrapedItems: Set = new Set(); while (true) { switch (config.pagination.type) { @@ -393,34 +394,26 @@ export default class Interpreter extends EventEmitter { break; case 'clickNext': const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - - // Filter out items that have already been scraped + // filter out items that have already been scraped const newResults = pageResults.filter(item => { const uniqueKey = JSON.stringify(item); if (scrapedItems.has(uniqueKey)) return false; scrapedItems.add(uniqueKey); return true; }); - allResults = allResults.concat(newResults); - - // If the limit is reached, return the required number of items + // if the limit is reached, return the required number of items if (config.limit && allResults.length >= config.limit) { return allResults.slice(0, config.limit); } - - // Check if there's a next page button const nextButton = await page.$(config.pagination.selector); if (!nextButton) { - return allResults; // No more pages to navigate + return allResults; } - - // Click the next button and wait for the navigation to complete await Promise.all([ nextButton.click(), page.waitForNavigation({ waitUntil: 'networkidle' }) ]); - break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); From 56bd3c38ae341bf14f14ad1d8a16edb587749411 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 19 Aug 2024 01:42:41 +0530 Subject: [PATCH 145/172] chore: lint --- maxun-core/src/interpret.ts | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index b7bc3e10..f8cd8cfe 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -394,27 +394,27 @@ export default class Interpreter extends EventEmitter { break; case 'clickNext': const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - // filter out items that have already been scraped - const newResults = pageResults.filter(item => { + // filter out items that have already been scraped + const newResults = pageResults.filter(item => { const uniqueKey = JSON.stringify(item); if (scrapedItems.has(uniqueKey)) return false; scrapedItems.add(uniqueKey); return true; - }); - allResults = allResults.concat(newResults); - // if the limit is reached, return the required number of items - if (config.limit && allResults.length >= config.limit) { + }); + allResults = allResults.concat(newResults); + // if the limit is reached, return the required number of items + if (config.limit && allResults.length >= config.limit) { return allResults.slice(0, config.limit); - } - const nextButton = await page.$(config.pagination.selector); - if (!nextButton) { + } + const nextButton = await page.$(config.pagination.selector); + if (!nextButton) { return allResults; - } - await Promise.all([ + } + await Promise.all([ nextButton.click(), page.waitForNavigation({ waitUntil: 'networkidle' }) - ]); - break; + ]); + break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); if (!loadMoreButton) { From 1b3bfddcb153fecfa444b8bde160ff93a8e5d022 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 19 Aug 2024 02:47:41 +0530 Subject: [PATCH 146/172] feat: track pages --- maxun-core/src/interpret.ts | 3 +++ 1 file changed, 3 insertions(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index f8cd8cfe..326e394e 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -374,6 +374,7 @@ export default class Interpreter extends EventEmitter { let previousHeight = 0; // track unique items to avoid re-scraping let scrapedItems: Set = new Set(); + let currentPage = 1 while (true) { switch (config.pagination.type) { @@ -414,6 +415,8 @@ export default class Interpreter extends EventEmitter { nextButton.click(), page.waitForNavigation({ waitUntil: 'networkidle' }) ]); + + currentPage += 1; break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); From a715c56afee941fc78e559a6fecd328f2937b1db Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 19 Aug 2024 18:18:30 +0530 Subject: [PATCH 147/172] feat: initialize new set for unique items per page --- maxun-core/src/interpret.ts | 58 +++++++++++++++++++++---------------- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 326e394e..03acaefe 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -372,9 +372,9 @@ export default class Interpreter extends EventEmitter { private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) { let allResults: Record[] = []; let previousHeight = 0; - // track unique items to avoid re-scraping - let scrapedItems: Set = new Set(); - let currentPage = 1 + let currentPage = 1; + // track unique items per page to avoid re-scraping + let scrapedItemsPerPage: Set[] = []; while (true) { switch (config.pagination.type) { @@ -395,29 +395,37 @@ export default class Interpreter extends EventEmitter { break; case 'clickNext': const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - // filter out items that have already been scraped - const newResults = pageResults.filter(item => { - const uniqueKey = JSON.stringify(item); - if (scrapedItems.has(uniqueKey)) return false; - scrapedItems.add(uniqueKey); - return true; - }); - allResults = allResults.concat(newResults); - // if the limit is reached, return the required number of items - if (config.limit && allResults.length >= config.limit) { - return allResults.slice(0, config.limit); - } - const nextButton = await page.$(config.pagination.selector); - if (!nextButton) { - return allResults; - } - await Promise.all([ - nextButton.click(), - page.waitForNavigation({ waitUntil: 'networkidle' }) - ]); + + // Initialize a new Set for the current page if it doesn't exist + if (!scrapedItemsPerPage[currentPage - 1]) { + scrapedItemsPerPage[currentPage - 1] = new Set(); + } - currentPage += 1; - break; + const newResults = pageResults.filter(item => { + const uniqueKey = JSON.stringify(item); + if (scrapedItemsPerPage[currentPage - 1].has(uniqueKey)) return false; + scrapedItemsPerPage[currentPage - 1].add(uniqueKey); + return true; + }); + + allResults = allResults.concat(newResults); + + if (config.limit && allResults.length >= config.limit) { + return allResults.slice(0, config.limit); + } + + const nextButton = await page.$(config.pagination.selector); + if (!nextButton) { + return allResults; + } + + await Promise.all([ + nextButton.click(), + page.waitForNavigation({ waitUntil: 'networkidle' }) + ]); + + currentPage++; + break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); if (!loadMoreButton) { From e4b7ca52355b59d0296af10d3904297757f26300 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 19 Aug 2024 19:23:31 +0530 Subject: [PATCH 148/172] feat: scrape multiple pages --- maxun-core/src/interpret.ts | 70 +++++++++++++++++++------------------ 1 file changed, 36 insertions(+), 34 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 03acaefe..3316ad49 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -374,7 +374,7 @@ export default class Interpreter extends EventEmitter { let previousHeight = 0; let currentPage = 1; // track unique items per page to avoid re-scraping - let scrapedItemsPerPage: Set[] = []; + let scrapedItems: Set = new Set(); // Track unique items across all pages while (true) { switch (config.pagination.type) { @@ -393,39 +393,41 @@ export default class Interpreter extends EventEmitter { break; case 'scrollUp': break; - case 'clickNext': - const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - - // Initialize a new Set for the current page if it doesn't exist - if (!scrapedItemsPerPage[currentPage - 1]) { - scrapedItemsPerPage[currentPage - 1] = new Set(); - } - - const newResults = pageResults.filter(item => { - const uniqueKey = JSON.stringify(item); - if (scrapedItemsPerPage[currentPage - 1].has(uniqueKey)) return false; - scrapedItemsPerPage[currentPage - 1].add(uniqueKey); - return true; - }); - - allResults = allResults.concat(newResults); - - if (config.limit && allResults.length >= config.limit) { - return allResults.slice(0, config.limit); - } - - const nextButton = await page.$(config.pagination.selector); - if (!nextButton) { - return allResults; - } - - await Promise.all([ - nextButton.click(), - page.waitForNavigation({ waitUntil: 'networkidle' }) - ]); - - currentPage++; - break; + case 'clickNext': + while (true) { + // Scrape the current page + const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + + // Filter out already scraped items + const newResults = pageResults.filter(item => { + const uniqueKey = JSON.stringify(item); + if (scrapedItems.has(uniqueKey)) return false; // Ignore if already scraped + scrapedItems.add(uniqueKey); // Mark as scraped + return true; + }); + + allResults = allResults.concat(newResults); + + // Stop if limit is reached + if (config.limit && allResults.length >= config.limit) { + return allResults.slice(0, config.limit); + } + + // Move to the next page + const nextButton = await page.$(config.pagination.selector); + if (!nextButton) { + return allResults; // No more pages to scrape + } + + // Click the "Next" button and wait for the next page to load + await Promise.all([ + nextButton.click(), + page.waitForNavigation({ waitUntil: 'networkidle' }) + ]); + + currentPage++; // Increment page count and proceed + break; + } case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); if (!loadMoreButton) { From 8857de0b8fd99596fac74fd1f1b3d0de00e23aa1 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 20 Aug 2024 21:48:11 +0530 Subject: [PATCH 149/172] fix: remove while loop --- maxun-core/src/interpret.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 3316ad49..973211dc 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -374,7 +374,7 @@ export default class Interpreter extends EventEmitter { let previousHeight = 0; let currentPage = 1; // track unique items per page to avoid re-scraping - let scrapedItems: Set = new Set(); // Track unique items across all pages + let scrapedItems: Set = new Set(); while (true) { switch (config.pagination.type) { @@ -394,7 +394,6 @@ export default class Interpreter extends EventEmitter { case 'scrollUp': break; case 'clickNext': - while (true) { // Scrape the current page const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); @@ -425,9 +424,10 @@ export default class Interpreter extends EventEmitter { page.waitForNavigation({ waitUntil: 'networkidle' }) ]); - currentPage++; // Increment page count and proceed + // Wait a bit for the content to load + await page.waitForTimeout(1000); + break; - } case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); if (!loadMoreButton) { From 24a3dd9a97b5abd65dd42b57d4f556a1958ebe3e Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 20 Aug 2024 22:01:29 +0530 Subject: [PATCH 150/172] feat: remove redundant scraping outside switch case --- maxun-core/src/interpret.ts | 7 ------- 1 file changed, 7 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 973211dc..e81cb4eb 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -441,13 +441,6 @@ export default class Interpreter extends EventEmitter { return allResults; } - // Wait a bit before next iteration to ensure content is loaded - await page.waitForTimeout(1000); - - // Scrape the current page after scrolling/clicking - const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(pageResults); - if (config.limit && allResults.length >= config.limit) { allResults = allResults.slice(0, config.limit); break; From 6b6583647a5676ecb993fe9aa57cde37c16c4672 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 20 Aug 2024 23:15:21 +0530 Subject: [PATCH 151/172] chore: remove comments --- maxun-core/src/interpret.ts | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index e81cb4eb..d7b60d66 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -394,7 +394,6 @@ export default class Interpreter extends EventEmitter { case 'scrollUp': break; case 'clickNext': - // Scrape the current page const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); // Filter out already scraped items @@ -407,31 +406,31 @@ export default class Interpreter extends EventEmitter { allResults = allResults.concat(newResults); - // Stop if limit is reached + if (config.limit && allResults.length >= config.limit) { return allResults.slice(0, config.limit); } - // Move to the next page + const nextButton = await page.$(config.pagination.selector); if (!nextButton) { return allResults; // No more pages to scrape } - // Click the "Next" button and wait for the next page to load + await Promise.all([ nextButton.click(), page.waitForNavigation({ waitUntil: 'networkidle' }) ]); - // Wait a bit for the content to load + await page.waitForTimeout(1000); break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); if (!loadMoreButton) { - return allResults; // No more items to load + return allResults; } await loadMoreButton.click(); break; From 6d5ffb52221ca701e4b426cf1df663b59afdd257 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 20 Aug 2024 23:15:33 +0530 Subject: [PATCH 152/172] chore: lint --- maxun-core/src/interpret.ts | 68 ++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 34 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index d7b60d66..0cfa2309 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -374,7 +374,7 @@ export default class Interpreter extends EventEmitter { let previousHeight = 0; let currentPage = 1; // track unique items per page to avoid re-scraping - let scrapedItems: Set = new Set(); + let scrapedItems: Set = new Set(); while (true) { switch (config.pagination.type) { @@ -393,40 +393,40 @@ export default class Interpreter extends EventEmitter { break; case 'scrollUp': break; - case 'clickNext': - const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - - // Filter out already scraped items - const newResults = pageResults.filter(item => { - const uniqueKey = JSON.stringify(item); - if (scrapedItems.has(uniqueKey)) return false; // Ignore if already scraped - scrapedItems.add(uniqueKey); // Mark as scraped - return true; - }); - - allResults = allResults.concat(newResults); - - - if (config.limit && allResults.length >= config.limit) { - return allResults.slice(0, config.limit); - } - - - const nextButton = await page.$(config.pagination.selector); - if (!nextButton) { - return allResults; // No more pages to scrape - } - - - await Promise.all([ - nextButton.click(), - page.waitForNavigation({ waitUntil: 'networkidle' }) - ]); - - - await page.waitForTimeout(1000); + case 'clickNext': + const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - break; + // Filter out already scraped items + const newResults = pageResults.filter(item => { + const uniqueKey = JSON.stringify(item); + if (scrapedItems.has(uniqueKey)) return false; // Ignore if already scraped + scrapedItems.add(uniqueKey); // Mark as scraped + return true; + }); + + allResults = allResults.concat(newResults); + + + if (config.limit && allResults.length >= config.limit) { + return allResults.slice(0, config.limit); + } + + + const nextButton = await page.$(config.pagination.selector); + if (!nextButton) { + return allResults; // No more pages to scrape + } + + + await Promise.all([ + nextButton.click(), + page.waitForNavigation({ waitUntil: 'networkidle' }) + ]); + + + await page.waitForTimeout(1000); + + break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); if (!loadMoreButton) { From c2db3cc720bcb9d982220aae4a04dbf1439902a0 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 20 Aug 2024 23:16:11 +0530 Subject: [PATCH 153/172] fix: whitespace format --- maxun-core/src/interpret.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 0cfa2309..b03ed140 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -406,26 +406,20 @@ export default class Interpreter extends EventEmitter { allResults = allResults.concat(newResults); - if (config.limit && allResults.length >= config.limit) { return allResults.slice(0, config.limit); } - const nextButton = await page.$(config.pagination.selector); if (!nextButton) { return allResults; // No more pages to scrape } - - await Promise.all([ nextButton.click(), page.waitForNavigation({ waitUntil: 'networkidle' }) ]); - await page.waitForTimeout(1000); - break; case 'clickLoadMore': const loadMoreButton = await page.$(config.pagination.selector); From 91c10c72de5c9d707f5ca066d50c1de2079e4d31 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 04:37:46 +0530 Subject: [PATCH 154/172] feat: add scrapeListAuto as custom action --- src/shared/types.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/shared/types.ts b/src/shared/types.ts index 686eaf20..0a259dea 100644 --- a/src/shared/types.ts +++ b/src/shared/types.ts @@ -23,4 +23,4 @@ export interface ScreenshotSettings { type?: "jpeg" | "png"; }; -export declare type CustomActions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList'; +export declare type CustomActions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList' | 'scrapeListAuto'; From 6de92bfe4f0669e76457cc5a2a9ece4dec4b6452 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 04:42:00 +0530 Subject: [PATCH 155/172] chore(deps): stealth plugins --- maxun-core/package.json | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/maxun-core/package.json b/maxun-core/package.json index 2fa29f22..484ac6f9 100644 --- a/maxun-core/package.json +++ b/maxun-core/package.json @@ -23,7 +23,11 @@ "author": "Karishma Shukla", "license": "MIT", "dependencies": { + "@cliqz/adblocker-playwright": "^1.31.3", + "cross-fetch": "^4.0.0", "joi": "^17.6.0", - "playwright": "^1.20.1" + "playwright": "^1.20.1", + "playwright-extra": "^4.3.6", + "puppeteer-extra-plugin-stealth": "^2.11.2" } } From dd119955d251d09fc89cc4943e46845a09044be3 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:06:34 +0530 Subject: [PATCH 156/172] feat: create blocker for Interpreter --- maxun-core/src/interpret.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index b03ed140..101a9d61 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -1,5 +1,7 @@ /* eslint-disable no-await-in-loop, no-restricted-syntax */ import { Page, PageScreenshotOptions } from 'playwright'; +import { fullLists, PlaywrightBlocker, Request } from '@cliqz/adblocker-playwright'; +import fetch from 'cross-fetch'; import path from 'path'; import { EventEmitter } from 'events'; @@ -46,6 +48,8 @@ export default class Interpreter extends EventEmitter { private log: typeof log; + private blocker: PlaywrightBlocker | null = null; + constructor(workflow: WorkflowFile, options?: Partial) { super(); this.workflow = workflow.workflow; From d23dad199027395adfcee8126d8ed2941881fb66 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:10:56 +0530 Subject: [PATCH 157/172] feat: initialize ad-blocker --- maxun-core/src/interpret.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 101a9d61..e1dc5bbe 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -81,6 +81,12 @@ export default class Interpreter extends EventEmitter { oldLog(...args); }; } + + PlaywrightBlocker.fromPrebuiltAdsAndTracking(fetch).then((blocker) => { + this.blocker = blocker; + }).catch((err) => { + this.log(`Failed to initialize ad-blocker:`, Level.ERROR); + }) } /** From b8679b356c8747979a54cd08c443a7cfc72651e0 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:11:28 +0530 Subject: [PATCH 158/172] fix: remove extra () --- maxun-core/src/interpret.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index e1dc5bbe..2f20bcc1 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -82,9 +82,9 @@ export default class Interpreter extends EventEmitter { }; } - PlaywrightBlocker.fromPrebuiltAdsAndTracking(fetch).then((blocker) => { + PlaywrightBlocker.fromPrebuiltAdsAndTracking(fetch).then(blocker => { this.blocker = blocker; - }).catch((err) => { + }).catch(err => { this.log(`Failed to initialize ad-blocker:`, Level.ERROR); }) } From cdb7ee0429bb359630b3b1974212a717f75b9e46 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:14:58 +0530 Subject: [PATCH 159/172] feat: apply ad blocker in page --- maxun-core/src/interpret.ts | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 2f20bcc1..ff0607f8 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -89,6 +89,12 @@ export default class Interpreter extends EventEmitter { }) } + private async applyAdBlocker(page: Page): Promise { + if (this.blocker) { + await this.blocker.enableBlockingInPage(page); + } + } + /** * Returns the context object from given Page and the current workflow.\ * \ From 1b06d8c00f22780321eba52c5a82a8464ff06ff7 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:16:15 +0530 Subject: [PATCH 160/172] feat: apply ad-blocker before running loop --- maxun-core/src/interpret.ts | 2 ++ 1 file changed, 2 insertions(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index ff0607f8..f314ae26 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -460,6 +460,8 @@ export default class Interpreter extends EventEmitter { } private async runLoop(p: Page, workflow: Workflow) { + // apply ad-blocker to the current page + await this.applyAdBlocker(p); const usedActions: string[] = []; let lastAction = null; let repeatCount = 0; From 130f59a7c9882fd714b0549cbcaf16b44e74d896 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:24:41 +0530 Subject: [PATCH 161/172] feat: disable ad blocker --- maxun-core/src/interpret.ts | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index f314ae26..994346ea 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -95,6 +95,13 @@ export default class Interpreter extends EventEmitter { } } + private async disableAdBlocker(page: Page): Promise { + if (this.blocker) { + await this.blocker.disableBlockingInPage(page); + } +} + + /** * Returns the context object from given Page and the current workflow.\ * \ From 321bd01704b9a193ae08bfd98d853b7e728474b6 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:24:52 +0530 Subject: [PATCH 162/172] chore: lint --- maxun-core/src/interpret.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 994346ea..e2acfa96 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -97,9 +97,9 @@ export default class Interpreter extends EventEmitter { private async disableAdBlocker(page: Page): Promise { if (this.blocker) { - await this.blocker.disableBlockingInPage(page); + await this.blocker.disableBlockingInPage(page); } -} + } /** From 57b56d3fcbf6a6f823ed24388525821418c15b14 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:25:30 +0530 Subject: [PATCH 163/172] feat: disable ad-blocker once runLoop closes --- maxun-core/src/interpret.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index e2acfa96..876dd2bb 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -534,6 +534,7 @@ export default class Interpreter extends EventEmitter { this.log(e, Level.ERROR); } } else { + await this.disableAdBlocker(p); return; } } From 6eb1d94dea4f5a6245fd4c608764114612f0cf11 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:25:54 +0530 Subject: [PATCH 164/172] fix: remove fullLists & Request --- maxun-core/src/interpret.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 876dd2bb..17a7143b 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -1,6 +1,6 @@ /* eslint-disable no-await-in-loop, no-restricted-syntax */ import { Page, PageScreenshotOptions } from 'playwright'; -import { fullLists, PlaywrightBlocker, Request } from '@cliqz/adblocker-playwright'; +import { PlaywrightBlocker } from '@cliqz/adblocker-playwright'; import fetch from 'cross-fetch'; import path from 'path'; From a07e9b69696c6c624674e23b9b6bb31cd380a9db Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:26:39 +0530 Subject: [PATCH 165/172] fix: remove old scrapeList implementation --- maxun-core/src/interpret.ts | 6 ------ 1 file changed, 6 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 17a7143b..db8d2970 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -101,7 +101,6 @@ export default class Interpreter extends EventEmitter { } } - /** * Returns the context object from given Page and the current workflow.\ * \ @@ -314,11 +313,6 @@ export default class Interpreter extends EventEmitter { await this.options.serializableCallback(scrapeResult); }, - // scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { - // await this.ensureScriptsLoaded(page); - // const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config); - // await this.options.serializableCallback(scrapeResults); - // }, scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { await this.ensureScriptsLoaded(page); From 928dc6141bdafbe3d57f5d82b19148baa0a77991 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:27:16 +0530 Subject: [PATCH 166/172] fix: -rm currentPage var --- maxun-core/src/interpret.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index db8d2970..ecbce5dc 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -385,11 +385,9 @@ export default class Interpreter extends EventEmitter { } } - private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) { let allResults: Record[] = []; let previousHeight = 0; - let currentPage = 1; // track unique items per page to avoid re-scraping let scrapedItems: Set = new Set(); From f8346f89858344939e117a4401dd5ff5e3ced121 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:27:37 +0530 Subject: [PATCH 167/172] chore: lint --- maxun-core/src/interpret.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index ecbce5dc..61c781dc 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -313,7 +313,6 @@ export default class Interpreter extends EventEmitter { await this.options.serializableCallback(scrapeResult); }, - scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { await this.ensureScriptsLoaded(page); if (!config.pagination) { From dd12d136af1bfbe774bc84ac4415b5322a6a1fd0 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:29:41 +0530 Subject: [PATCH 168/172] fix: format --- maxun-core/src/preprocessor.ts | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/maxun-core/src/preprocessor.ts b/maxun-core/src/preprocessor.ts index 9ad15c2a..89fbb0c4 100644 --- a/maxun-core/src/preprocessor.ts +++ b/maxun-core/src/preprocessor.ts @@ -9,6 +9,9 @@ import { operators } from './types/logic'; */ export default class Preprocessor { static validateWorkflow(workflow: WorkflowFile): any { + + console.log(`Workflow from preprocessor: ${workflow}`) + const regex = Joi.object({ $regex: Joi.string().required(), }); @@ -46,11 +49,11 @@ export default class Preprocessor { return error; } -/** -* Extracts parameter names from the workflow. -* @param {WorkflowFile} workflow The given workflow -* @returns {String[]} List of parameters' names. -*/ + /** + * Extracts parameter names from the workflow. + * @param {WorkflowFile} workflow The given workflow + * @returns {String[]} List of parameters' names. + */ static getParams(workflow: WorkflowFile): string[] { const getParamsRecurse = (object: any): string[] => { if (typeof object === 'object') { @@ -69,10 +72,10 @@ export default class Preprocessor { return getParamsRecurse(workflow.workflow); } -/** -* List all the selectors used in the given workflow (only literal "selector" -* field in WHERE clauses so far) -*/ + /** + * List all the selectors used in the given workflow (only literal "selector" + * field in WHERE clauses so far) + */ // TODO : add recursive selector search (also in click/fill etc. events?) static extractSelectors(workflow: Workflow): SelectorArray { /** @@ -107,11 +110,11 @@ export default class Preprocessor { ], []); } -/** -* Recursively crawl `object` and initializes params - replaces the `{$param : paramName}` objects -* with the defined value. -* @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched). -*/ + /** + * Recursively crawl `object` and initializes params - replaces the `{$param : paramName}` objects + * with the defined value. + * @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched). + */ static initWorkflow(workflow: Workflow, params?: ParamType): Workflow { const paramNames = this.getParams({ workflow }); From c2fe629267f3e7d4f9d31c6365e3954121334f1e Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:30:21 +0530 Subject: [PATCH 169/172] fix: remove log of workflow from preprocessor --- maxun-core/src/preprocessor.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/maxun-core/src/preprocessor.ts b/maxun-core/src/preprocessor.ts index 89fbb0c4..7c31004e 100644 --- a/maxun-core/src/preprocessor.ts +++ b/maxun-core/src/preprocessor.ts @@ -9,9 +9,6 @@ import { operators } from './types/logic'; */ export default class Preprocessor { static validateWorkflow(workflow: WorkflowFile): any { - - console.log(`Workflow from preprocessor: ${workflow}`) - const regex = Joi.object({ $regex: Joi.string().required(), }); From 2bc9f70e092db38086b4ef6df6db8dd4a64ccf25 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 05:55:05 +0530 Subject: [PATCH 170/172] fix(temporary): do not disable ad blocker manually --- maxun-core/src/interpret.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 61c781dc..4068f7be 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -525,7 +525,7 @@ export default class Interpreter extends EventEmitter { this.log(e, Level.ERROR); } } else { - await this.disableAdBlocker(p); + //await this.disableAdBlocker(p); return; } } From 044513f54d0a65d0b5179ecc88a3746eaeedbae2 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 21:14:42 +0530 Subject: [PATCH 171/172] feat: --- src/pages/RecordingPage.tsx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/pages/RecordingPage.tsx b/src/pages/RecordingPage.tsx index 45b422b2..fa5c3a14 100644 --- a/src/pages/RecordingPage.tsx +++ b/src/pages/RecordingPage.tsx @@ -1,6 +1,7 @@ import React, { useCallback, useEffect, useState } from 'react'; import { Grid } from '@mui/material'; import { BrowserContent } from "../components/organisms/BrowserContent"; +import { InterpretationLog } from "../components/molecules/InterpretationLog"; import { startRecording, getActiveBrowserId } from "../api/recording"; import { LeftSidePanel } from "../components/organisms/LeftSidePanel"; import { RightSidePanel } from "../components/organisms/RightSidePanel"; @@ -121,6 +122,7 @@ export const RecordingPage = ({ recordingName }: RecordingPageProps) => { + From c90b80e9ae504910a393d3be205d7c9b84050e4d Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 21 Aug 2024 21:15:59 +0530 Subject: [PATCH 172/172] chore: lint --- src/components/molecules/InterpretationLog.tsx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/components/molecules/InterpretationLog.tsx b/src/components/molecules/InterpretationLog.tsx index 18fed72b..02592511 100644 --- a/src/components/molecules/InterpretationLog.tsx +++ b/src/components/molecules/InterpretationLog.tsx @@ -27,7 +27,7 @@ export const InterpretationLog = () => { } const handleLog = useCallback((msg: string, date: boolean = true) => { - if (!date){ + if (!date) { setLog((prevState) => prevState + '\n' + msg); } else { setLog((prevState) => prevState + '\n' + `[${new Date().toLocaleString()}] ` + msg); @@ -42,9 +42,9 @@ export const InterpretationLog = () => { scrollLogToBottom(); }, [log, scrollLogToBottom]) - const handleBinaryCallback = useCallback(({data, mimetype}: any) => { + const handleBinaryCallback = useCallback(({ data, mimetype }: any) => { setLog((prevState) => - prevState + '\n' + '---------- Binary output data received ----------' + '\n' + prevState + '\n' + '---------- Binary output data received ----------' + '\n' + `mimetype: ${mimetype}` + '\n' + `data: ${JSON.stringify(data)}` + '\n' + '------------------------------------------------'); scrollLogToBottom(); @@ -66,10 +66,10 @@ export const InterpretationLog = () => { } + expandIcon={} aria-controls="panel1bh-content" id="panel1bh-header" > @@ -88,8 +88,8 @@ export const InterpretationLog = () => { {log} -
+