diff --git a/maxun-core/package.json b/maxun-core/package.json index 2fa29f22..484ac6f9 100644 --- a/maxun-core/package.json +++ b/maxun-core/package.json @@ -23,7 +23,11 @@ "author": "Karishma Shukla", "license": "MIT", "dependencies": { + "@cliqz/adblocker-playwright": "^1.31.3", + "cross-fetch": "^4.0.0", "joi": "^17.6.0", - "playwright": "^1.20.1" + "playwright": "^1.20.1", + "playwright-extra": "^4.3.6", + "puppeteer-extra-plugin-stealth": "^2.11.2" } } diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 262e63ec..99c8ee33 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -126,6 +126,85 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return out; } +async function scrollDownToLoadMore(selector, limit) { + let previousHeight = 0; + let itemsLoaded = 0; + + while (itemsLoaded < limit) { + window.scrollBy(0, window.innerHeight); + await new Promise(resolve => setTimeout(resolve, 1000)); + + const currentHeight = document.body.scrollHeight; + + if (currentHeight === previousHeight) { + break; // No more items to load + } + + previousHeight = currentHeight; + itemsLoaded += document.querySelectorAll(selector).length; + } +} + +async function scrollUpToLoadMore(selector, limit) { + let previousHeight = 0; + let itemsLoaded = 0; + + while (itemsLoaded < limit) { + window.scrollBy(0, -window.innerHeight); + await new Promise(resolve => setTimeout(resolve, 1000)); + + const currentHeight = document.body.scrollHeight; + + if (currentHeight === previousHeight) { + break; // No more items to load + } + + previousHeight = currentHeight; + itemsLoaded += document.querySelectorAll(selector).length; + } +} + +async function clickNextPagination(selector, scrapedData, limit) { + // Check if the limit is already met + if (scrapedData.length >= limit) { + return false; // Return false to indicate no further action is needed + } + + // Check if a single "Next" button exists + let nextButton = document.querySelector(selector); + + if (nextButton) { + nextButton.click(); + return true; // Indicate that pagination occurred + } else { + // Handle pagination with numbers + const paginationButtons = document.querySelectorAll(selector); + let clicked = false; + + // Loop through pagination buttons to find the current active page + for (let i = 0; i < paginationButtons.length - 1; i++) { + const button = paginationButtons[i]; + if (button.classList.contains('active')) { + // Click the next button if available + const nextButtonInPagination = paginationButtons[i + 1]; + if (nextButtonInPagination) { + nextButtonInPagination.click(); + clicked = true; + break; + } + } + } + + // If no next button was clicked, we might be on the last page + if (!clicked) { + throw new Error("No more items to load or pagination has ended."); + } + + return clicked; // Indicate whether pagination occurred + } +} + + /** * Returns a "scrape" result from the current page. * @returns {Array} *Curated* array of scraped information (with sparse rows removed) @@ -183,6 +262,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, }; /** + * TODO: Simplify. * Given an object with named lists of elements, * groups the elements by their distance in the DOM tree. * @param {Object.} lists The named lists of HTML elements. @@ -250,4 +330,134 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, )); } + /** + * Scrapes multiple lists of similar items based on a template item. + * @param {Object} config - Configuration object + * @param {string} config.listSelector - Selector for the list container(s) + * @param {Object.} config.fields - Fields to scrape + * @param {number} [config.limit] - Maximum number of items to scrape per list (optional) + * @param {boolean} [config.flexible=false] - Whether to use flexible matching for field selectors + * @returns {Array.>} Array of arrays of scraped items, one sub-array per list + */ + window.scrapeList = async function ({ listSelector, fields, limit = 10 }) { + const scrapedData = []; + + while (scrapedData.length < limit) { + // Get all parent elements matching the listSelector + const parentElements = Array.from(document.querySelectorAll(listSelector)); + + // Iterate through each parent element + for (const parent of parentElements) { + if (scrapedData.length >= limit) break; + const record = {}; + + // For each field, select the corresponding element within the parent + for (const [label, { selector, attribute }] of Object.entries(fields)) { + const fieldElement = parent.querySelector(selector); + + if (fieldElement) { + if (attribute === 'innerText') { + record[label] = fieldElement.innerText.trim(); + } else if (attribute === 'innerHTML') { + record[label] = fieldElement.innerHTML.trim(); + } else if (attribute === 'src') { + record[label] = fieldElement.src; + } else if (attribute === 'href') { + record[label] = fieldElement.href; + } else { + record[label] = fieldElement.getAttribute(attribute); + } + } + } + scrapedData.push(record); + } + } + return scrapedData + }; + + + /** + * Gets all children of the elements matching the listSelector, + * returning their CSS selectors and innerText. + * @param {string} listSelector - Selector for the list container(s) + * @returns {Array.} Array of objects, each containing the CSS selector and innerText of the children + */ + window.scrapeListAuto = function (listSelector) { + const lists = Array.from(document.querySelectorAll(listSelector)); + + const results = []; + + lists.forEach(list => { + const children = Array.from(list.children); + + children.forEach(child => { + const selectors = []; + let element = child; + + // Traverse up to gather the CSS selector for the element + while (element && element !== document) { + let selector = element.nodeName.toLowerCase(); + if (element.id) { + selector += `#${element.id}`; + selectors.push(selector); + break; + } else { + const className = element.className.trim().split(/\s+/).join('.'); + if (className) { + selector += `.${className}`; + } + selectors.push(selector); + element = element.parentElement; + } + } + + results.push({ + selector: selectors.reverse().join(' > '), + innerText: child.innerText.trim() + }); + }); + }); + + return results; + }; + + + window.scrollDown = async function (selector, limit) { + let previousHeight = 0; + let itemsLoaded = 0; + + while (itemsLoaded < limit) { + window.scrollTo(0, document.body.scrollHeight); + await new Promise(resolve => setTimeout(resolve, 1000)); + + const currentHeight = document.body.scrollHeight; + + if (currentHeight === previousHeight) { + break; // No more items to load + } + + previousHeight = currentHeight; + itemsLoaded += document.querySelectorAll(selector).length; + } + } + + window.scrollUp = async function (selector, limit) { + let previousHeight = 0; + let itemsLoaded = 0; + + while (itemsLoaded < limit) { + window.scrollBy(0, -window.innerHeight); + await new Promise(resolve => setTimeout(resolve, 1000)); + + const currentHeight = document.body.scrollHeight; + + if (currentHeight === previousHeight) { + break; // No more items to load + } + + previousHeight = currentHeight; + itemsLoaded += document.querySelectorAll(selector).length; + } + } + })(window); \ No newline at end of file diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 4860d2fd..4068f7be 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -1,5 +1,7 @@ /* eslint-disable no-await-in-loop, no-restricted-syntax */ import { Page, PageScreenshotOptions } from 'playwright'; +import { PlaywrightBlocker } from '@cliqz/adblocker-playwright'; +import fetch from 'cross-fetch'; import path from 'path'; import { EventEmitter } from 'events'; @@ -29,6 +31,7 @@ interface InterpreterOptions { }> } + /** * Class for running the Smart Workflows. */ @@ -45,6 +48,8 @@ export default class Interpreter extends EventEmitter { private log: typeof log; + private blocker: PlaywrightBlocker | null = null; + constructor(workflow: WorkflowFile, options?: Partial) { super(); this.workflow = workflow.workflow; @@ -76,6 +81,24 @@ export default class Interpreter extends EventEmitter { oldLog(...args); }; } + + PlaywrightBlocker.fromPrebuiltAdsAndTracking(fetch).then(blocker => { + this.blocker = blocker; + }).catch(err => { + this.log(`Failed to initialize ad-blocker:`, Level.ERROR); + }) + } + + private async applyAdBlocker(page: Page): Promise { + if (this.blocker) { + await this.blocker.enableBlockingInPage(page); + } + } + + private async disableAdBlocker(page: Page): Promise { + if (this.blocker) { + await this.blocker.disableBlockingInPage(page); + } } /** @@ -285,11 +308,32 @@ export default class Interpreter extends EventEmitter { scrapeSchema: async (schema: Record) => { await this.ensureScriptsLoaded(page); - + const scrapeResult = await page.evaluate((schemaObj) => window.scrapeSchema(schemaObj), schema); await this.options.serializableCallback(scrapeResult); }, + scrapeList: async (config: { listSelector: string, fields: any, limit?: number, pagination: any }) => { + await this.ensureScriptsLoaded(page); + if (!config.pagination) { + const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config); + await this.options.serializableCallback(scrapeResults); + } else { + const scrapeResults: Record[] = await this.handlePagination(page, config); + await this.options.serializableCallback(scrapeResults); + } + }, + + scrapeListAuto: async (config: { listSelector: string }) => { + await this.ensureScriptsLoaded(page); + + const scrapeResults: { selector: string, innerText: string }[] = await page.evaluate((listSelector) => { + return window.scrapeListAuto(listSelector); + }, config.listSelector); + + await this.options.serializableCallback(scrapeResults); + }, + scroll: async (pages?: number) => { await page.evaluate(async (pagesInternal) => { for (let i = 1; i <= (pagesInternal ?? 1); i += 1) { @@ -298,6 +342,7 @@ export default class Interpreter extends EventEmitter { } }, pages ?? 1); }, + script: async (code: string) => { const AsyncFunction: FunctionConstructor = Object.getPrototypeOf( async () => { }, @@ -305,6 +350,7 @@ export default class Interpreter extends EventEmitter { const x = new AsyncFunction('page', 'log', code); await x(page, this.log); }, + flag: async () => new Promise((res) => { this.emit('flag', page, res); }), @@ -338,7 +384,82 @@ export default class Interpreter extends EventEmitter { } } + private async handlePagination(page: Page, config: { listSelector: string, fields: any, limit?: number, pagination: any }) { + let allResults: Record[] = []; + let previousHeight = 0; + // track unique items per page to avoid re-scraping + let scrapedItems: Set = new Set(); + + while (true) { + switch (config.pagination.type) { + case 'scrollDown': + await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); + await page.waitForTimeout(2000); + + const currentHeight = await page.evaluate(() => document.body.scrollHeight); + if (currentHeight === previousHeight) { + const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(finalResults); + return allResults; + } + + previousHeight = currentHeight; + break; + case 'scrollUp': + break; + case 'clickNext': + const pageResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); + + // Filter out already scraped items + const newResults = pageResults.filter(item => { + const uniqueKey = JSON.stringify(item); + if (scrapedItems.has(uniqueKey)) return false; // Ignore if already scraped + scrapedItems.add(uniqueKey); // Mark as scraped + return true; + }); + + allResults = allResults.concat(newResults); + + if (config.limit && allResults.length >= config.limit) { + return allResults.slice(0, config.limit); + } + + const nextButton = await page.$(config.pagination.selector); + if (!nextButton) { + return allResults; // No more pages to scrape + } + await Promise.all([ + nextButton.click(), + page.waitForNavigation({ waitUntil: 'networkidle' }) + ]); + + await page.waitForTimeout(1000); + break; + case 'clickLoadMore': + const loadMoreButton = await page.$(config.pagination.selector); + if (!loadMoreButton) { + return allResults; + } + await loadMoreButton.click(); + break; + default: + const results = await page.evaluate((cfg) => window.scrapeList(cfg), config); + allResults = allResults.concat(results); + return allResults; + } + + if (config.limit && allResults.length >= config.limit) { + allResults = allResults.slice(0, config.limit); + break; + } + } + + return allResults; + } + private async runLoop(p: Page, workflow: Workflow) { + // apply ad-blocker to the current page + await this.applyAdBlocker(p); const usedActions: string[] = []; let lastAction = null; let repeatCount = 0; @@ -404,13 +525,14 @@ export default class Interpreter extends EventEmitter { this.log(e, Level.ERROR); } } else { + //await this.disableAdBlocker(p); return; } } } private async ensureScriptsLoaded(page: Page) { - const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function'); + const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function' && typeof window.scrapeList === 'function' && typeof window.scrapeListAuto === 'function' && typeof window.scrollDown === 'function' && typeof window.scrollUp === 'function'); if (!isScriptLoaded) { await page.addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); } diff --git a/maxun-core/src/preprocessor.ts b/maxun-core/src/preprocessor.ts index 9ad15c2a..7c31004e 100644 --- a/maxun-core/src/preprocessor.ts +++ b/maxun-core/src/preprocessor.ts @@ -46,11 +46,11 @@ export default class Preprocessor { return error; } -/** -* Extracts parameter names from the workflow. -* @param {WorkflowFile} workflow The given workflow -* @returns {String[]} List of parameters' names. -*/ + /** + * Extracts parameter names from the workflow. + * @param {WorkflowFile} workflow The given workflow + * @returns {String[]} List of parameters' names. + */ static getParams(workflow: WorkflowFile): string[] { const getParamsRecurse = (object: any): string[] => { if (typeof object === 'object') { @@ -69,10 +69,10 @@ export default class Preprocessor { return getParamsRecurse(workflow.workflow); } -/** -* List all the selectors used in the given workflow (only literal "selector" -* field in WHERE clauses so far) -*/ + /** + * List all the selectors used in the given workflow (only literal "selector" + * field in WHERE clauses so far) + */ // TODO : add recursive selector search (also in click/fill etc. events?) static extractSelectors(workflow: Workflow): SelectorArray { /** @@ -107,11 +107,11 @@ export default class Preprocessor { ], []); } -/** -* Recursively crawl `object` and initializes params - replaces the `{$param : paramName}` objects -* with the defined value. -* @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched). -*/ + /** + * Recursively crawl `object` and initializes params - replaces the `{$param : paramName}` objects + * with the defined value. + * @returns {Workflow} Copy of the given workflow, modified (the initial workflow is left untouched). + */ static initWorkflow(workflow: Workflow, params?: ParamType): Workflow { const paramNames = this.getParams({ workflow }); diff --git a/maxun-core/src/types/workflow.ts b/maxun-core/src/types/workflow.ts index 36c6d14d..f7cf180d 100644 --- a/maxun-core/src/types/workflow.ts +++ b/maxun-core/src/types/workflow.ts @@ -28,7 +28,7 @@ type MethodNames = { [K in keyof T]: T[K] extends Function ? K : never; }[keyof T]; -export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag'; +export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList' | 'scrapeListAuto'; export type What = { action: MethodNames | CustomFunctions, diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index ae2d2016..a7b5a641 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -7,6 +7,7 @@ import { getElementInformation, getRect, getSelectors, + getNonUniqueSelectors, isRuleOvershadowing, selectorAlreadyInWorkflow } from "../selector"; @@ -46,6 +47,12 @@ export class WorkflowGenerator { */ private socket: Socket; + /** + * getList is one of the custom actions from maxun-core. + * Used to provide appropriate selectors for the getList action. + */ + private getList: boolean = false; + /** * The public constructor of the WorkflowGenerator. * Takes socket for communication as a parameter and registers some important events on it. @@ -55,6 +62,7 @@ export class WorkflowGenerator { public constructor(socket: Socket) { this.socket = socket; this.registerEventHandlers(socket); + this.initializeSocketListeners(); } /** @@ -88,6 +96,15 @@ export class WorkflowGenerator { lastAction: '', } + /** + * Initializes the socket listeners for the generator. + */ + private initializeSocketListeners() { + this.socket.on('setGetList', (data: { getList: boolean }) => { + this.getList = data.getList; + }); + } + /** * Registers the event handlers for all generator-related events on the socket. * @param socket The socket used to communicate with the client. @@ -459,13 +476,17 @@ export class WorkflowGenerator { */ private generateSelector = async (page: Page, coordinates: Coordinates, action: ActionType) => { const elementInfo = await getElementInformation(page, coordinates); + + const selectorBasedOnCustomAction = (this.getList === true) + ? await getNonUniqueSelectors(page, coordinates) + : await getSelectors(page, coordinates); const bestSelector = getBestSelectorForAction( { type: action, tagName: elementInfo?.tagName as TagName || '', inputType: undefined, value: undefined, - selectors: await getSelectors(page, coordinates) || {}, + selectors: selectorBasedOnCustomAction || {}, timestamp: 0, isPassword: false, hasOnlyText: elementInfo?.hasOnlyText || false, @@ -488,6 +509,8 @@ export class WorkflowGenerator { if (rect) { this.socket.emit('highlighter', { rect, selector: displaySelector, elementInfo }); } + // reset getList after usage + this.getList = false; } /** diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index b94ed2d1..00a19f40 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -98,20 +98,20 @@ export const getElementInformation = async ( { x: coordinates.x, y: coordinates.y }, ); - if (elementInfo) { - if (elementInfo.tagName === 'A') { - if (elementInfo.innerText) { - console.log(`Link text: ${elementInfo.innerText}, URL: ${elementInfo.url}`); - } else { - console.log(`URL: ${elementInfo.url}`); - } - } else if (elementInfo.tagName === 'IMG') { - console.log(`Image URL: ${elementInfo.imageUrl}`); - } else { - console.log(`Element innerText: ${elementInfo.innerText}`); - } - } - + // if (elementInfo) { + // if (elementInfo.tagName === 'A') { + // if (elementInfo.innerText) { + // console.log(`Link text: ${elementInfo.innerText}, URL: ${elementInfo.url}`); + // } else { + // console.log(`URL: ${elementInfo.url}`); + // } + // } else if (elementInfo.tagName === 'IMG') { + // console.log(`Image URL: ${elementInfo.imageUrl}`); + // } else { + // console.log(`Element innerText: ${elementInfo.innerText}`); + // } + // } + return elementInfo; } catch (error) { const { message, stack } = error as Error; @@ -721,6 +721,66 @@ export const getSelectors = async (page: Page, coordinates: Coordinates) => { return null; }; + +/** + * Returns the best non-unique css {@link Selectors} for the element on the page. + * @param page The page instance. + * @param coordinates Coordinates of an element. + * @category WorkflowManagement-Selectors + * @returns {Promise} + */ + +export const getNonUniqueSelectors = async (page: Page, coordinates: Coordinates) => { + try { + const selectors = await page.evaluate(({ x, y }: { x: number, y: number }) => { + + function getNonUniqueSelector(element: HTMLElement): string { + let selector = element.tagName.toLowerCase(); + + // Avoid using IDs to maintain non-uniqueness + if (element.className) { + const classes = element.className.split(/\s+/).filter((cls: string) => Boolean(cls)); + if (classes.length > 0) { + // Exclude utility classes and escape special characters + const validClasses = classes.filter((cls: string) => !cls.startsWith('!') && !cls.includes(':')); + if (validClasses.length > 0) { + selector += '.' + validClasses.map(cls => CSS.escape(cls)).join('.'); + } + } + } + + return selector; + } + + function getSelectorPath(element: HTMLElement | null): string { + const path: string[] = []; + while (element && element !== document.body) { + const selector = getNonUniqueSelector(element); + path.unshift(selector); + element = element.parentElement; + } + return path.join(' > '); + } + + const element = document.elementFromPoint(x, y) as HTMLElement | null; + if (!element) return null; + + const generalSelector = getSelectorPath(element); + return { + generalSelector, + }; + }, coordinates); + + return selectors || {}; + } catch (error) { + console.error('Error in getNonUniqueSelectors:', error); + return {}; + } +}; + + + + /** * Returns the first pair from the given workflow that contains the given selector * inside the where condition, and it is the only selector there. diff --git a/src/components/atoms/Highlighter.tsx b/src/components/atoms/Highlighter.tsx index 34fb8f47..66d09327 100644 --- a/src/components/atoms/Highlighter.tsx +++ b/src/components/atoms/Highlighter.tsx @@ -24,10 +24,6 @@ export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, hei }; - //console.log('unmodifiedRect:', unmodifiedRect) - //console.log('rectangle:', rect) - //console.log('canvas rectangle:', canvasRect) - return (
` pointer-events: none !important; position: fixed !important; background: #ff5d5b26 !important; - outline: 4px solid pink !important; + outline: 4px solid red !important; //border: 4px solid #ff5d5b !important; z-index: 2147483647 !important; //border-radius: 5px; diff --git a/src/components/atoms/canvas.tsx b/src/components/atoms/canvas.tsx index f6e6fb1c..3e9d89c0 100644 --- a/src/components/atoms/canvas.tsx +++ b/src/components/atoms/canvas.tsx @@ -27,8 +27,9 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { const canvasRef = useRef(null); const { socket } = useSocketStore(); const { setLastAction, lastAction } = useGlobalInfoStore(); - const { getText, getScreenshot } = useActionContext(); + const { getText, getList } = useActionContext(); const getTextRef = useRef(getText); + const getListRef = useRef(getList); const notifyLastAction = (action: string) => { if (lastAction !== action) { @@ -40,7 +41,8 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { useEffect(() => { getTextRef.current = getText; - }, [getText]); + getListRef.current = getList; + }, [getText, getList]); const onMouseEvent = useCallback((event: MouseEvent) => { if (socket) { @@ -51,8 +53,9 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { switch (event.type) { case 'mousedown': const clickCoordinates = getMappedCoordinates(event, canvasRef.current, width, height); - if (getTextRef.current === true) { - console.log('get text') + if (getTextRef.current === true || getListRef.current === true) { + // todo: remove console.log and return + console.log('get text or get list is true'); } else { socket.emit('input:mousedown', clickCoordinates); } diff --git a/src/components/molecules/InterpretationLog.tsx b/src/components/molecules/InterpretationLog.tsx index 18fed72b..02592511 100644 --- a/src/components/molecules/InterpretationLog.tsx +++ b/src/components/molecules/InterpretationLog.tsx @@ -27,7 +27,7 @@ export const InterpretationLog = () => { } const handleLog = useCallback((msg: string, date: boolean = true) => { - if (!date){ + if (!date) { setLog((prevState) => prevState + '\n' + msg); } else { setLog((prevState) => prevState + '\n' + `[${new Date().toLocaleString()}] ` + msg); @@ -42,9 +42,9 @@ export const InterpretationLog = () => { scrollLogToBottom(); }, [log, scrollLogToBottom]) - const handleBinaryCallback = useCallback(({data, mimetype}: any) => { + const handleBinaryCallback = useCallback(({ data, mimetype }: any) => { setLog((prevState) => - prevState + '\n' + '---------- Binary output data received ----------' + '\n' + prevState + '\n' + '---------- Binary output data received ----------' + '\n' + `mimetype: ${mimetype}` + '\n' + `data: ${JSON.stringify(data)}` + '\n' + '------------------------------------------------'); scrollLogToBottom(); @@ -66,10 +66,10 @@ export const InterpretationLog = () => { } + expandIcon={} aria-controls="panel1bh-content" id="panel1bh-header" > @@ -88,8 +88,8 @@ export const InterpretationLog = () => { {log} -
+
diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index fd1589da..daf95fee 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -5,7 +5,7 @@ import { useBrowserDimensionsStore } from "../../context/browserDimensions"; import { Highlighter } from "../atoms/Highlighter"; import { GenericModal } from '../atoms/GenericModal'; import { useActionContext } from '../../context/browserActions'; -import { useBrowserSteps } from '../../context/browserSteps'; +import { useBrowserSteps, TextStep } from '../../context/browserSteps'; interface ElementInfo { tagName: string; @@ -45,10 +45,13 @@ export const BrowserWindow = () => { const [attributeOptions, setAttributeOptions] = useState([]); const [selectedElement, setSelectedElement] = useState<{ selector: string, info: ElementInfo | null } | null>(null); + const [listSelector, setListSelector] = useState(null); + const [fields, setFields] = useState>({}); + const { socket } = useSocketStore(); const { width, height } = useBrowserDimensionsStore(); - const { getText } = useActionContext(); - const { addTextStep } = useBrowserSteps(); + const { getText, getList } = useActionContext(); + const { addTextStep, addListStep } = useBrowserSteps(); const onMouseMove = (e: MouseEvent) => { if (canvasRef && canvasRef.current && highlighterData) { @@ -84,8 +87,11 @@ export const BrowserWindow = () => { }, [screenShot, canvasRef, socket, screencastHandler]); const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null }) => { + if (getList === true) { + socket?.emit('setGetList', { getList: true }); + } setHighlighterData(data); - }, [highlighterData]) + }, [highlighterData, getList, socket]); useEffect(() => { document.addEventListener('mousemove', onMouseMove, false); @@ -128,6 +134,45 @@ export const BrowserWindow = () => { }); } } + + if (getList === true && !listSelector) { + setListSelector(highlighterData.selector); + } else if (getList === true && listSelector) { + const options = getAttributeOptions(highlighterData.elementInfo?.tagName || ''); + if (options.length > 1) { + setAttributeOptions(options); + setSelectedElement({ + selector: highlighterData.selector, + info: highlighterData.elementInfo + }); + setShowAttributeModal(true); + } else { + const newField: TextStep = { + id: Date.now(), + type: 'text', + label: `Label ${Object.keys(fields).length + 1}`, + data: highlighterData.elementInfo?.innerText || '', + selectorObj: { + selector: highlighterData.selector, + tag: highlighterData.elementInfo?.tagName, + attribute: 'innerText' + } + }; + + setFields(prevFields => { + const updatedFields = { + ...prevFields, + [newField.label]: newField + }; + return updatedFields; + }); + + if (listSelector) { + addListStep(listSelector, { ...fields, [newField.label]: newField }); + } + } + + } } } }; @@ -153,6 +198,31 @@ export const BrowserWindow = () => { attribute: attribute }); } + if (getList === true) { + const newField: TextStep = { + id: Date.now(), + type: 'text', + label: `Label ${Object.keys(fields).length + 1}`, + data: selectedElement.info?.innerText || '', + selectorObj: { + selector: selectedElement.selector, + tag: selectedElement.info?.tagName, + attribute: attribute + } + }; + + setFields(prevFields => { + const updatedFields = { + ...prevFields, + [newField.label]: newField + }; + return updatedFields; + }); + + if (listSelector) { + addListStep(listSelector, { ...fields, [newField.label]: newField }); + } + } } } setShowAttributeModal(false); @@ -161,7 +231,7 @@ export const BrowserWindow = () => { return (
{ - getText === true ? ( + getText === true || getList === true ? ( { }} @@ -179,7 +249,7 @@ export const BrowserWindow = () => { ) : null } - {(getText === true && !showAttributeModal && highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ? + {((getText === true || getList === true) && !showAttributeModal && highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ? { const [textLabels, setTextLabels] = useState<{ [id: number]: string }>({}); @@ -20,7 +25,7 @@ export const RightSidePanel = () => { const [confirmedTextSteps, setConfirmedTextSteps] = useState<{ [id: number]: boolean }>({}); const { lastAction, notify } = useGlobalInfoStore(); - const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot } = useActionContext(); + const { getText, startGetText, stopGetText, getScreenshot, startGetScreenshot, stopGetScreenshot, getList, startGetList, stopGetList } = useActionContext(); const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep } = useBrowserSteps(); const { socket } = useSocketStore(); @@ -80,6 +85,49 @@ export const RightSidePanel = () => { } }, [stopGetText, getTextSettingsObject, socket, browserSteps, confirmedTextSteps]); + + const getListSettingsObject = useCallback(() => { + let settings: { listSelector?: string; fields?: Record } = {}; + + browserSteps.forEach(step => { + if (step.type === 'list' && step.listSelector && Object.keys(step.fields).length > 0) { + const fields: Record = {}; + Object.entries(step.fields).forEach(([label, field]) => { + if (field.selectorObj?.selector) { + fields[label] = { + selector: field.selectorObj.selector, + tag: field.selectorObj.tag, + attribute: field.selectorObj.attribute + }; + } + }); + + settings = { + listSelector: step.listSelector, + fields: fields + }; + + } + }); + + return settings; + }, [browserSteps]); + + + const stopCaptureAndEmitGetListSettings = useCallback(() => { + stopGetList(); + const settings = getListSettingsObject(); + if (settings) { + socket?.emit('action', { action: 'scrapeList', settings }); + } else { + notify('error', 'Unable to create list settings. Make sure you have defined a field for the list.'); + } + }, [stopGetList, getListSettingsObject, socket, notify]); + + // const handleListFieldChange = (stepId: number, key: 'label' | 'data', value: string) => { + // updateListStepField(stepId, key, value); + // }; + const captureScreenshot = (fullPage: boolean) => { const screenshotSettings: ScreenshotSettings = { fullPage, @@ -101,7 +149,17 @@ export const RightSidePanel = () => { - {!getText && !getScreenshot && } + {!getText && !getScreenshot && !getList && } + {getList && + <> + + + + + + } + + {!getText && !getScreenshot && !getList && } {getText && <> @@ -111,7 +169,7 @@ export const RightSidePanel = () => { } - {!getText && !getScreenshot && } + {!getText && !getScreenshot && !getList && } {getScreenshot && ( @@ -125,7 +183,7 @@ export const RightSidePanel = () => { {browserSteps.map(step => ( { - step.type === 'text' ? ( + step.type === 'text' && ( <> { )} - ) : ( - step.type === 'screenshot' && ( - - - - {`Take ${step.fullPage ? 'Fullpage' : 'Visible Part'} Screenshot`} - + )} + {step.type === 'screenshot' && ( + + + + {`Take ${step.fullPage ? 'Fullpage' : 'Visible Part'} Screenshot`} + + + )} + {step.type === 'list' && ( + <> + List Selected Successfully + {Object.entries(step.fields).map(([key, field]) => ( + + { }} + fullWidth + margin="normal" + InputProps={{ + startAdornment: ( + + + + ) + }} + /> + + + + ) + }} + /> - ) - ) - } + ))} + + )} ))} ); -}; - -export const ActionDescription = styled.p` - margin-left: 15px; -`; +}; \ No newline at end of file diff --git a/src/context/browserActions.tsx b/src/context/browserActions.tsx index 50d86777..5a7c12b5 100644 --- a/src/context/browserActions.tsx +++ b/src/context/browserActions.tsx @@ -2,9 +2,12 @@ import React, { createContext, useContext, useState, ReactNode } from 'react'; interface ActionContextProps { getText: boolean; + getList: boolean; getScreenshot: boolean; startGetText: () => void; stopGetText: () => void; + startGetList: () => void; + stopGetList: () => void; startGetScreenshot: () => void; stopGetScreenshot: () => void; } @@ -13,16 +16,20 @@ const ActionContext = createContext(undefined); export const ActionProvider = ({ children }: { children: ReactNode }) => { const [getText, setGetText] = useState(false); + const [getList, setGetList] = useState(false); const [getScreenshot, setGetScreenshot] = useState(false); const startGetText = () => setGetText(true); const stopGetText = () => setGetText(false); + const startGetList = () => setGetList(true); + const stopGetList = () => setGetList(false); + const startGetScreenshot = () => setGetScreenshot(true); const stopGetScreenshot = () => setGetScreenshot(false); return ( - + {children} ); @@ -34,4 +41,4 @@ export const useActionContext = () => { throw new Error('useActionContext must be used within an ActionProvider'); } return context; -}; \ No newline at end of file +}; diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index e2984e53..59cdb546 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -1,6 +1,6 @@ import React, { createContext, useContext, useState } from 'react'; -interface TextStep { +export interface TextStep { id: number; type: 'text'; label: string; @@ -14,10 +14,16 @@ interface ScreenshotStep { fullPage: boolean; } +export interface ListStep { + id: number; + type: 'list'; + listSelector: string; + fields: { [key: string]: TextStep }; +} -type BrowserStep = TextStep | ScreenshotStep; +type BrowserStep = TextStep | ScreenshotStep | ListStep; -interface SelectorObject { +export interface SelectorObject { selector: string; tag?: string; attribute?: string; @@ -27,6 +33,7 @@ interface SelectorObject { interface BrowserStepsContextType { browserSteps: BrowserStep[]; addTextStep: (label: string, data: string, selectorObj: SelectorObject) => void; + addListStep: (listSelector: string, fields: { [key: string]: TextStep }) => void addScreenshotStep: (fullPage: boolean) => void; deleteBrowserStep: (id: number) => void; updateBrowserTextStepLabel: (id: number, newLabel: string) => void; @@ -44,6 +51,31 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ ]); }; + const addListStep = (listSelector: string, newFields: { [key: string]: TextStep }) => { + setBrowserSteps(prevSteps => { + const existingListStepIndex = prevSteps.findIndex( + step => step.type === 'list' && step.listSelector === listSelector + ); + if (existingListStepIndex !== -1) { + // Update the existing ListStep with new fields + const updatedSteps = [...prevSteps]; + const existingListStep = updatedSteps[existingListStepIndex] as ListStep; + updatedSteps[existingListStepIndex] = { + ...existingListStep, + fields: { ...existingListStep.fields, ...newFields } + }; + return updatedSteps; + } else { + // Create a new ListStep + return [ + ...prevSteps, + { id: Date.now(), type: 'list', listSelector, fields: newFields } + ]; + } + }); + }; + + const addScreenshotStep = (fullPage: boolean) => { setBrowserSteps(prevSteps => [ ...prevSteps, @@ -67,6 +99,7 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ { + diff --git a/src/shared/types.ts b/src/shared/types.ts index aa5f254e..0a259dea 100644 --- a/src/shared/types.ts +++ b/src/shared/types.ts @@ -23,4 +23,4 @@ export interface ScreenshotSettings { type?: "jpeg" | "png"; }; -export declare type CustomActions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag'; +export declare type CustomActions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag' | 'scrapeList' | 'scrapeListAuto';