From 4ffce7b922d604f7d0b900a7d13b1aec66231093 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Tue, 6 May 2025 15:42:09 +0530 Subject: [PATCH 1/8] feat: export browserstep, add update limit step type --- src/context/browserSteps.tsx | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index fd311a35..d588d60d 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -26,7 +26,7 @@ export interface ListStep { limit?: number; } -type BrowserStep = TextStep | ScreenshotStep | ListStep; +export type BrowserStep = TextStep | ScreenshotStep | ListStep; export interface SelectorObject { selector: string; @@ -44,6 +44,7 @@ interface BrowserStepsContextType { deleteBrowserStep: (id: number) => void; updateBrowserTextStepLabel: (id: number, newLabel: string) => void; updateListTextFieldLabel: (listId: number, fieldKey: string, newLabel: string) => void; + updateListStepLimit: (listId: number, limit: number) => void; removeListTextField: (listId: number, fieldKey: string) => void; } @@ -142,6 +143,20 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ ); }; + const updateListStepLimit = (listId: number, limit: number) => { + setBrowserSteps(prevSteps => + prevSteps.map(step => { + if (step.type === 'list' && step.id === listId) { + return { + ...step, + limit: limit + }; + } + return step; + }) + ); + }; + const removeListTextField = (listId: number, fieldKey: string) => { setBrowserSteps(prevSteps => prevSteps.map(step => { @@ -166,6 +181,7 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ deleteBrowserStep, updateBrowserTextStepLabel, updateListTextFieldLabel, + updateListStepLimit, removeListTextField, }}> {children} From 6f047beb3d82c224ce8a65dba2faad12bfdd4600 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Tue, 6 May 2025 15:53:13 +0530 Subject: [PATCH 2/8] feat: update list step limit --- src/components/recorder/RightSidePanel.tsx | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/src/components/recorder/RightSidePanel.tsx b/src/components/recorder/RightSidePanel.tsx index 89a2c486..95201322 100644 --- a/src/components/recorder/RightSidePanel.tsx +++ b/src/components/recorder/RightSidePanel.tsx @@ -7,7 +7,7 @@ import { WorkflowFile } from "maxun-core"; import Typography from "@mui/material/Typography"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { PaginationType, useActionContext, LimitType } from '../../context/browserActions'; -import { useBrowserSteps } from '../../context/browserSteps'; +import { BrowserStep, useBrowserSteps } from '../../context/browserSteps'; import { useSocketStore } from '../../context/socket'; import { ScreenshotSettings } from '../../shared/types'; import InputAdornment from '@mui/material/InputAdornment'; @@ -69,7 +69,7 @@ export const RightSidePanel: React.FC = ({ onFinishCapture startAction, finishAction } = useActionContext(); - const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep, updateListTextFieldLabel, removeListTextField } = useBrowserSteps(); + const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep, updateListTextFieldLabel, removeListTextField, updateListStepLimit } = useBrowserSteps(); const { id, socket } = useSocketStore(); const { t } = useTranslation(); @@ -349,6 +349,13 @@ export const RightSidePanel: React.FC = ({ onFinishCapture ) ); + const getLatestListStep = (steps: BrowserStep[]) => { + const listSteps = steps.filter(step => step.type === 'list'); + if (listSteps.length === 0) return null; + + return listSteps.sort((a, b) => b.id - a.id)[0]; + }; + const handleConfirmListCapture = useCallback(() => { switch (captureStage) { case 'initial': @@ -385,6 +392,12 @@ export const RightSidePanel: React.FC = ({ onFinishCapture notify('error', t('right_panel.errors.invalid_limit')); return; } + + const latestListStep = getLatestListStep(browserSteps); + if (latestListStep) { + updateListStepLimit(latestListStep.id, limit); + } + stopLimitMode(); setShowLimitOptions(false); setIsCaptureListConfirmed(true); From 1022ca6b67d472b59644f47727dde9a02a69eacb Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 7 May 2025 09:11:59 +0530 Subject: [PATCH 3/8] feat: emit extracted list data from remote browser --- .../classes/RemoteBrowser.ts | 555 ++++++++++++++++++ 1 file changed, 555 insertions(+) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 4ab159d4..8e250d23 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -427,6 +427,541 @@ export class RemoteBrowser { } }; + /** + * Extract data from a list of elements on a page + * @param page - Playwright Page object + * @param listSelector - CSS selector for the list container + * @param fields - Record of field configurations + * @param limit - Maximum number of items to extract (default: 5) + * @returns Promise>> - Array of extracted data objects + */ + private async extractListData( + page: Page, + listSelector: string, + fields: Record, + limit: number = 5 + ): Promise>> { + return await page.evaluate( + async ({ listSelector, fields, limit }: { + listSelector: string; + fields: Record; + limit: number; + }) => { + const convertedFields: Record = {}; + + for (const [key, field] of Object.entries(fields)) { + convertedFields[field.label] = { + selector: field.selectorObj.selector, + attribute: field.selectorObj.attribute + }; + } + + const queryElement = (rootElement: Element | Document, selector: string): Element | null => { + if (!selector.includes('>>') && !selector.includes(':>>')) { + return rootElement.querySelector(selector); + } + + const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim()); + let currentElement: Element | Document | null = rootElement; + + for (let i = 0; i < parts.length; i++) { + if (!currentElement) return null; + + if ((currentElement as Element).tagName === 'IFRAME' || (currentElement as Element).tagName === 'FRAME') { + try { + const frameElement = currentElement as HTMLIFrameElement | HTMLFrameElement; + const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document; + if (!frameDoc) return null; + currentElement = frameDoc.querySelector(parts[i]); + continue; + } catch (e) { + console.warn(`Cannot access ${(currentElement as Element).tagName.toLowerCase()} content:`, e); + return null; + } + } + + let nextElement: Element | null = null; + + if ('querySelector' in currentElement) { + nextElement = currentElement.querySelector(parts[i]); + } + + if (!nextElement && 'shadowRoot' in currentElement && (currentElement as Element).shadowRoot) { + nextElement = (currentElement as Element).shadowRoot!.querySelector(parts[i]); + } + + if (!nextElement && 'children' in currentElement) { + const children: any = Array.from((currentElement as Element).children || []); + for (const child of children) { + if (child.shadowRoot) { + nextElement = child.shadowRoot.querySelector(parts[i]); + if (nextElement) break; + } + } + } + + currentElement = nextElement; + } + + return currentElement as Element | null; + }; + + const queryElementAll = (rootElement: Element | Document, selector: string): Element[] => { + if (!selector.includes('>>') && !selector.includes(':>>')) { + return Array.from(rootElement.querySelectorAll(selector)); + } + + const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim()); + let currentElements: (Element | Document)[] = [rootElement]; + + for (const part of parts) { + const nextElements: Element[] = []; + + for (const element of currentElements) { + if ((element as Element).tagName === 'IFRAME' || (element as Element).tagName === 'FRAME') { + try { + const frameElement = element as HTMLIFrameElement | HTMLFrameElement; + const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document; + if (frameDoc) { + nextElements.push(...Array.from(frameDoc.querySelectorAll(part))); + } + } catch (e) { + console.warn(`Cannot access ${(element as Element).tagName.toLowerCase()} content:`, e); + continue; + } + } else { + if ('querySelectorAll' in element) { + nextElements.push(...Array.from(element.querySelectorAll(part))); + } + + if ('shadowRoot' in element && (element as Element).shadowRoot) { + nextElements.push(...Array.from((element as Element).shadowRoot!.querySelectorAll(part))); + } + + if ('children' in element) { + const children = Array.from((element as Element).children || []); + for (const child of children) { + if (child.shadowRoot) { + nextElements.push(...Array.from(child.shadowRoot.querySelectorAll(part))); + } + } + } + } + } + + currentElements = nextElements; + } + + return currentElements as Element[]; + }; + + function extractValue(element: Element, attribute: string): string | null { + if (!element) return null; + + const baseURL = element.ownerDocument?.location?.href || window.location.origin; + + if (element.shadowRoot) { + const shadowContent = element.shadowRoot.textContent; + if (shadowContent?.trim()) { + return shadowContent.trim(); + } + } + + if (attribute === 'innerText') { + return (element as HTMLElement).innerText.trim(); + } else if (attribute === 'innerHTML') { + return element.innerHTML.trim(); + } else if (attribute === 'src' || attribute === 'href') { + if (attribute === 'href' && element.tagName !== 'A') { + const parentElement = element.parentElement; + if (parentElement && parentElement.tagName === 'A') { + const parentHref = parentElement.getAttribute('href'); + if (parentHref) { + try { + return new URL(parentHref, baseURL).href; + } catch (e) { + return parentHref; + } + } + } + } + + const attrValue = element.getAttribute(attribute); + const dataAttr = attrValue || element.getAttribute('data-' + attribute); + + if (!dataAttr || dataAttr.trim() === '') { + if (attribute === 'src') { + const style = window.getComputedStyle(element); + const bgImage = style.backgroundImage; + if (bgImage && bgImage !== 'none') { + const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/); + return matches ? new URL(matches[1], baseURL).href : null; + } + } + return null; + } + + try { + return new URL(dataAttr, baseURL).href; + } catch (e) { + console.warn('Error creating URL from', dataAttr, e); + return dataAttr; // Return the original value if URL construction fails + } + } + return element.getAttribute(attribute); + } + + function findTableAncestor(element: Element): { type: string; element: Element } | null { + let currentElement: Element | null = element; + const MAX_DEPTH = 5; + let depth = 0; + + while (currentElement && depth < MAX_DEPTH) { + if (currentElement.getRootNode() instanceof ShadowRoot) { + currentElement = (currentElement.getRootNode() as ShadowRoot).host; + continue; + } + + if (currentElement.tagName === 'TD') { + return { type: 'TD', element: currentElement }; + } else if (currentElement.tagName === 'TR') { + return { type: 'TR', element: currentElement }; + } + + if (currentElement.tagName === 'IFRAME' || currentElement.tagName === 'FRAME') { + try { + const frameElement = currentElement as HTMLIFrameElement | HTMLFrameElement; + currentElement = frameElement.contentDocument?.body || null; + } catch (e) { + return null; + } + } else { + currentElement = currentElement.parentElement; + } + depth++; + } + return null; + } + + function getCellIndex(td: Element): number { + if (td.getRootNode() instanceof ShadowRoot) { + const shadowRoot = td.getRootNode() as ShadowRoot; + const allCells = Array.from(shadowRoot.querySelectorAll('td')); + return allCells.indexOf(td as HTMLTableCellElement); + } + + let index = 0; + let sibling = td; + while (sibling = sibling.previousElementSibling as Element) { + index++; + } + return index; + } + + function hasThElement(row: Element, tableFields: Record): boolean { + for (const [_, { selector }] of Object.entries(tableFields)) { + const element = queryElement(row, selector); + if (element) { + let current: Element | ShadowRoot | Document | null = element; + while (current && current !== row) { + if (current.getRootNode() instanceof ShadowRoot) { + current = (current.getRootNode() as ShadowRoot).host; + continue; + } + + if ((current as Element).tagName === 'TH') return true; + + if ((current as Element).tagName === 'IFRAME' || (current as Element).tagName === 'FRAME') { + try { + const frameElement = current as HTMLIFrameElement | HTMLFrameElement; + current = frameElement.contentDocument?.body || null; + } catch (e) { + break; + } + } else { + current = (current as Element).parentElement; + } + } + } + } + return false; + } + + function filterRowsBasedOnTag(rows: Element[], tableFields: Record): Element[] { + for (const row of rows) { + if (hasThElement(row, tableFields)) { + return rows; + } + } + return rows.filter(row => { + const directTH = row.getElementsByTagName('TH').length === 0; + const shadowTH = row.shadowRoot ? + row.shadowRoot.querySelector('th') === null : true; + return directTH && shadowTH; + }); + } + + function calculateClassSimilarity(classList1: string[], classList2: string[]): number { + const set1 = new Set(classList1); + const set2 = new Set(classList2); + const intersection = new Set([...set1].filter(x => set2.has(x))); + const union = new Set([...set1, ...set2]); + return intersection.size / union.size; + } + + function findSimilarElements(baseElement: Element, similarityThreshold: number = 0.7): Element[] { + const baseClasses = Array.from(baseElement.classList); + if (baseClasses.length === 0) return []; + + const allElements: Element[] = []; + + allElements.push(...Array.from(document.getElementsByTagName(baseElement.tagName))); + + if (baseElement.getRootNode() instanceof ShadowRoot) { + const shadowHost = (baseElement.getRootNode() as ShadowRoot).host; + allElements.push(...Array.from(shadowHost.getElementsByTagName(baseElement.tagName))); + } + + const frames = [ + ...Array.from(document.getElementsByTagName('iframe')), + ...Array.from(document.getElementsByTagName('frame')) + ]; + + for (const frame of frames) { + try { + const frameElement = frame as HTMLIFrameElement | HTMLFrameElement; + const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document; + if (frameDoc) { + allElements.push(...Array.from(frameDoc.getElementsByTagName(baseElement.tagName))); + } + } catch (e) { + console.warn(`Cannot access ${frame.tagName.toLowerCase()} content:`, e); + } + } + + return allElements.filter(element => { + if (element === baseElement) return false; + const similarity = calculateClassSimilarity( + baseClasses, + Array.from(element.classList) + ); + return similarity >= similarityThreshold; + }); + } + + let containers = queryElementAll(document, listSelector); + + if (containers.length === 0) return []; + + if (limit > 1 && containers.length === 1) { + const baseContainer = containers[0]; + const similarContainers = findSimilarElements(baseContainer); + + if (similarContainers.length > 0) { + const newContainers = similarContainers.filter(container => + !container.matches(listSelector) + ); + containers = [...containers, ...newContainers]; + } + } + + const containerFields = containers.map(() => ({ + tableFields: {} as Record, + nonTableFields: {} as Record + })); + + containers.forEach((container, containerIndex) => { + for (const [label, field] of Object.entries(convertedFields)) { + const sampleElement = queryElement(container, field.selector); + + if (sampleElement) { + const ancestor = findTableAncestor(sampleElement); + if (ancestor) { + containerFields[containerIndex].tableFields[label] = { + ...field, + tableContext: ancestor.type, + cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1 + }; + } else { + containerFields[containerIndex].nonTableFields[label] = field; + } + } else { + containerFields[containerIndex].nonTableFields[label] = field; + } + } + }); + + const tableData: Array> = []; + const nonTableData: Array> = []; + + for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { + const container = containers[containerIndex]; + const { tableFields } = containerFields[containerIndex]; + + if (Object.keys(tableFields).length > 0) { + const firstField = Object.values(tableFields)[0]; + const firstElement = queryElement(container, firstField.selector); + let tableContext: Element | null = firstElement; + + while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) { + if (tableContext.getRootNode() instanceof ShadowRoot) { + tableContext = (tableContext.getRootNode() as ShadowRoot).host; + continue; + } + + if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') { + try { + const frameElement = tableContext as HTMLIFrameElement | HTMLFrameElement; + tableContext = frameElement.contentDocument?.body || null; + } catch (e) { + break; + } + } else { + tableContext = tableContext.parentElement; + } + } + + if (tableContext) { + const rows: Element[] = []; + + rows.push(...Array.from(tableContext.getElementsByTagName('TR'))); + + if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') { + try { + const frameElement = tableContext as HTMLIFrameElement | HTMLFrameElement; + const frameDoc = frameElement.contentDocument || frameElement.contentWindow?.document; + if (frameDoc) { + rows.push(...Array.from(frameDoc.getElementsByTagName('TR'))); + } + } catch (e) { + console.warn(`Cannot access ${tableContext.tagName.toLowerCase()} rows:`, e); + } + } + + const processedRows = filterRowsBasedOnTag(rows, tableFields); + + for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) { + const record: Record = {}; + const currentRow = processedRows[rowIndex]; + + for (const [label, { selector, attribute, cellIndex }] of Object.entries(tableFields)) { + let element: Element | null = null; + + if (cellIndex !== undefined && cellIndex >= 0) { + let td: Element | null = currentRow.children[cellIndex] || null; + + if (!td && currentRow.shadowRoot) { + const shadowCells = currentRow.shadowRoot.children; + if (shadowCells && shadowCells.length > cellIndex) { + td = shadowCells[cellIndex]; + } + } + + if (td) { + element = queryElement(td, selector); + + if (!element && selector.split(/(?:>>|:>>)/).pop()?.includes('td:nth-child')) { + element = td; + } + + if (!element) { + const tagOnlySelector = selector.split('.')[0]; + element = queryElement(td, tagOnlySelector); + } + + if (!element) { + let currentElement: Element | null = td; + while (currentElement && currentElement.children.length > 0) { + let foundContentChild = false; + for (const child of Array.from(currentElement.children)) { + if (extractValue(child, attribute)) { + currentElement = child; + foundContentChild = true; + break; + } + } + if (!foundContentChild) break; + } + element = currentElement; + } + } + } else { + element = queryElement(currentRow, selector); + } + + if (element) { + const value = extractValue(element, attribute); + if (value !== null) { + record[label] = value; + } + } + } + + if (Object.keys(record).length > 0) { + tableData.push(record); + } + } + } + } + } + + for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { + if (nonTableData.length >= limit) break; + + const container = containers[containerIndex]; + const { nonTableFields } = containerFields[containerIndex]; + + if (Object.keys(nonTableFields).length > 0) { + const record: Record = {}; + + for (const [label, { selector, attribute }] of Object.entries(nonTableFields)) { + const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0]; + const element = queryElement(container, relativeSelector); + + if (element) { + const value = extractValue(element, attribute); + if (value !== null) { + record[label] = value; + } + } + } + + if (Object.keys(record).length > 0) { + nonTableData.push(record); + } + } + } + + const scrapedData = [...tableData, ...nonTableData].slice(0, limit); + return scrapedData; + }, + { listSelector, fields, limit } + ) as Array>; + } + /** * Registers all event listeners needed for the recording editor session. * Should be called only once after the full initialization of the remote browser. @@ -526,6 +1061,26 @@ export class RemoteBrowser { this.context = await this.browser.newContext({ viewport: { width, height } }); } }); + + this.socket.on('extractListData', async (data: { + listSelector: string, + fields: Record, + currentListId: number, + pagination: any + }) => { + if (this.currentPage) { + const extractedData = await this.extractListData( + this.currentPage, + data.listSelector, + data.fields + ); + + this.socket.emit('listDataExtracted', { + currentListId: data.currentListId, + data: extractedData + }); + } + }); }; /** * Subscribes the remote browser for a screencast session From f43b8600d733223addf5ed0c528adf89059457c5 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 7 May 2025 09:18:08 +0530 Subject: [PATCH 4/8] feat: get list extracted data --- src/components/browser/BrowserWindow.tsx | 69 ++++++++++++++++++------ 1 file changed, 52 insertions(+), 17 deletions(-) diff --git a/src/components/browser/BrowserWindow.tsx b/src/components/browser/BrowserWindow.tsx index c23974a5..83750f38 100644 --- a/src/components/browser/BrowserWindow.tsx +++ b/src/components/browser/BrowserWindow.tsx @@ -87,7 +87,7 @@ export const BrowserWindow = () => { const { socket } = useSocketStore(); const { notify } = useGlobalInfoStore(); const { getText, getList, paginationMode, paginationType, limitMode, captureStage } = useActionContext(); - const { addTextStep, addListStep } = useBrowserSteps(); + const { addTextStep, addListStep, updateListStepData } = useBrowserSteps(); const { state } = useContext(AuthContext); const { user } = state; @@ -252,6 +252,19 @@ export const BrowserWindow = () => { } }, [getList, socket, listSelector, paginationMode, paginationType, limitMode]); + useEffect(() => { + if (socket) { + socket.on('listDataExtracted', (response) => { + const { currentListId, data } = response; + + updateListStepData(currentListId, data); + }); + } + + return () => { + socket?.off('listDataExtracted'); + }; + }, [socket]); useEffect(() => { document.addEventListener('mousemove', onMouseMove, false); @@ -380,16 +393,27 @@ export const BrowserWindow = () => { } }; - setFields(prevFields => { - const updatedFields = { - ...prevFields, - [newField.id]: newField - }; - return updatedFields; - }); + const updatedFields = { + ...fields, + [newField.id]: newField + }; + + setFields(updatedFields); if (listSelector) { - addListStep(listSelector, { ...fields, [newField.id]: newField }, currentListId, { type: '', selector: paginationSelector }); + socket?.emit('extractListData', { + listSelector, + fields: updatedFields, + currentListId, + pagination: { type: '', selector: paginationSelector } + }); + + addListStep( + listSelector, + updatedFields, + currentListId, + { type: '', selector: paginationSelector } + ); } } else { @@ -441,16 +465,27 @@ export const BrowserWindow = () => { } }; - setFields(prevFields => { - const updatedFields = { - ...prevFields, - [newField.id]: newField - }; - return updatedFields; - }); + const updatedFields = { + ...fields, + [newField.id]: newField + }; + + setFields(updatedFields); if (listSelector) { - addListStep(listSelector, { ...fields, [newField.id]: newField }, currentListId, { type: '', selector: paginationSelector }); + socket?.emit('extractListData', { + listSelector, + fields: updatedFields, + currentListId, + pagination: { type: '', selector: paginationSelector } + }); + + addListStep( + listSelector, + updatedFields, + currentListId, + { type: '', selector: paginationSelector } + ); } } } From ec36df79fd9adede1a1a0eea1d50b5c4c768e5c1 Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 7 May 2025 09:19:04 +0530 Subject: [PATCH 5/8] feat: add update list data browser step --- src/context/browserSteps.tsx | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/src/context/browserSteps.tsx b/src/context/browserSteps.tsx index d588d60d..db0eb239 100644 --- a/src/context/browserSteps.tsx +++ b/src/context/browserSteps.tsx @@ -45,6 +45,7 @@ interface BrowserStepsContextType { updateBrowserTextStepLabel: (id: number, newLabel: string) => void; updateListTextFieldLabel: (listId: number, fieldKey: string, newLabel: string) => void; updateListStepLimit: (listId: number, limit: number) => void; + updateListStepData: (listId: number, extractedData: any[]) => void; removeListTextField: (listId: number, fieldKey: string) => void; } @@ -143,6 +144,20 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ ); }; + const updateListStepData = (listId: number, extractedData: any[]) => { + setBrowserSteps((prevSteps) => { + return prevSteps.map(step => { + if (step.type === 'list' && step.id === listId) { + return { + ...step, + data: extractedData // Add the extracted data to the step + }; + } + return step; + }); + }); + }; + const updateListStepLimit = (listId: number, limit: number) => { setBrowserSteps(prevSteps => prevSteps.map(step => { @@ -182,6 +197,7 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({ updateBrowserTextStepLabel, updateListTextFieldLabel, updateListStepLimit, + updateListStepData, removeListTextField, }}> {children} From 0a1704e2a82d4eed64850a45786467e86bdee83a Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 7 May 2025 09:22:50 +0530 Subject: [PATCH 6/8] feat: display browser step data --- src/components/run/InterpretationLog.tsx | 148 +++++++++++++---------- 1 file changed, 84 insertions(+), 64 deletions(-) diff --git a/src/components/run/InterpretationLog.tsx b/src/components/run/InterpretationLog.tsx index be901250..e11e0151 100644 --- a/src/components/run/InterpretationLog.tsx +++ b/src/components/run/InterpretationLog.tsx @@ -1,7 +1,7 @@ import * as React from 'react'; import SwipeableDrawer from '@mui/material/SwipeableDrawer'; import Typography from '@mui/material/Typography'; -import { Button, Grid, Tabs, Tab, Box } from '@mui/material'; +import { Button, Grid, Box } from '@mui/material'; import { useCallback, useEffect, useRef, useState } from "react"; import { useSocketStore } from "../../context/socket"; import { Buffer } from 'buffer'; @@ -19,6 +19,7 @@ import { SidePanelHeader } from '../recorder/SidePanelHeader'; import { useGlobalInfoStore } from '../../context/globalInfo'; import { useThemeMode } from '../../context/theme-provider'; import { useTranslation } from 'react-i18next'; +import { useBrowserSteps } from '../../context/browserSteps'; interface InterpretationLogProps { isOpen: boolean; @@ -41,6 +42,10 @@ export const InterpretationLog: React.FC = ({ isOpen, se const logEndRef = useRef(null); + const { browserSteps } = useBrowserSteps(); + + const [activeActionId, setActiveActionId] = useState(null); + const { browserWidth, outputPreviewHeight, outputPreviewWidth } = useBrowserDimensionsStore(); const { socket } = useSocketStore(); const { currentWorkflowActionsState, shouldResetInterpretationLog, notify } = useGlobalInfoStore(); @@ -71,33 +76,39 @@ export const InterpretationLog: React.FC = ({ isOpen, se scrollLogToBottom(); }, []); - const handleSerializableCallback = useCallback(({ type, data }: { type: string, data: any }) => { - setLog((prevState) => - prevState + '\n' + t('interpretation_log.data_sections.serializable_received') + '\n' - + JSON.stringify(data, null, 2) + '\n' + t('interpretation_log.data_sections.separator')); - - if (type === 'captureList') { - setCaptureListData(prev => [...prev, data]); - if (captureListData.length === 0) { - const availableTabs = getAvailableTabs(); - const tabIndex = availableTabs.findIndex(tab => tab.id === 'captureList'); - if (tabIndex !== -1) setActiveTab(tabIndex); + useEffect(() => { + if (activeActionId !== null) { + const textSteps = browserSteps.filter(step => step.type === 'text'); + if (textSteps.length > 0) { + const textDataRow: Record = {}; + + textSteps.forEach(step => { + textDataRow[step.label] = step.data; + }); + + setCaptureTextData([textDataRow]); } - } else if (type === 'captureText') { - if (Array.isArray(data)) { - setCaptureTextData(data); - } else { - setCaptureTextData([data]); - } - if (captureTextData.length === 0) { - const availableTabs = getAvailableTabs(); - const tabIndex = availableTabs.findIndex(tab => tab.id === 'captureText'); - if (tabIndex !== -1) setActiveTab(tabIndex); + + const listSteps = browserSteps.filter(step => step.type === 'list'); + if (listSteps.length > 0) { + setCaptureListData(listSteps); } + + updateActiveTab(); } - - scrollLogToBottom(); - }, [captureListData.length, captureTextData.length, t]); + }, [activeActionId, browserSteps, t]); + + const updateActiveTab = useCallback(() => { + const availableTabs = getAvailableTabs(); + + if (captureListData.length > 0 && availableTabs.findIndex(tab => tab.id === 'captureList') !== -1) { + setActiveTab(availableTabs.findIndex(tab => tab.id === 'captureList')); + } else if (captureTextData.length > 0 && availableTabs.findIndex(tab => tab.id === 'captureText') !== -1) { + setActiveTab(availableTabs.findIndex(tab => tab.id === 'captureText')); + } else if (screenshotData.length > 0 && availableTabs.findIndex(tab => tab.id === 'captureScreenshot') !== -1) { + setActiveTab(availableTabs.findIndex(tab => tab.id === 'captureScreenshot')); + } + }, [captureListData.length, captureTextData.length, screenshotData.length]); const handleBinaryCallback = useCallback(({ data, mimetype, type }: { data: any, mimetype: string, type: string }) => { const base64String = Buffer.from(data).toString('base64'); @@ -121,6 +132,10 @@ export const InterpretationLog: React.FC = ({ isOpen, se scrollLogToBottom(); }, [screenshotData.length, t]); + const handleActivePairId = useCallback((id: number) => { + setActiveActionId(id); + }, []); + const handleCustomValueChange = (event: React.ChangeEvent) => { setCustomValue(event.target.value); }; @@ -134,19 +149,21 @@ export const InterpretationLog: React.FC = ({ isOpen, se setActiveTab(0); setCaptureListPage(0); setScreenshotPage(0); + setActiveActionId(null); } }, [shouldResetInterpretationLog]); useEffect(() => { socket?.on('log', handleLog); - socket?.on('serializableCallback', handleSerializableCallback); socket?.on('binaryCallback', handleBinaryCallback); + socket?.on('activePairId', handleActivePairId); + return () => { socket?.off('log', handleLog); - socket?.off('serializableCallback', handleSerializableCallback); socket?.off('binaryCallback', handleBinaryCallback); + socket?.off('activePairId', handleActivePairId); }; - }, [socket, handleLog, handleSerializableCallback, handleBinaryCallback]); + }, [socket, handleLog, handleBinaryCallback, handleActivePairId]); const getAvailableTabs = useCallback(() => { const tabs = []; @@ -321,45 +338,48 @@ export const InterpretationLog: React.FC = ({ isOpen, se - {captureListData[captureListPage] && captureListData[captureListPage].length > 0 && - Object.keys(captureListData[captureListPage][0]).map((column) => ( - - {column} - - )) - } + {Object.values(captureListData[captureListPage]?.fields || {}).map((field: any, index) => ( + + {field.label} + + ))} - {captureListData[captureListPage] && - captureListData[captureListPage].map((row: any, idx: any) => ( - - {Object.keys(row).map((column) => ( - - {row[column]} - - ))} - - ))} + {(captureListData[captureListPage]?.data || []) + .slice(0, Math.min(captureListData[captureListPage]?.limit || 10, 5)) + .map((row: any, rowIndex: any) => ( + + {Object.values(captureListData[captureListPage]?.fields || {}).map((field: any, colIndex) => ( + + {row[field.label]} + + ))} + + )) + }
From 817c7254d291239a8693fcdaf70b8385bf6d0dac Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 7 May 2025 09:29:03 +0530 Subject: [PATCH 7/8] feat: add mode editor --- server/src/workflow-management/classes/Interpreter.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/workflow-management/classes/Interpreter.ts b/server/src/workflow-management/classes/Interpreter.ts index 0481687d..14877600 100644 --- a/server/src/workflow-management/classes/Interpreter.ts +++ b/server/src/workflow-management/classes/Interpreter.ts @@ -183,6 +183,7 @@ export class WorkflowInterpreter { const options = { ...settings, + mode: 'editor', debugChannel: { activeId: (id: any) => { this.activeId = id; From 38430ec4c1a9820f4feaf18b97eaa7cb937e338d Mon Sep 17 00:00:00 2001 From: RohitR311 Date: Wed, 7 May 2025 09:30:25 +0530 Subject: [PATCH 8/8] feat: add mode param options --- maxun-core/src/interpret.ts | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 91ade8b2..69dd3127 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -37,6 +37,7 @@ declare global { * Defines optional intepreter options (passed in constructor) */ interface InterpreterOptions { + mode?: string; maxRepeats: number; maxConcurrency: number; serializableCallback: (output: any) => (void | Promise); @@ -432,6 +433,11 @@ export default class Interpreter extends EventEmitter { if (this.options.debugChannel?.setActionType) { this.options.debugChannel.setActionType('scrapeSchema'); } + + if (this.options.mode && this.options.mode === 'editor') { + await this.options.serializableCallback({}); + return; + } await this.ensureScriptsLoaded(page); @@ -463,6 +469,11 @@ export default class Interpreter extends EventEmitter { this.options.debugChannel.setActionType('scrapeList'); } + if (this.options.mode && this.options.mode === 'editor') { + await this.options.serializableCallback({}); + return; + } + await this.ensureScriptsLoaded(page); if (!config.pagination) { const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config);