Merge pull request #654 from getmaxun/record-revamp

feat: recorder revamp
This commit is contained in:
Karishma Shukla
2025-06-24 16:22:58 +05:30
committed by GitHub
15 changed files with 7802 additions and 133 deletions

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,10 @@
// Build script: bundles rrweb-entry.js into one minified browser script.
const esbuild = require('esbuild');

const bundleOptions = {
  entryPoints: ['rrweb-entry.js'],
  outfile: 'rrweb-bundle.js',
  bundle: true,
  minify: true,
  // IIFE output, so that rrwebSnapshot is available on window
  format: 'iife',
  globalName: 'rrwebSnapshotBundle',
};

// Any build failure terminates the process with a non-zero exit code.
esbuild.build(bundleOptions).catch(() => process.exit(1));

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,2 @@
// Bundle entry point: pulls in the `snapshot` function from rrweb-snapshot.
import { snapshot } from 'rrweb-snapshot';
// Expose it on the global object as `window.rrwebSnapshot.snapshot`.
window.rrwebSnapshot = { snapshot };

View File

@@ -20,7 +20,7 @@ import logger from "../logger";
* @returns string
* @category BrowserManagement-Controller
*/
export const initializeRemoteBrowserForRecording = (userId: string): string => {
export const initializeRemoteBrowserForRecording = (userId: string, mode: string = "dom"): string => {
const id = getActiveBrowserIdByState(userId, "recording") || uuid();
createSocketConnection(
io.of(id),
@@ -37,7 +37,15 @@ export const initializeRemoteBrowserForRecording = (userId: string): string => {
browserSession.interpreter.subscribeToPausing();
await browserSession.initialize(userId);
await browserSession.registerEditorEvents();
await browserSession.subscribeToScreencast();
if (mode === "dom") {
await browserSession.subscribeToDOM();
logger.info('DOM streaming started for scraping browser in recording mode');
} else {
await browserSession.subscribeToScreencast();
logger.info('Screenshot streaming started for local browser in recording mode');
}
browserPool.addRemoteBrowser(id, browserSession, userId, false, "recording");
}
socket.emit('loaded');

View File

@@ -11,6 +11,8 @@ import { WorkflowGenerator } from "../workflow-management/classes/Generator";
import { Page } from "playwright";
import { throttle } from "../../../src/helpers/inputHelpers";
import { CustomActions } from "../../../src/shared/types";
import { WhereWhatPair } from "maxun-core";
import { RemoteBrowser } from './classes/RemoteBrowser';
/**
* A wrapper function for handling user input.
@@ -27,7 +29,7 @@ import { CustomActions } from "../../../src/shared/types";
*/
const handleWrapper = async (
handleCallback: (
generator: WorkflowGenerator,
activeBrowser: RemoteBrowser,
page: Page,
args?: any
) => Promise<void>,
@@ -44,9 +46,9 @@ const handleWrapper = async (
const currentPage = activeBrowser?.getCurrentPage();
if (currentPage && activeBrowser) {
if (args) {
await handleCallback(activeBrowser.generator, currentPage, args);
await handleCallback(activeBrowser, currentPage, args);
} else {
await handleCallback(activeBrowser.generator, currentPage);
await handleCallback(activeBrowser, currentPage);
}
} else {
logger.log('warn', `No active page for browser ${id}`);
@@ -85,8 +87,19 @@ const onGenerateAction = async (customActionEventData: CustomActionEventData, us
* @category BrowserManagement
*/
const handleGenerateAction =
async (generator: WorkflowGenerator, page: Page, { action, settings }: CustomActionEventData) => {
await generator.customAction(action, settings, page);
async (activeBrowser: RemoteBrowser, page: Page, { action, settings }: CustomActionEventData) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring generate action event: page is closed`);
return;
}
const generator = activeBrowser.generator;
await generator.customAction(action, settings, page);
} catch (e) {
const { message } = e as Error;
logger.log("warn", `Error handling generate action event: ${message}`);
}
}
/**
@@ -104,40 +117,51 @@ const onMousedown = async (coordinates: Coordinates, userId: string) => {
* A mousedown event handler.
* Reproduces the click on the remote browser instance
* and generates pair data for the recorded workflow.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @param x - the x coordinate of the mousedown event
* @param y - the y coordinate of the mousedown event
* @category BrowserManagement
*/
const handleMousedown = async (generator: WorkflowGenerator, page: Page, { x, y }: Coordinates) => {
const handleMousedown = async (activeBrowser: RemoteBrowser, page: Page, { x, y }: Coordinates) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring mousedown event: page is closed`);
return;
}
const generator = activeBrowser.generator;
await generator.onClick({ x, y }, page);
const previousUrl = page.url();
const tabsBeforeClick = page.context().pages().length;
await page.mouse.click(x, y);
// try if the click caused a navigation to a new url
try {
await page.waitForNavigation({ timeout: 2000 });
const currentUrl = page.url();
if (currentUrl !== previousUrl) {
generator.notifyUrlChange(currentUrl);
}
await page.waitForNavigation({ timeout: 2000 });
const currentUrl = page.url();
if (currentUrl !== previousUrl) {
generator.notifyUrlChange(currentUrl);
}
} catch (e) {
const { message } = e as Error;
const { message } = e as Error;
} //ignore possible timeouts
// check if any new page was opened by the click
const tabsAfterClick = page.context().pages().length;
const numOfNewPages = tabsAfterClick - tabsBeforeClick;
if (numOfNewPages > 0) {
for (let i = 1; i <= numOfNewPages; i++) {
const newPage = page.context().pages()[tabsAfterClick - i];
if (newPage) {
generator.notifyOnNewTab(newPage, tabsAfterClick - i);
}
for (let i = 1; i <= numOfNewPages; i++) {
const newPage = page.context().pages()[tabsAfterClick - i];
if (newPage) {
generator.notifyOnNewTab(newPage, tabsAfterClick - i);
}
}
}
logger.log('debug', `Clicked on position x:${x}, y:${y}`);
logger.log("debug", `Clicked on position x:${x}, y:${y}`);
} catch (e) {
const { message } = e as Error;
logger.log("warn", `Error handling mousedown event: ${message}`);
}
};
/**
@@ -156,15 +180,16 @@ const onWheel = async (scrollDeltas: ScrollDeltas, userId: string) => {
* Reproduces the wheel event on the remote browser instance.
* Scroll is not generated for the workflow pair. This is because
* Playwright scrolls elements into focus on any action.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @param deltaX - the delta x of the wheel event
* @param deltaY - the delta y of the wheel event
* @category BrowserManagement
*/
const handleWheel = async (generator: WorkflowGenerator, page: Page, { deltaX, deltaY }: ScrollDeltas) => {
const handleWheel = async (activeBrowser: RemoteBrowser, page: Page, { deltaX, deltaY }: ScrollDeltas) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring wheel event: page is closed`);
return;
}
@@ -194,28 +219,30 @@ const onMousemove = async (coordinates: Coordinates, userId: string) => {
* Reproduces the mousemove event on the remote browser instance
* and generates data for the client's highlighter.
* Mousemove is also not reflected in the workflow.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @param x - the x coordinate of the mousemove event
* @param y - the y coordinate of the mousemove event
* @category BrowserManagement
*/
const handleMousemove = async (generator: WorkflowGenerator, page: Page, { x, y }: Coordinates) => {
const handleMousemove = async (activeBrowser: RemoteBrowser, page: Page, { x, y }: Coordinates) => {
try {
if (page.isClosed()) {
logger.log('debug', `Ignoring mousemove event: page is closed`);
logger.log("debug", `Ignoring mousemove event: page is closed`);
return;
}
const generator = activeBrowser.generator;
await page.mouse.move(x, y);
throttle(async () => {
if (!page.isClosed()) {
await generator.generateDataForHighlighter(page, { x, y });
}
}, 100)();
logger.log('debug', `Moved over position x:${x}, y:${y}`);
logger.log("debug", `Moved over position x:${x}, y:${y}`);
} catch (e) {
const { message } = e as Error;
logger.log('error', message);
logger.log("error", message);
}
}
@@ -234,28 +261,50 @@ const onKeydown = async (keyboardInput: KeyboardInput, userId: string) => {
* A keydown event handler.
* Reproduces the keydown event on the remote browser instance
* and generates the workflow pair data.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @param key - the pressed key
* @param coordinates - the coordinates, where the keydown event happened
* @category BrowserManagement
*/
const handleKeydown = async (generator: WorkflowGenerator, page: Page, { key, coordinates }: KeyboardInput) => {
await page.keyboard.down(key);
await generator.onKeyboardInput(key, coordinates, page);
logger.log('debug', `Key ${key} pressed`);
const handleKeydown = async (activeBrowser: RemoteBrowser, page: Page, { key, coordinates }: KeyboardInput) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring keydown event: page is closed`);
return;
}
const generator = activeBrowser.generator;
await page.keyboard.down(key);
await generator.onKeyboardInput(key, coordinates, page);
logger.log("debug", `Key ${key} pressed`);
} catch (e) {
const { message } = e as Error;
logger.log("warn", `Error handling keydown event: ${message}`);
}
};
/**
* Handles the date selection event.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @param data - the data of the date selection event {@link DatePickerEventData}
* @category BrowserManagement
*/
const handleDateSelection = async (generator: WorkflowGenerator, page: Page, data: DatePickerEventData) => {
await generator.onDateSelection(page, data);
logger.log('debug', `Date ${data.value} selected`);
const handleDateSelection = async (activeBrowser: RemoteBrowser, page: Page, data: DatePickerEventData) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring date selection event: page is closed`);
return;
}
const generator = activeBrowser.generator;
await generator.onDateSelection(page, data);
logger.log("debug", `Date ${data.value} selected`);
} catch (e) {
const { message } = e as Error;
logger.log("warn", `Error handling date selection event: ${message}`);
}
}
/**
@@ -271,14 +320,25 @@ const onDateSelection = async (data: DatePickerEventData, userId: string) => {
/**
* Handles the dropdown selection event.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @param data - the data of the dropdown selection event
* @category BrowserManagement
*/
const handleDropdownSelection = async (generator: WorkflowGenerator, page: Page, data: { selector: string, value: string }) => {
await generator.onDropdownSelection(page, data);
logger.log('debug', `Dropdown value ${data.value} selected`);
const handleDropdownSelection = async (activeBrowser: RemoteBrowser, page: Page, data: { selector: string, value: string }) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring dropdown selection event: page is closed`);
return;
}
const generator = activeBrowser.generator;
await generator.onDropdownSelection(page, data);
logger.log("debug", `Dropdown value ${data.value} selected`);
} catch (e) {
const { message } = e as Error;
logger.log("warn", `Error handling dropdown selection event: ${message}`);
}
}
/**
@@ -294,14 +354,25 @@ const onDropdownSelection = async (data: { selector: string, value: string }, us
/**
* Handles the time selection event.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @param data - the data of the time selection event
* @category BrowserManagement
*/
const handleTimeSelection = async (generator: WorkflowGenerator, page: Page, data: { selector: string, value: string }) => {
await generator.onTimeSelection(page, data);
logger.log('debug', `Time value ${data.value} selected`);
const handleTimeSelection = async (activeBrowser: RemoteBrowser, page: Page, data: { selector: string, value: string }) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring time selection event: page is closed`);
return;
}
const generator = activeBrowser.generator;
await generator.onTimeSelection(page, data);
logger.log("debug", `Time value ${data.value} selected`);
} catch (e) {
const { message } = e as Error;
logger.log("warn", `Error handling time selection event: ${message}`);
}
}
/**
@@ -317,14 +388,31 @@ const onTimeSelection = async (data: { selector: string, value: string }, userId
/**
* Handles the datetime-local selection event.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @param data - the data of the datetime-local selection event
* @category BrowserManagement
*/
const handleDateTimeLocalSelection = async (generator: WorkflowGenerator, page: Page, data: { selector: string, value: string }) => {
await generator.onDateTimeLocalSelection(page, data);
logger.log('debug', `DateTime Local value ${data.value} selected`);
const handleDateTimeLocalSelection = async (activeBrowser: RemoteBrowser, page: Page, data: { selector: string, value: string }) => {
try {
if (page.isClosed()) {
logger.log(
"debug",
`Ignoring datetime-local selection event: page is closed`
);
return;
}
const generator = activeBrowser.generator;
await generator.onDateTimeLocalSelection(page, data);
logger.log("debug", `DateTime Local value ${data.value} selected`);
} catch (e) {
const { message } = e as Error;
logger.log(
"warn",
`Error handling datetime-local selection event: ${message}`
);
}
}
/**
@@ -353,14 +441,24 @@ const onKeyup = async (keyboardInput: KeyboardInput, userId: string) => {
* A keyup event handler.
* Reproduces the keyup event on the remote browser instance.
* Does not generate any data - keyup is not reflected in the workflow.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @param key - the released key
* @category BrowserManagement
*/
const handleKeyup = async (generator: WorkflowGenerator, page: Page, key: string) => {
await page.keyboard.up(key);
logger.log('debug', `Key ${key} unpressed`);
const handleKeyup = async (activeBrowser: RemoteBrowser, page: Page, key: string) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring keyup event: page is closed`);
return;
}
await page.keyboard.up(key);
logger.log("debug", `Key ${key} unpressed`);
} catch (e) {
const { message } = e as Error;
logger.log("warn", `Error handling keyup event: ${message}`);
}
};
/**
@@ -377,23 +475,35 @@ const onChangeUrl = async (url: string, userId: string) => {
/**
* An url change event handler.
* Navigates the page to the given url and generates data for the workflow.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @param url - the new url of the page
* @category BrowserManagement
*/
const handleChangeUrl = async (generator: WorkflowGenerator, page: Page, url: string) => {
if (url) {
await generator.onChangeUrl(url, page);
try {
await page.goto(url, { waitUntil: 'networkidle', timeout: 100000 });
logger.log('debug', `Went to ${url}`);
} catch (e) {
const { message } = e as Error;
logger.log('error', message);
const handleChangeUrl = async (activeBrowser: RemoteBrowser, page: Page, url: string) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring change url event: page is closed`);
return;
}
} else {
logger.log('warn', `No url provided`);
if (url) {
const generator = activeBrowser.generator;
await generator.onChangeUrl(url, page);
try {
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
logger.log("debug", `Went to ${url}`);
} catch (e) {
const { message } = e as Error;
logger.log("error", message);
}
} else {
logger.log("warn", `No url provided`);
}
} catch (e) {
const { message } = e as Error;
logger.log("warn", `Error handling change url event: ${message}`);
}
};
@@ -410,13 +520,23 @@ const onRefresh = async (userId: string) => {
/**
* A refresh event handler.
* Refreshes the page. This is not reflected in the workflow.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @category BrowserManagement
*/
const handleRefresh = async (generator: WorkflowGenerator, page: Page) => {
await page.reload();
logger.log('debug', `Page refreshed.`);
const handleRefresh = async (activeBrowser: RemoteBrowser, page: Page) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring refresh event: page is closed`);
return;
}
await page.reload();
logger.log("debug", `Page refreshed.`);
} catch (e) {
const { message } = e as Error;
logger.log("warn", `Error handling refresh event: ${message}`);
}
};
/**
@@ -432,14 +552,25 @@ const onGoBack = async (userId: string) => {
/**
* A go back event handler.
* Navigates the page back and generates data for the workflow.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @category BrowserManagement
*/
const handleGoBack = async (generator: WorkflowGenerator, page: Page) => {
await page.goBack({ waitUntil: 'commit' });
generator.onGoBack(page.url());
logger.log('debug', 'Page went back')
const handleGoBack = async (activeBrowser: RemoteBrowser, page: Page) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring go back event: page is closed`);
return;
}
const generator = activeBrowser.generator;
await page.goBack({ waitUntil: "commit" });
generator.onGoBack(page.url());
logger.log("debug", "Page went back");
} catch (e) {
const { message } = e as Error;
logger.log("warn", `Error handling go back event: ${message}`);
}
};
/**
@@ -455,14 +586,207 @@ const onGoForward = async (userId: string) => {
/**
* A go forward event handler.
* Navigates the page forward and generates data for the workflow.
* @param generator - the workflow generator {@link Generator}
* @param activeBrowser - the active remote browser {@link RemoteBrowser}
* @param page - the active page of the remote browser
* @category BrowserManagement
*/
const handleGoForward = async (generator: WorkflowGenerator, page: Page) => {
await page.goForward({ waitUntil: 'commit' });
generator.onGoForward(page.url());
logger.log('debug', 'Page went forward');
const handleGoForward = async (activeBrowser: RemoteBrowser, page: Page) => {
try {
if (page.isClosed()) {
logger.log("debug", `Ignoring go forward event: page is closed`);
return;
}
const generator = activeBrowser.generator;
await page.goForward({ waitUntil: "commit" });
generator.onGoForward(page.url());
logger.log("debug", "Page went forward");
} catch (e) {
const { message } = e as Error;
logger.log("warn", `Error handling go forward event: ${message}`);
}
};
/**
 * Handles a click action sent by the client for a selector-addressed element.
 * Replays the click on the remote page, records it through the generator,
 * records a navigation when the page URL changed (skipped when the client
 * flagged the interaction as SPA), and finally asks the browser to make and
 * emit a new DOM snapshot. Errors are logged as warnings and never rethrown.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @param data - the data of the click action event; `isSPA` defaults to false
 * @category BrowserManagement
 */
const handleClickAction = async (
  activeBrowser: RemoteBrowser,
  page: Page,
  data: {
    selector: string;
    url: string;
    userId: string;
    elementInfo?: any;
    coordinates?: { x: number; y: number };
    isSPA?: boolean;
  }
) => {
  // Fixed pauses (in milliseconds) used to let the page settle.
  const SPA_SETTLE_DELAY_MS = 1500;
  const SNAPSHOT_DELAY_MS = 2000;
  const pause = (ms: number) =>
    new Promise<void>((resolve) => setTimeout(resolve, ms));

  try {
    if (page.isClosed()) {
      logger.log("debug", `Ignoring click action event: page is closed`);
      return;
    }

    // Only the fields this handler needs; the full payload goes to the generator.
    const { selector, isSPA = false } = data;
    // Remember the URL before the click so a navigation can be detected afterwards.
    const currentUrl = page.url();

    await page.click(selector);

    const generator = activeBrowser.generator;
    await generator.onDOMClickAction(page, data);
    logger.log("debug", `Click action processed: ${selector}`);

    if (isSPA) {
      logger.log("debug", `SPA interaction detected for selector: ${selector}`);
      await pause(SPA_SETTLE_DELAY_MS);
    } else {
      const newUrl = page.url();
      // A URL that merely gained a trailing "/#" is not treated as a navigation.
      const hasNavigated = newUrl !== currentUrl && !newUrl.endsWith("/#");
      if (hasNavigated) {
        logger.log("debug", `Navigation detected: ${currentUrl} -> ${newUrl}`);
        await generator.onDOMNavigation(page, {
          url: newUrl,
          currentUrl: currentUrl,
          userId: data.userId,
        });
      }
    }

    // Wait before snapshotting in both branches, then push the snapshot to the client.
    await pause(SNAPSHOT_DELAY_MS);
    await activeBrowser.makeAndEmitDOMSnapshot();
  } catch (e) {
    const { message } = e as Error;
    logger.log(
      "warn",
      `Error handling enhanced click action event: ${message}`
    );
  }
};
/**
 * A wrapper function for handling the click action event.
 * Logs the event and hands the payload to {@link handleClickAction} through
 * `handleWrapper`.
 * @param data - the data of the click action event; `isSPA` is optional and
 * is read by the handler (treated as false when omitted)
 * @param userId - the user id passed on to `handleWrapper`
 * @category HelperFunctions
 */
const onDOMClickAction = async (
  data: {
    selector: string;
    url: string;
    userId: string;
    elementInfo?: any;
    coordinates?: { x: number; y: number };
    isSPA?: boolean;
  },
  userId: string
) => {
  logger.log("debug", "Handling click action event emitted from client");
  await handleWrapper(handleClickAction, userId, data);
};
/**
 * Handles a keyboard action sent by the client for a selector-addressed element.
 * The key press is only recorded through the generator; this handler does not
 * press any key on the page itself. Errors are logged as warnings.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @param data - the data of the keyboard action event
 * @category BrowserManagement
 */
const handleKeyboardAction = async (
  activeBrowser: RemoteBrowser,
  page: Page,
  data: {
    selector: string;
    key: string;
    url: string;
    userId: string;
    inputType?: string;
  }
) => {
  try {
    const pageIsGone = page.isClosed();
    if (pageIsGone) {
      logger.log("debug", `Ignoring keyboard action event: page is closed`);
      return;
    }

    await activeBrowser.generator.onDOMKeyboardAction(page, data);

    const summary = `Keyboard action processed: ${data.key} on ${data.selector}`;
    logger.log("debug", summary);
  } catch (e) {
    const reason = (e as Error).message;
    logger.log("warn", `Error handling keyboard action event: ${reason}`);
  }
};
/**
* A wrapper function for handling the keyboard action event.
* Logs the event and hands the payload to handleKeyboardAction through handleWrapper.
* @param data - the data of the keyboard action event
* @param userId - the user id passed on to handleWrapper
* @category HelperFunctions
*/
const onDOMKeyboardAction = async (
data: {
selector: string;
key: string;
url: string;
userId: string;
inputType?: string;
},
userId: string
) => {
logger.log("debug", "Handling keyboard action event emitted from client");
await handleWrapper(handleKeyboardAction, userId, data);
};
/**
 * Handles a workflow pair sent by the client.
 * Passes the pair to the generator unless the page is already closed;
 * any error is logged as a warning.
 * @param activeBrowser - the active remote browser {@link RemoteBrowser}
 * @param page - the active page of the remote browser
 * @param data - the data of the workflow pair event
 * @category BrowserManagement
 */
const handleWorkflowPair = async (
  activeBrowser: RemoteBrowser,
  page: Page,
  data: { pair: WhereWhatPair; userId: string }
) => {
  try {
    const pageIsGone = page.isClosed();
    if (pageIsGone) {
      logger.log("debug", `Ignoring workflow pair event: page is closed`);
      return;
    }

    await activeBrowser.generator.onDOMWorkflowPair(page, data);
    logger.log("debug", `Workflow pair processed from frontend`);
  } catch (e) {
    const reason = (e as Error).message;
    logger.log("warn", `Error handling workflow pair event: ${reason}`);
  }
};
/**
* A wrapper function for handling the workflow pair event.
* Logs the event and hands the payload to handleWorkflowPair through handleWrapper.
* @param data - the data of the workflow pair event
* @param userId - the user id passed on to handleWrapper
* @category HelperFunctions
*/
const onDOMWorkflowPair = async (
data: { pair: WhereWhatPair; userId: string },
userId: string
) => {
logger.log("debug", "Handling workflow pair event emitted from client");
await handleWrapper(handleWorkflowPair, userId, data);
};
/**
@@ -493,6 +817,10 @@ const registerInputHandlers = (socket: Socket, userId: string) => {
socket.on("input:time", (data) => onTimeSelection(data, userId));
socket.on("input:datetime-local", (data) => onDateTimeLocalSelection(data, userId));
socket.on("action", (data) => onGenerateAction(data, userId));
socket.on("dom:click", (data) => onDOMClickAction(data, userId));
socket.on("dom:keypress", (data) => onDOMKeyboardAction(data, userId));
socket.on("dom:addpair", (data) => onDOMWorkflowPair(data, userId));
};
export default registerInputHandlers;

View File

@@ -82,6 +82,7 @@ export class WorkflowGenerator {
this.poolId = poolId;
this.registerEventHandlers(socket);
this.initializeSocketListeners();
this.initializeDOMListeners();
}
/**
@@ -92,6 +93,8 @@ export class WorkflowGenerator {
workflow: [],
};
private isDOMMode: boolean = false;
/**
* Metadata of the currently recorded workflow.
* @private
@@ -134,6 +137,18 @@ export class WorkflowGenerator {
})
}
/**
 * Subscribes to the two mode events on the socket and keeps `isDOMMode`
 * in step with the last one received.
 */
private initializeDOMListeners() {
  // Builds a listener that stores the given mode flag and logs the switch.
  const switchMode = (domMode: boolean, logMessage: string) => () => {
    this.isDOMMode = domMode;
    logger.log('debug', logMessage);
  };

  this.socket.on('dom-mode-enabled', switchMode(true, 'Generator: DOM mode enabled'));
  this.socket.on('screenshot-mode-enabled', switchMode(false, 'Generator: Screenshot mode enabled'));
}
/**
* Registers the event handlers for all generator-related events on the socket.
* @param socket The socket used to communicate with the client.
@@ -348,6 +363,96 @@ export class WorkflowGenerator {
await this.addPairToWorkflowAndNotifyClient(pair, page);
};
// Records a DOM click as a workflow pair keyed by the element's selector.
// Clicks on INPUT/TEXTAREA elements that arrive with coordinates also carry
// the click position and a cursor index of 0 in their arguments.
public onDOMClickAction = async (page: Page, data: {
  selector: string,
  url: string,
  userId: string,
  elementInfo?: any,
  coordinates?: { x: number, y: number }
}) => {
  const { selector, elementInfo, coordinates } = data;

  const isPositionedTextInput =
    !!elementInfo &&
    !!coordinates &&
    ['INPUT', 'TEXTAREA'].includes(elementInfo.tagName);

  const clickArgs = isPositionedTextInput
    ? [selector, { position: coordinates }, { cursorIndex: 0 }]
    : [selector];

  const pair: WhereWhatPair = {
    where: {
      url: this.getBestUrl(data.url),
      selectors: [selector]
    },
    what: [{
      action: 'click',
      args: clickArgs,
    }],
  };

  // Remember what was clicked last before publishing the pair.
  this.generatedData.lastUsedSelector = selector;
  this.generatedData.lastAction = 'click';

  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
// Records a DOM key press as a workflow pair. The key is passed through
// `encrypt` before it is stored; the input type falls back to 'text'.
public onDOMKeyboardAction = async (page: Page, data: {
  selector: string,
  key: string,
  url: string,
  userId: string,
  inputType?: string
}) => {
  const targetSelector = data.selector;

  const pair: WhereWhatPair = {
    where: {
      url: this.getBestUrl(data.url),
      selectors: [targetSelector]
    },
    what: [{
      action: 'press',
      args: [targetSelector, encrypt(data.key), data.inputType || 'text'],
    }],
  };

  // Remember the last interaction before publishing the pair.
  this.generatedData.lastUsedSelector = targetSelector;
  this.generatedData.lastAction = 'press';

  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
// Records a navigation as a `goto` pair: the pair matches on the URL the page
// was on (`currentUrl`) and navigates to the new `url`.
public onDOMNavigation = async (page: Page, data: {
  url: string,
  currentUrl: string,
  userId: string
}) => {
  const origin = this.getBestUrl(data.currentUrl);
  const destination = data.url;

  const pair: WhereWhatPair = {
    where: { url: origin },
    what: [{
      action: 'goto',
      args: [destination],
    }],
  };

  // A navigation clears the last used selector.
  this.generatedData.lastUsedSelector = '';

  await this.addPairToWorkflowAndNotifyClient(pair, page);
};
// Adds a ready-made workflow pair from the event payload to the workflow
// and notifies the client; the pair is passed on unchanged.
public onDOMWorkflowPair = async (page: Page, data: { pair: WhereWhatPair, userId: string }) => {
  await this.addPairToWorkflowAndNotifyClient(data.pair, page);
};
/**
* Generates a pair for the click event.
* @param coordinates The coordinates of the click event.
@@ -357,6 +462,7 @@ export class WorkflowGenerator {
public onClick = async (coordinates: Coordinates, page: Page) => {
let where: WhereWhatPair["where"] = { url: this.getBestUrl(page.url()) };
const selector = await this.generateSelector(page, coordinates, ActionType.Click);
console.log("COOORDINATES: ", coordinates);
logger.log('debug', `Element's selector: ${selector}`);
const elementInfo = await getElementInformation(page, coordinates, '', false);
@@ -708,6 +814,7 @@ export class WorkflowGenerator {
this.socket = socket;
this.registerEventHandlers(socket);
this.initializeSocketListeners();
this.initializeDOMListeners();
};
/**

View File

@@ -11,6 +11,12 @@ import { useTranslation } from 'react-i18next';
import { AuthContext } from '../../context/auth';
import { coordinateMapper } from '../../helpers/coordinateMapper';
import { useBrowserDimensionsStore } from '../../context/browserDimensions';
import { clientSelectorGenerator } from "../../helpers/clientSelectorGenerator";
import DatePicker from "../pickers/DatePicker";
import Dropdown from "../pickers/Dropdown";
import TimePicker from "../pickers/TimePicker";
import DateTimeLocalPicker from "../pickers/DateTimeLocalPicker";
import { DOMBrowserRenderer } from '../recorder/DOMBrowserRenderer';
interface ElementInfo {
tagName: string;
@@ -23,6 +29,7 @@ interface ElementInfo {
attributes?: Record<string, string>;
innerHTML?: string;
outerHTML?: string;
isDOMMode?: boolean;
}
interface AttributeOption {
@@ -41,6 +48,73 @@ interface ViewportInfo {
height: number;
}
/**
 * One node of a serialized DOM snapshot tree.
 * NOTE(review): presumably mirrors the node format produced by rrweb-snapshot;
 * confirm against that library before tightening these fields.
 */
interface RRWebSnapshot {
  /** Numeric node identifier. */
  id: number;
  /** Numeric node kind. */
  type: number;
  textContent: string;
  tagName?: string;
  attributes?: { [name: string]: string };
  /** Nested nodes, each of the same shape. */
  childNodes?: Array<RRWebSnapshot>;
  /** Any further properties are allowed and left untyped. */
  [key: string]: any;
}
/**
 * A DOM snapshot together with the resources collected for it and some
 * processing statistics. This is the value kept in the `currentSnapshot` state.
 */
interface ProcessedSnapshot {
  /** Root of the serialized DOM tree. */
  snapshot: RRWebSnapshot;
  /** Resources grouped by kind; each entry pairs a source reference with its content. */
  resources: {
    stylesheets: { href: string; content: string; media?: string }[];
    images: { src: string; dataUrl: string; alt?: string }[];
    fonts: { url: string; dataUrl: string; format?: string }[];
    scripts: { src: string; content: string; type?: string }[];
    media: { src: string; dataUrl: string; type: string }[];
  };
  baseUrl: string;
  viewport: { width: number; height: number };
  timestamp: number;
  /** Per-kind counters and totals reported alongside the snapshot. */
  processingStats: {
    totalReplacements: number;
    discoveredResources: Record<
      "images" | "stylesheets" | "scripts" | "fonts" | "media",
      number
    >;
    cachedResources: Record<
      "stylesheets" | "images" | "fonts" | "scripts" | "media",
      number
    >;
    totalCacheSize: number;
  };
}
/**
 * Payload received by the snapshot handler: the processed snapshot plus the
 * id of the user it is addressed to.
 */
interface RRWebDOMCastData {
  /** Compared against the signed-in user's id; an empty value is accepted for any user. */
  userId: string;
  timestamp: number;
  snapshotData: ProcessedSnapshot;
}
const getAttributeOptions = (tagName: string, elementInfo: ElementInfo | null): AttributeOption[] => {
if (!elementInfo) return [];
@@ -79,6 +153,9 @@ export const BrowserWindow = () => {
const [selectedElement, setSelectedElement] = useState<{ selector: string, info: ElementInfo | null } | null>(null);
const [currentListId, setCurrentListId] = useState<number | null>(null);
const [viewportInfo, setViewportInfo] = useState<ViewportInfo>({ width: browserWidth, height: browserHeight });
const [isDOMMode, setIsDOMMode] = useState(false);
const [currentSnapshot, setCurrentSnapshot] = useState<ProcessedSnapshot | null>(null);
const [isLoading, setIsLoading] = useState(false);
const [listSelector, setListSelector] = useState<string | null>(null);
const [fields, setFields] = useState<Record<string, TextStep>>({});
@@ -94,11 +171,142 @@ export const BrowserWindow = () => {
const { state } = useContext(AuthContext);
const { user } = state;
const [datePickerInfo, setDatePickerInfo] = useState<{
coordinates: { x: number; y: number };
selector: string;
} | null>(null);
const [dropdownInfo, setDropdownInfo] = useState<{
coordinates: { x: number; y: number };
selector: string;
options: Array<{
value: string;
text: string;
disabled: boolean;
selected: boolean;
}>;
} | null>(null);
const [timePickerInfo, setTimePickerInfo] = useState<{
coordinates: { x: number; y: number };
selector: string;
} | null>(null);
const [dateTimeLocalInfo, setDateTimeLocalInfo] = useState<{
coordinates: { x: number; y: number };
selector: string;
} | null>(null);
const dimensions = {
width: browserWidth,
height: browserHeight
};
// The four callbacks below are passed to <DOMBrowserRenderer> (onShowDatePicker,
// onShowDropdown, onShowTimePicker, onShowDateTimePicker). Each one only stores
// the request in component state; the matching overlay component is rendered
// while that state is non-null. Empty dependency arrays keep them referentially
// stable across renders.

// Stores the coordinates/selector for the <DatePicker> overlay.
const handleShowDatePicker = useCallback(
(info: { coordinates: { x: number; y: number }; selector: string }) => {
setDatePickerInfo(info);
},
[]
);
// Stores the coordinates/selector/options for the <Dropdown> overlay.
const handleShowDropdown = useCallback(
(info: {
coordinates: { x: number; y: number };
selector: string;
options: Array<{
value: string;
text: string;
disabled: boolean;
selected: boolean;
}>;
}) => {
setDropdownInfo(info);
},
[]
);
// Stores the coordinates/selector for the <TimePicker> overlay.
const handleShowTimePicker = useCallback(
(info: { coordinates: { x: number; y: number }; selector: string }) => {
setTimePickerInfo(info);
},
[]
);
// Stores the coordinates/selector for the <DateTimeLocalPicker> overlay.
const handleShowDateTimePicker = useCallback(
(info: { coordinates: { x: number; y: number }; selector: string }) => {
setDateTimeLocalInfo(info);
},
[]
);
/**
 * Listener for "domcast" events.
 *
 * Events addressed to a different user are ignored entirely. Otherwise, when
 * the payload carries a snapshot, it is stored, DOM mode is switched on and
 * "dom-mode-enabled" is emitted; in every handled case the loading flag is
 * cleared afterwards.
 */
const rrwebSnapshotHandler = useCallback(
  (data: RRWebDOMCastData) => {
    const addressedToSomeoneElse = data.userId && data.userId !== user?.id;
    if (addressedToSomeoneElse) {
      return;
    }

    const snapshotPayload = data.snapshotData;
    if (snapshotPayload && snapshotPayload.snapshot) {
      setCurrentSnapshot(snapshotPayload);
      setIsDOMMode(true);
      socket?.emit("dom-mode-enabled");
    }

    // Cleared whether or not a usable snapshot arrived.
    setIsLoading(false);
  },
  [user?.id, socket]
);
/**
 * Listener for "dom-mode-enabled" events: switches the component to DOM mode,
 * emits "dom-mode-enabled" on the socket and clears the loading flag. Events
 * carrying a userId that is not the current user's are ignored.
 */
const domModeHandler = useCallback(
  (data: any) => {
    const addressedToSomeoneElse = data.userId && data.userId !== user?.id;
    if (addressedToSomeoneElse) {
      return;
    }

    setIsDOMMode(true);
    socket?.emit("dom-mode-enabled");
    setIsLoading(false);
  },
  [user?.id, socket]
);
/**
 * Listener for "screenshot-mode-enabled" events: leaves DOM mode, emits
 * "screenshot-mode-enabled" on the socket, drops the stored snapshot and
 * clears the loading flag. Events carrying a userId that is not the current
 * user's are ignored.
 */
const screenshotModeHandler = useCallback(
  (data: any) => {
    if (!data.userId || data.userId === user?.id) {
      setIsDOMMode(false);
      socket?.emit("screenshot-mode-enabled");
      setCurrentSnapshot(null);
      setIsLoading(false);
    }
  },
  // `socket` is read inside the callback, so it must be a dependency;
  // otherwise the callback keeps emitting on the socket captured when
  // user?.id last changed (possibly undefined or a replaced connection).
  [user?.id, socket]
);
/**
 * Listener for "dom-mode-error" events: falls back out of DOM mode, discards
 * the stored snapshot and clears the loading flag. Events carrying a userId
 * that is not the current user's are ignored.
 */
const domModeErrorHandler = useCallback(
  (data: any) => {
    const addressedToSomeoneElse = data.userId && data.userId !== user?.id;
    if (addressedToSomeoneElse) {
      return;
    }

    setIsDOMMode(false);
    setCurrentSnapshot(null);
    setIsLoading(false);
  },
  [user?.id]
);
// While in DOM mode, push the current capture settings (getList flag, list
// selector — empty string when unset — and pagination mode) into
// clientSelectorGenerator whenever any of them changes.
useEffect(() => {
  if (!isDOMMode) {
    return;
  }

  const selectorForGenerator = listSelector || "";
  clientSelectorGenerator.setGetList(getList);
  clientSelectorGenerator.setListSelector(selectorForGenerator);
  clientSelectorGenerator.setPaginationMode(paginationMode);
}, [isDOMMode, getList, listSelector, paginationMode]);
// In DOM mode with a list selector chosen: tell the server that list capture
// is active and which selector is in use, and mirror the selector into
// clientSelectorGenerator.
useEffect(() => {
  if (!isDOMMode || !listSelector) {
    return;
  }

  socket?.emit("setGetList", { getList: true });
  socket?.emit("listSelector", { selector: listSelector });
  clientSelectorGenerator.setListSelector(listSelector);
}, [isDOMMode, listSelector, socket, getList]);
// Re-sends the rendered browser size (dimensions, taken from
// browserWidth/browserHeight) and the viewport size held in viewportInfo to
// coordinateMapper whenever either changes.
useEffect(() => {
coordinateMapper.updateDimensions(dimensions.width, dimensions.height, viewportInfo.width, viewportInfo.height);
}, [viewportInfo, dimensions.width, dimensions.height]);
@@ -162,16 +370,185 @@ export const BrowserWindow = () => {
useEffect(() => {
if (socket) {
socket.on("screencast", screencastHandler);
socket.on("domcast", rrwebSnapshotHandler);
socket.on("dom-mode-enabled", domModeHandler);
socket.on("screenshot-mode-enabled", screenshotModeHandler);
socket.on("dom-mode-error", domModeErrorHandler);
}
if (canvasRef?.current) {
if (canvasRef?.current && !isDOMMode && screenShot) {
drawImage(screenShot, canvasRef.current);
} else {
console.log('Canvas is not initialized');
}
return () => {
socket?.off("screencast", screencastHandler);
}
}, [screenShot, canvasRef, socket, screencastHandler]);
if (socket) {
console.log("Cleaning up DOM streaming event listeners");
socket.off("screencast", screencastHandler);
socket.off("domcast", rrwebSnapshotHandler);
socket.off("dom-mode-enabled", domModeHandler);
socket.off("screenshot-mode-enabled", screenshotModeHandler);
socket.off("dom-mode-error", domModeErrorHandler);
}
};
}, [
socket,
screenShot,
canvasRef,
isDOMMode,
screencastHandler,
rrwebSnapshotHandler,
domModeHandler,
screenshotModeHandler,
domModeErrorHandler,
]);
/**
 * Highlight callback for DOM mode (invoked through DOMBrowserRenderer's
 * onHighlight prop).
 *
 * Translates the hovered element's rect from iframe coordinates into page
 * coordinates and then decides, based on the current capture mode, whether
 * the element should be highlighted (setHighlighterData(mappedData)) or the
 * highlight cleared (setHighlighterData(null)).
 *
 * Does nothing unless DOM mode is active and a snapshot is loaded.
 */
const domHighlighterHandler = useCallback(
(data: {
rect: DOMRect;
selector: string;
elementInfo: ElementInfo | null;
childSelectors?: string[];
isDOMMode?: boolean;
}) => {
if (!isDOMMode || !currentSnapshot) {
return;
}
// Locate the iframe that renders the snapshot, trying progressively looser
// lookups.
let iframeElement = document.querySelector(
"#dom-browser-iframe"
) as HTMLIFrameElement;
if (!iframeElement) {
iframeElement = document.querySelector(
"#browser-window iframe"
) as HTMLIFrameElement;
}
// NOTE(review): this lookup appears to select the same element as the
// "#browser-window iframe" query above — confirm whether it is still needed.
if (!iframeElement) {
const browserWindow = document.querySelector("#browser-window");
if (browserWindow) {
iframeElement = browserWindow.querySelector(
"iframe"
) as HTMLIFrameElement;
}
}
if (!iframeElement) {
console.error("Could not find iframe element for DOM highlighting");
return;
}
const iframeRect = iframeElement.getBoundingClientRect();
// NOTE(review): presumably compensates for padding applied around the iframe
// body — the origin of the value 16 is not visible here, confirm.
const IFRAME_BODY_PADDING = 16;
// Shift the element rect by the iframe's on-page position (minus the padding
// constant); width and height are carried over unchanged.
const absoluteRect = new DOMRect(
data.rect.x + iframeRect.left - IFRAME_BODY_PADDING,
data.rect.y + iframeRect.top - IFRAME_BODY_PADDING,
data.rect.width,
data.rect.height
);
const mappedData = {
...data,
rect: absoluteRect,
};
if (getList === true) {
if (listSelector) {
// A list has already been selected: re-announce the selector, then only
// highlight elements that qualify under the rules below. The branches are
// evaluated in order and the first match wins.
socket?.emit("listSelector", { selector: listSelector });
const hasValidChildSelectors =
Array.isArray(mappedData.childSelectors) &&
mappedData.childSelectors.length > 0;
if (limitMode) {
// Nothing is highlighted while the limit is being chosen.
setHighlighterData(null);
} else if (paginationMode) {
// Highlight only for pagination types that need an element picked
// (anything other than "", "none", "scrollDown", "scrollUp").
if (
paginationType !== "" &&
!["none", "scrollDown", "scrollUp"].includes(paginationType)
) {
setHighlighterData(mappedData);
} else {
setHighlighterData(null);
}
} else if (
// Exact match against one of the list's child selectors.
mappedData.childSelectors &&
mappedData.childSelectors.includes(mappedData.selector)
) {
setHighlighterData(mappedData);
} else if (
// Element flagged as iframe content: match on ":>>"-separated parts.
mappedData.elementInfo?.isIframeContent &&
mappedData.childSelectors
) {
const isIframeChild = mappedData.childSelectors.some(
(childSelector) =>
mappedData.selector.includes(":>>") &&
childSelector
.split(":>>")
.some((part) => mappedData.selector.includes(part.trim()))
);
setHighlighterData(isIframeChild ? mappedData : null);
} else if (
// Selector contains ":>>" but the element is not flagged as iframe
// content: accept if any part occurs inside some child selector.
mappedData.selector.includes(":>>") &&
hasValidChildSelectors
) {
const selectorParts = mappedData.selector
.split(":>>")
.map((part) => part.trim());
const isValidMixedSelector = selectorParts.some((part) =>
mappedData.childSelectors!.some((childSelector) =>
childSelector.includes(part)
)
);
setHighlighterData(isValidMixedSelector ? mappedData : null);
} else if (
// Element flagged as shadow-root content: match on ">>"-separated parts.
mappedData.elementInfo?.isShadowRoot &&
mappedData.childSelectors
) {
const isShadowChild = mappedData.childSelectors.some(
(childSelector) =>
mappedData.selector.includes(">>") &&
childSelector
.split(">>")
.some((part) => mappedData.selector.includes(part.trim()))
);
setHighlighterData(isShadowChild ? mappedData : null);
} else if (
// Selector contains ">>" but the element is not flagged as shadow-root
// content: accept if any part occurs inside some child selector.
mappedData.selector.includes(">>") &&
hasValidChildSelectors
) {
const selectorParts = mappedData.selector
.split(">>")
.map((part) => part.trim());
const isValidMixedSelector = selectorParts.some((part) =>
mappedData.childSelectors!.some((childSelector) =>
childSelector.includes(part)
)
);
setHighlighterData(isValidMixedSelector ? mappedData : null);
} else {
setHighlighterData(null);
}
} else {
// No list selected yet: every hovered element is a candidate.
setHighlighterData(mappedData);
}
} else {
// getText mode
setHighlighterData(mappedData);
}
},
[
isDOMMode,
currentSnapshot,
getList,
socket,
listSelector,
paginationMode,
paginationType,
limitMode,
]
);
const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[] }) => {
const now = performance.now();
@@ -260,20 +637,6 @@ export const BrowserWindow = () => {
}
}, [getList, socket, listSelector, paginationMode, paginationType, limitMode]);
useEffect(() => {
if (socket) {
socket.on('listDataExtracted', (response) => {
const { currentListId, data } = response;
updateListStepData(currentListId, data);
});
}
return () => {
socket?.off('listDataExtracted');
};
}, [socket]);
useEffect(() => {
document.addEventListener('mousemove', onMouseMove, false);
if (socket) {
@@ -304,6 +667,188 @@ export const BrowserWindow = () => {
}
}, [captureStage, listSelector, socket]);
/**
 * Selection callback for DOM mode (passed to DOMBrowserRenderer as
 * onElementSelect).
 *
 * Depending on the active capture mode it:
 *  - getText: adds a text step directly when the element offers exactly one
 *    attribute option, otherwise opens the attribute modal;
 *  - getList + paginationMode: records the element as the pagination selector
 *    (only for pagination types that need one) and returns;
 *  - getList without a list selector: stores the element as the list selector;
 *  - getList with a list selector and list id: adds the element as a field of
 *    the list, or opens the attribute modal when several attributes apply.
 */
const handleDOMElementSelection = useCallback(
(highlighterData: {
rect: DOMRect;
selector: string;
elementInfo: ElementInfo | null;
childSelectors?: string[];
}) => {
// Reset any attribute-modal state left over from a previous selection.
setShowAttributeModal(false);
setSelectedElement(null);
setAttributeOptions([]);
const options = getAttributeOptions(
highlighterData.elementInfo?.tagName || "",
highlighterData.elementInfo
);
if (getText === true) {
if (options.length === 1) {
// Single option: take its value from the matching elementInfo field
// (url for href, imageUrl for src, innerText otherwise).
const attribute = options[0].value;
const data =
attribute === "href"
? highlighterData.elementInfo?.url || ""
: attribute === "src"
? highlighterData.elementInfo?.imageUrl || ""
: highlighterData.elementInfo?.innerText || "";
addTextStep(
"",
data,
{
selector: highlighterData.selector,
tag: highlighterData.elementInfo?.tagName,
shadow: highlighterData.elementInfo?.isShadowRoot,
attribute,
},
currentTextActionId || `text-${crypto.randomUUID()}`
);
} else {
// Zero or several options: let the user choose in the modal.
setAttributeOptions(options);
setSelectedElement({
selector: highlighterData.selector,
info: highlighterData.elementInfo,
});
setShowAttributeModal(true);
}
}
if (paginationMode && getList) {
// Only pagination types other than "", "scrollDown", "scrollUp" and "none"
// use a clicked element as their selector.
if (
paginationType !== "" &&
paginationType !== "scrollDown" &&
paginationType !== "scrollUp" &&
paginationType !== "none"
) {
setPaginationSelector(highlighterData.selector);
notify(
`info`,
t(
"browser_window.attribute_modal.notifications.pagination_select_success"
)
);
// NOTE(review): listSelector is asserted non-null here; nothing in this
// callback guarantees it — confirm pagination mode implies a list selector.
addListStep(
listSelector!,
fields,
currentListId || 0,
currentListActionId || `list-${crypto.randomUUID()}`,
{ type: paginationType, selector: highlighterData.selector }
);
socket?.emit("setPaginationMode", { pagination: false });
}
// In pagination mode no list/field selection happens below.
return;
}
if (getList === true && !listSelector) {
// First click in list mode: this element defines the list. Strip
// :nth-child(n) so the selector is not tied to one position.
let cleanedSelector = highlighterData.selector;
if (cleanedSelector.includes("nth-child")) {
cleanedSelector = cleanedSelector.replace(/:nth-child\(\d+\)/g, "");
}
setListSelector(cleanedSelector);
notify(
`info`,
t("browser_window.attribute_modal.notifications.list_select_success")
);
// The current timestamp doubles as the list id.
setCurrentListId(Date.now());
setFields({});
socket?.emit("setGetList", { getList: true });
socket?.emit("listSelector", { selector: cleanedSelector });
} else if (getList === true && listSelector && currentListId) {
if (options.length === 1) {
const attribute = options[0].value;
let currentSelector = highlighterData.selector;
if (currentSelector.includes(">")) {
// If the first segment of the field selector is the list item itself
// (same as the last segment of listSelector once :nth-child is removed),
// drop its :nth-child so the field selector is not tied to one item.
const [firstPart, ...restParts] = currentSelector
.split(">")
.map((p) => p.trim());
const listSelectorRightPart = listSelector
.split(">")
.pop()
?.trim()
.replace(/:nth-child\(\d+\)/g, "");
if (
firstPart.includes("nth-child") &&
firstPart.replace(/:nth-child\(\d+\)/g, "") ===
listSelectorRightPart
) {
currentSelector = `${firstPart.replace(
/:nth-child\(\d+\)/g,
""
)} > ${restParts.join(" > ")}`;
}
}
const data =
attribute === "href"
? highlighterData.elementInfo?.url || ""
: attribute === "src"
? highlighterData.elementInfo?.imageUrl || ""
: highlighterData.elementInfo?.innerText || "";
// The field id is the current timestamp; the label is numbered after the
// fields already collected.
const newField: TextStep = {
id: Date.now(),
type: "text",
label: `Label ${Object.keys(fields).length + 1}`,
data: data,
selectorObj: {
selector: currentSelector,
tag: highlighterData.elementInfo?.tagName,
shadow: highlighterData.elementInfo?.isShadowRoot,
attribute,
},
};
const updatedFields = {
...fields,
[newField.id]: newField,
};
setFields(updatedFields);
if (listSelector) {
addListStep(
listSelector,
updatedFields,
currentListId,
currentListActionId || `list-${crypto.randomUUID()}`,
{ type: "", selector: paginationSelector }
);
}
} else {
// Zero or several options: let the user choose in the modal.
setAttributeOptions(options);
setSelectedElement({
selector: highlighterData.selector,
info: highlighterData.elementInfo,
});
setShowAttributeModal(true);
}
}
},
[
getText,
getList,
listSelector,
paginationMode,
paginationType,
fields,
currentListId,
currentTextActionId,
currentListActionId,
addTextStep,
addListStep,
notify,
socket,
t,
paginationSelector,
]
);
const handleClick = (e: React.MouseEvent<HTMLDivElement>) => {
if (highlighterData && canvasRef?.current) {
const canvasRect = canvasRef.current.getBoundingClientRect();
@@ -409,13 +954,6 @@ export const BrowserWindow = () => {
setFields(updatedFields);
if (listSelector) {
socket?.emit('extractListData', {
listSelector,
fields: updatedFields,
currentListId,
pagination: { type: '', selector: paginationSelector }
});
addListStep(
listSelector,
updatedFields,
@@ -482,13 +1020,6 @@ export const BrowserWindow = () => {
setFields(updatedFields);
if (listSelector) {
socket?.emit('extractListData', {
listSelector,
fields: updatedFields,
currentListId,
pagination: { type: '', selector: paginationSelector }
});
addListStep(
listSelector,
updatedFields,
@@ -500,7 +1031,14 @@ export const BrowserWindow = () => {
}
}
}
setShowAttributeModal(false);
setSelectedElement(null);
setAttributeOptions([]);
setTimeout(() => {
setShowAttributeModal(false);
}, 0);
};
const resetPaginationSelector = useCallback(() => {
@@ -519,8 +1057,12 @@ export const BrowserWindow = () => {
getText === true || getList === true ? (
<GenericModal
isOpen={showAttributeModal}
onClose={() => { }}
canBeClosed={false}
onClose={() => {
setShowAttributeModal(false);
setSelectedElement(null);
setAttributeOptions([]);
}}
canBeClosed={true}
modalStyle={modalStyle}
>
<div>
@@ -560,21 +1102,150 @@ export const BrowserWindow = () => {
</GenericModal>
) : null
}
<div style={{ height: dimensions.height, overflow: 'hidden' }}>
{((getText === true || getList === true) && !showAttributeModal && highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ?
<Highlighter
unmodifiedRect={highlighterData?.rect}
displayedSelector={highlighterData?.selector}
{datePickerInfo && (
<DatePicker
coordinates={datePickerInfo.coordinates}
selector={datePickerInfo.selector}
onClose={() => setDatePickerInfo(null)}
/>
)}
{dropdownInfo && (
<Dropdown
coordinates={dropdownInfo.coordinates}
selector={dropdownInfo.selector}
options={dropdownInfo.options}
onClose={() => setDropdownInfo(null)}
/>
)}
{timePickerInfo && (
<TimePicker
coordinates={timePickerInfo.coordinates}
selector={timePickerInfo.selector}
onClose={() => setTimePickerInfo(null)}
/>
)}
{dateTimeLocalInfo && (
<DateTimeLocalPicker
coordinates={dateTimeLocalInfo.coordinates}
selector={dateTimeLocalInfo.selector}
onClose={() => setDateTimeLocalInfo(null)}
/>
)}
<div style={{ height: dimensions.height, overflow: "hidden" }}>
{(getText === true || getList === true) &&
!showAttributeModal &&
highlighterData?.rect != null && (
<>
{!isDOMMode && canvasRef?.current && (
<Highlighter
unmodifiedRect={highlighterData?.rect}
displayedSelector={highlighterData?.selector}
width={dimensions.width}
height={dimensions.height}
canvasRect={canvasRef.current.getBoundingClientRect()}
/>
)}
{isDOMMode && highlighterData && (
<>
<div
style={{
position: "absolute",
left: Math.max(0, highlighterData.rect.x),
top: Math.max(0, highlighterData.rect.y),
width: Math.min(
highlighterData.rect.width,
dimensions.width
),
height: Math.min(
highlighterData.rect.height,
dimensions.height
),
background: "rgba(255, 0, 195, 0.15)",
border: "2px solid #ff00c3",
borderRadius: "3px",
pointerEvents: "none",
zIndex: 1000,
boxShadow: "0 0 0 1px rgba(255, 255, 255, 0.8)",
transition: "all 0.1s ease-out",
}}
/>
</>
)}
</>
)}
{isDOMMode ? (
currentSnapshot ? (
<DOMBrowserRenderer
width={dimensions.width}
height={dimensions.height}
snapshot={currentSnapshot}
getList={getList}
getText={getText}
listSelector={listSelector}
paginationMode={paginationMode}
paginationType={paginationType}
limitMode={limitMode}
onHighlight={(data: any) => {
domHighlighterHandler(data);
}}
onElementSelect={handleDOMElementSelection}
onShowDatePicker={handleShowDatePicker}
onShowDropdown={handleShowDropdown}
onShowTimePicker={handleShowTimePicker}
onShowDateTimePicker={handleShowDateTimePicker}
/>
) : (
<div
style={{
width: dimensions.width,
height: dimensions.height,
display: "flex",
alignItems: "center",
justifyContent: "center",
background: "#f5f5f5",
borderRadius: "5px",
flexDirection: "column",
gap: "20px",
}}
>
<div
style={{
width: "60px",
height: "60px",
borderTop: "4px solid transparent",
borderRadius: "50%",
animation: "spin 1s linear infinite",
}}
/>
<div
style={{
fontSize: "18px",
color: "#ff00c3",
fontWeight: "bold",
}}
>
Loading website...
</div>
<style>{`
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
`}</style>
</div>
)
) : (
/* Screenshot mode canvas */
<Canvas
onCreateRef={setCanvasReference}
width={dimensions.width}
height={dimensions.height}
canvasRect={canvasRef.current.getBoundingClientRect()}
/>
: null}
<Canvas
onCreateRef={setCanvasReference}
width={dimensions.width}
height={dimensions.height}
/>
)}
</div>
</div>
);

View File

@@ -16,12 +16,58 @@ const DatePicker: React.FC<DatePickerProps> = ({ coordinates, selector, onClose
setSelectedDate(e.target.value);
};
/**
 * Writes `value` into the element matching `selector` inside the DOM-mode
 * iframe and then dispatches bubbling "change" and "input" events on it.
 *
 * The iframe is looked up by id first, then as a descendant of
 * #browser-window. Every failure (iframe missing, document inaccessible,
 * element not found, or an exception) is logged to the console and otherwise
 * ignored.
 */
const updateDOMElement = (selector: string, value: string) => {
  try {
    const iframeElement =
      (document.querySelector('#dom-browser-iframe') as HTMLIFrameElement | null) ??
      (document.querySelector('#browser-window iframe') as HTMLIFrameElement | null) ??
      (document.querySelector('#browser-window')?.querySelector('iframe') as
        | HTMLIFrameElement
        | null
        | undefined);

    if (!iframeElement) {
      console.error('Could not find iframe element for DOM update');
      return;
    }

    const iframeDoc = iframeElement.contentDocument;
    if (!iframeDoc) {
      console.error('Could not access iframe document');
      return;
    }

    const target = iframeDoc.querySelector(selector) as HTMLInputElement | null;
    if (!target) {
      console.warn(`Could not find element with selector: ${selector}`);
      return;
    }

    target.value = value;
    // Dispatched in this order: "change" first, then "input".
    for (const eventName of ['change', 'input']) {
      target.dispatchEvent(new Event(eventName, { bubbles: true }));
    }
  } catch (error) {
    console.error('Error updating DOM element:', error);
  }
};
/**
 * Confirms the chosen date: sends it to the server as "input:date", mirrors
 * it into the DOM-mode iframe and closes the picker. Does nothing when there
 * is no socket or no date selected.
 */
const handleConfirm = () => {
  if (!socket || !selectedDate) {
    return;
  }

  const payload = { selector, value: selectedDate };
  socket.emit('input:date', payload);
  updateDOMElement(selector, selectedDate);
  onClose();
};

View File

@@ -16,12 +16,58 @@ const DateTimeLocalPicker: React.FC<DateTimeLocalPickerProps> = ({ coordinates,
setSelectedDateTime(e.target.value);
};
/**
 * Writes `value` into the element matching `selector` inside the DOM-mode
 * iframe and then dispatches bubbling "change" and "input" events on it.
 *
 * The iframe is looked up by id first, then as a descendant of
 * #browser-window. Every failure (iframe missing, document inaccessible,
 * element not found, or an exception) is logged to the console and otherwise
 * ignored.
 */
const updateDOMElement = (selector: string, value: string) => {
  try {
    const iframeElement =
      (document.querySelector('#dom-browser-iframe') as HTMLIFrameElement | null) ??
      (document.querySelector('#browser-window iframe') as HTMLIFrameElement | null) ??
      (document.querySelector('#browser-window')?.querySelector('iframe') as
        | HTMLIFrameElement
        | null
        | undefined);

    if (!iframeElement) {
      console.error('Could not find iframe element for DOM update');
      return;
    }

    const iframeDoc = iframeElement.contentDocument;
    if (!iframeDoc) {
      console.error('Could not access iframe document');
      return;
    }

    const target = iframeDoc.querySelector(selector) as HTMLInputElement | null;
    if (!target) {
      console.warn(`Could not find element with selector: ${selector}`);
      return;
    }

    target.value = value;
    // Dispatched in this order: "change" first, then "input".
    for (const eventName of ['change', 'input']) {
      target.dispatchEvent(new Event(eventName, { bubbles: true }));
    }
  } catch (error) {
    console.error('Error updating DOM element:', error);
  }
};
/**
 * Confirms the chosen date-time: sends it to the server as
 * "input:datetime-local", mirrors it into the DOM-mode iframe and closes the
 * picker. Does nothing when there is no socket or no value selected.
 */
const handleConfirm = () => {
  if (!socket || !selectedDateTime) {
    return;
  }

  const payload = { selector, value: selectedDateTime };
  socket.emit('input:datetime-local', payload);
  updateDOMElement(selector, selectedDateTime);
  onClose();
};
@@ -58,8 +104,8 @@ const DateTimeLocalPicker: React.FC<DateTimeLocalPickerProps> = ({ coordinates,
onClick={handleConfirm}
disabled={!selectedDateTime}
className={`px-3 py-1 text-sm rounded ${selectedDateTime
? 'bg-blue-500 text-white hover:bg-blue-600'
: 'bg-gray-300 text-gray-500 cursor-not-allowed'
? 'bg-blue-500 text-white hover:bg-blue-600'
: 'bg-gray-300 text-gray-500 cursor-not-allowed'
}`}
>
Confirm

View File

@@ -18,9 +18,65 @@ const Dropdown = ({ coordinates, selector, options, onClose }: DropdownProps) =>
const { socket } = useSocketStore();
const [hoveredIndex, setHoveredIndex] = useState<number | null>(null);
/**
 * Applies the chosen option to the <select> matching `selector` inside the
 * DOM-mode iframe: sets the select's value, marks exactly the options whose
 * value equals `value` as selected (property and attribute), and dispatches
 * bubbling "change" and "input" events on the select.
 *
 * Every failure (iframe missing, document inaccessible, element not found, or
 * an exception) is logged to the console and otherwise ignored.
 */
const updateDOMElement = (selector: string, value: string) => {
  try {
    const iframeElement =
      (document.querySelector('#dom-browser-iframe') as HTMLIFrameElement | null) ??
      (document.querySelector('#browser-window iframe') as HTMLIFrameElement | null) ??
      (document.querySelector('#browser-window')?.querySelector('iframe') as
        | HTMLIFrameElement
        | null
        | undefined);

    if (!iframeElement) {
      console.error('Could not find iframe element for DOM update');
      return;
    }

    const iframeDoc = iframeElement.contentDocument;
    if (!iframeDoc) {
      console.error('Could not access iframe document');
      return;
    }

    const selectElement = iframeDoc.querySelector(selector) as HTMLSelectElement | null;
    if (!selectElement) {
      console.warn(`Could not find select element with selector: ${selector}`);
      return;
    }

    selectElement.value = value;

    // Keep both the `selected` property and the `selected` attribute in step
    // with the new value on every option.
    selectElement.querySelectorAll('option').forEach((option) => {
      const isChosen = option.value === value;
      option.selected = isChosen;
      if (isChosen) {
        option.setAttribute('selected', 'selected');
      } else {
        option.removeAttribute('selected');
      }
    });

    // Dispatched in this order: "change" first, then "input".
    for (const eventName of ['change', 'input']) {
      selectElement.dispatchEvent(new Event(eventName, { bubbles: true }));
    }
  } catch (error) {
    console.error('Error updating DOM select element:', error);
  }
};
/**
 * Handles picking a dropdown option. With a socket available the choice is
 * sent as "input:dropdown" and mirrored into the DOM-mode iframe; the
 * dropdown is closed in either case.
 */
const handleSelect = (value: string) => {
  if (!socket) {
    onClose();
    return;
  }

  socket.emit('input:dropdown', { selector, value });
  updateDOMElement(selector, value);
  onClose();
};

File diff suppressed because it is too large Load Diff

View File

@@ -21,6 +21,7 @@ import ActionDescriptionBox from '../action/ActionDescriptionBox';
import { useThemeMode } from '../../context/theme-provider';
import { useTranslation } from 'react-i18next';
import { useBrowserDimensionsStore } from '../../context/browserDimensions';
import { clientListExtractor } from '../../helpers/clientListExtractor';
const fetchWorkflow = (id: string, callback: (response: WorkflowFile) => void) => {
getActiveWorkflow(id).then(
@@ -51,6 +52,8 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
const [isCaptureTextConfirmed, setIsCaptureTextConfirmed] = useState(false);
const [isCaptureListConfirmed, setIsCaptureListConfirmed] = useState(false);
const { panelHeight } = useBrowserDimensionsStore();
const [isDOMMode, setIsDOMMode] = useState(false);
const [currentSnapshot, setCurrentSnapshot] = useState<any>(null);
const { lastAction, notify, currentWorkflowActionsState, setCurrentWorkflowActionsState, resetInterpretationLog, currentListActionId, setCurrentListActionId, currentTextActionId, setCurrentTextActionId, currentScreenshotActionId, setCurrentScreenshotActionId } = useGlobalInfoStore();
const {
@@ -69,7 +72,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
startAction, finishAction
} = useActionContext();
const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep, updateListTextFieldLabel, removeListTextField, updateListStepLimit, deleteStepsByActionId } = useBrowserSteps();
const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep, updateListTextFieldLabel, removeListTextField, updateListStepLimit, deleteStepsByActionId, updateListStepData } = useBrowserSteps();
const { id, socket } = useSocketStore();
const { t } = useTranslation();
@@ -79,6 +82,42 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
setWorkflow(data);
}, [setWorkflow]);
// Tracks DOM/screenshot mode and the latest snapshot in this panel by
// listening to "dom-mode-enabled", "screenshot-mode-enabled" and "domcast".
// Listeners are registered only when a socket exists and are removed again on
// cleanup.
useEffect(() => {
  if (!socket) {
    return;
  }

  // An event is handled when it carries no userId or its userId equals `id`.
  // NOTE(review): `id` comes from useSocketStore here, whereas BrowserWindow
  // compares the same field against the authenticated user's id — confirm
  // both refer to the same identifier.
  const appliesHere = (data: any) => !data.userId || data.userId === id;

  const domModeHandler = (data: any) => {
    if (appliesHere(data)) {
      setIsDOMMode(true);
    }
  };

  const screenshotModeHandler = (data: any) => {
    if (!appliesHere(data)) {
      return;
    }
    setIsDOMMode(false);
    setCurrentSnapshot(null);
  };

  const domcastHandler = (data: any) => {
    if (!appliesHere(data)) {
      return;
    }
    // Only payloads that actually contain a snapshot switch the panel over.
    if (data.snapshotData?.snapshot) {
      setCurrentSnapshot(data.snapshotData);
      setIsDOMMode(true);
    }
  };

  socket.on("dom-mode-enabled", domModeHandler);
  socket.on("screenshot-mode-enabled", screenshotModeHandler);
  socket.on("domcast", domcastHandler);

  return () => {
    socket.off("dom-mode-enabled", domModeHandler);
    socket.off("screenshot-mode-enabled", screenshotModeHandler);
    socket.off("domcast", domcastHandler);
  };
}, [socket, id]);
useEffect(() => {
if (socket) {
socket.on("workflow", workflowHandler);
@@ -129,6 +168,100 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
setShowCaptureText(true);
}, [workflow, setCurrentWorkflowActionsState]);
// Applies server-side list extraction results ("listDataExtracted") to the
// matching list step. Results are ignored in DOM mode, where extraction is
// done client-side instead.
useEffect(() => {
  if (!socket) {
    return;
  }

  // Payload shape is defined by the server; only the two fields read below
  // are relied upon.
  const handleListDataExtracted = (response: any) => {
    if (isDOMMode) {
      return;
    }
    const { currentListId, data } = response;
    updateListStepData(currentListId, data);
  };

  socket.on('listDataExtracted', handleListDataExtracted);

  return () => {
    // Pass the listener explicitly: calling off() with only the event name
    // would remove every 'listDataExtracted' listener on this socket,
    // including ones registered elsewhere.
    socket.off('listDataExtracted', handleListDataExtracted);
  };
}, [socket, updateListStepData, isDOMMode]);
/**
 * Produces preview data for a list step.
 *
 * In DOM mode with a snapshot loaded, the list is extracted directly from the
 * snapshot iframe's document via clientListExtractor (limited to 5 rows) and
 * written to the step with updateListStepData. Otherwise an "extractListData"
 * request is emitted to the backend over the socket.
 *
 * Failures are logged to the console; a client-side extraction exception
 * additionally raises an error notification.
 */
const extractDataClientSide = useCallback(
  (
    listSelector: string,
    fields: Record<string, any>,
    currentListId: number
  ) => {
    const canExtractLocally = isDOMMode && currentSnapshot;

    if (!canExtractLocally) {
      // Fallback to socket-based extraction for screenshot mode
      if (!socket) {
        console.error("Socket not available for backend extraction");
        return;
      }

      try {
        const request = {
          listSelector,
          fields,
          currentListId,
          pagination: { type: "", selector: "" },
        };
        socket.emit("extractListData", request);
        console.log("📤 Sent extraction request to backend");
      } catch (error) {
        console.error("Error in backend data extraction:", error);
      }
      return;
    }

    try {
      // Find the DOM iframe element: by id first, then inside #browser-window.
      const iframeElement =
        (document.querySelector("#dom-browser-iframe") as HTMLIFrameElement | null) ??
        (document.querySelector("#browser-window iframe") as HTMLIFrameElement | null) ??
        (document.querySelector("#browser-window")?.querySelector("iframe") as
          | HTMLIFrameElement
          | null
          | undefined);

      if (!iframeElement) {
        console.error("Could not find the DOM iframe element for extraction");
        return;
      }

      const iframeDoc = iframeElement.contentDocument;
      if (!iframeDoc) {
        console.error("Failed to get iframe document");
        return;
      }

      // Use client-side extraction
      const previewLimit = 5; // limit for preview
      const extractedData = clientListExtractor.extractListData(
        iframeDoc,
        listSelector,
        fields,
        previewLimit
      );

      updateListStepData(currentListId, extractedData);
      console.log("✅ Client-side extraction completed:", extractedData);
    } catch (error) {
      console.error("Error in client-side data extraction:", error);
      notify("error", "Failed to extract data client-side");
    }
  },
  [isDOMMode, currentSnapshot, updateListStepData, socket, notify]
);
// Marks the entry with the given id as hovered, leaving other entries as they were.
const handleMouseEnter = (id: number) => {
  setHoverStates((previous) => Object.assign({}, previous, { [id]: true }));
};
@@ -338,17 +471,22 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
const stopCaptureAndEmitGetListSettings = useCallback(() => {
const settings = getListSettingsObject();
if (settings) {
const latestListStep = getLatestListStep(browserSteps);
if (latestListStep && settings) {
extractDataClientSide(latestListStep.listSelector!, latestListStep.fields, latestListStep.id);
socket?.emit('action', { action: 'scrapeList', settings });
} else {
notify('error', t('right_panel.errors.unable_create_settings'));
}
handleStopGetList();
setCurrentListActionId('');
resetInterpretationLog();
finishAction('list');
onFinishCapture();
}, [getListSettingsObject, socket, notify, handleStopGetList, resetInterpretationLog, finishAction, onFinishCapture, t]);
}, [getListSettingsObject, socket, notify, handleStopGetList, resetInterpretationLog, finishAction, onFinishCapture, t, browserSteps, extractDataClientSide]);
const hasUnconfirmedListTextFields = browserSteps.some(step =>
step.type === 'list' &&

View File

@@ -0,0 +1,734 @@
/**
 * A captured text field: an id, a display label, the captured data and the
 * selector information describing where the data came from.
 */
interface TextStep {
id: number;
type: "text";
label: string;
data: string;
selectorObj: {
selector: string;
// Tag name of the source element, when known.
tag?: string;
// NOTE(review): presumably true when the element lives inside a shadow
// root — confirm against the code that populates it.
shadow?: boolean;
// Name of the attribute/property to read (e.g. "innerText", "href", "src").
attribute: string;
};
}
/** One extracted row: string keys mapped to extracted string values. */
interface ExtractedListData {
[key: string]: string;
}
/**
 * Field description carrying optional table-related information.
 * NOTE(review): tableContext/cellIndex semantics are defined by code outside
 * this view — confirm before relying on them.
 */
interface TableField {
selector: string;
attribute: string;
tableContext?: string;
cellIndex?: number;
}
/** Field description without table-related information. */
interface NonTableField {
selector: string;
attribute: string;
}
/** Fields split into a table group and a non-table group, each keyed by string. */
interface ContainerFields {
tableFields: Record<string, TableField>;
nonTableFields: Record<string, NonTableField>;
}
class ClientListExtractor {
/**
 * Resolves a selector to a single element, starting at `rootElement`.
 *
 * Plain selectors go straight to querySelector. Selectors containing ">>" or
 * ":>>" are split on those separators and resolved one part at a time,
 * descending into frame documents and shadow roots as needed.
 *
 * @returns the matched element, or null when any step fails to match or a
 * frame's document cannot be accessed.
 */
private queryElement = (
rootElement: Element | Document,
selector: string
): Element | null => {
// Fast path: no boundary-crossing separators. (":>>" contains ">>", so the
// first check alone already covers both.)
if (!selector.includes(">>") && !selector.includes(":>>")) {
return rootElement.querySelector(selector);
}
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
let currentElement: Element | Document | null = rootElement;
for (let i = 0; i < parts.length; i++) {
if (!currentElement) return null;
// Current node is a frame: continue the search inside its document.
if (
(currentElement as Element).tagName === "IFRAME" ||
(currentElement as Element).tagName === "FRAME"
) {
try {
const frameElement = currentElement as
| HTMLIFrameElement
| HTMLFrameElement;
const frameDoc =
frameElement.contentDocument ||
frameElement.contentWindow?.document;
if (!frameDoc) return null;
currentElement = frameDoc.querySelector(parts[i]);
continue;
} catch (e) {
console.warn(
`Cannot access ${(
currentElement as Element
).tagName.toLowerCase()} content:`,
e
);
return null;
}
}
// Otherwise try, in order: the node's own subtree, its shadow root, then
// the shadow roots of its direct children.
let nextElement: Element | null = null;
if ("querySelector" in currentElement) {
nextElement = currentElement.querySelector(parts[i]);
}
if (
!nextElement &&
"shadowRoot" in currentElement &&
(currentElement as Element).shadowRoot
) {
nextElement = (currentElement as Element).shadowRoot!.querySelector(
parts[i]
);
}
if (!nextElement && "children" in currentElement) {
const children: any = Array.from(
(currentElement as Element).children || []
);
for (const child of children) {
if (child.shadowRoot) {
nextElement = child.shadowRoot.querySelector(parts[i]);
if (nextElement) break;
}
}
}
currentElement = nextElement;
}
return currentElement as Element | null;
};
/**
 * Resolves a selector to all matching elements, starting at `rootElement`.
 *
 * Plain selectors go straight to querySelectorAll. Selectors containing ">>"
 * or ":>>" are split on those separators; each part is evaluated against
 * every element matched by the previous part, descending into frame documents
 * and shadow roots.
 *
 * @returns the elements matched by the final part (empty when nothing
 * matches). Results from the different lookups are concatenated without
 * de-duplication.
 */
private queryElementAll = (
rootElement: Element | Document,
selector: string
): Element[] => {
// Fast path: no boundary-crossing separators.
if (!selector.includes(">>") && !selector.includes(":>>")) {
return Array.from(rootElement.querySelectorAll(selector));
}
const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim());
let currentElements: (Element | Document)[] = [rootElement];
for (const part of parts) {
const nextElements: Element[] = [];
for (const element of currentElements) {
// Frame: search inside its document; an inaccessible frame is skipped
// with a warning rather than aborting the whole query.
if (
(element as Element).tagName === "IFRAME" ||
(element as Element).tagName === "FRAME"
) {
try {
const frameElement = element as
| HTMLIFrameElement
| HTMLFrameElement;
const frameDoc =
frameElement.contentDocument ||
frameElement.contentWindow?.document;
if (frameDoc) {
nextElements.push(...Array.from(frameDoc.querySelectorAll(part)));
}
} catch (e) {
console.warn(
`Cannot access ${(
element as Element
).tagName.toLowerCase()} content:`,
e
);
continue;
}
} else {
// Non-frame: collect matches from the element's subtree, its own shadow
// root, and the shadow roots of its direct children.
if ("querySelectorAll" in element) {
nextElements.push(...Array.from(element.querySelectorAll(part)));
}
if ("shadowRoot" in element && (element as Element).shadowRoot) {
nextElements.push(
...Array.from(
(element as Element).shadowRoot!.querySelectorAll(part)
)
);
}
if ("children" in element) {
const children = Array.from((element as Element).children || []);
for (const child of children) {
if (child.shadowRoot) {
nextElements.push(
...Array.from(child.shadowRoot.querySelectorAll(part))
);
}
}
}
}
}
currentElements = nextElements;
}
return currentElements as Element[];
};
/**
 * Reads the requested value from an element.
 *
 * - If the element hosts a shadow root with non-blank text, that text is
 *   returned regardless of `attribute`.
 * - "innerText" / "innerHTML" return the trimmed property (null when empty).
 * - "src" / "href" return a URL resolved against the owning document's
 *   location (falling back to the current window's origin). For "href" on a
 *   non-anchor element whose parent is an anchor, the parent's href is used.
 *   When the attribute (and its "data-" variant) is missing, "src" falls back
 *   to the computed CSS background image.
 * - Any other name is read with getAttribute.
 *
 * URL resolution never throws: when a value cannot be resolved it is returned
 * unresolved.
 *
 * @returns the extracted string, or null when nothing is available.
 */
private extractValue = (
  element: Element,
  attribute: string
): string | null => {
  if (!element) return null;

  const baseURL =
    element.ownerDocument?.location?.href || window.location.origin;

  // Shadow hosts: prefer the text rendered inside the shadow root.
  if (element.shadowRoot) {
    const shadowContent = element.shadowRoot.textContent;
    if (shadowContent?.trim()) {
      return shadowContent.trim();
    }
  }

  if (attribute === "innerText") {
    return (element as HTMLElement).innerText?.trim() || null;
  } else if (attribute === "innerHTML") {
    return element.innerHTML?.trim() || null;
  } else if (attribute === "src" || attribute === "href") {
    // A non-anchor element directly inside a link: use the link's href.
    if (attribute === "href" && element.tagName !== "A") {
      const parentElement = element.parentElement;
      if (parentElement && parentElement.tagName === "A") {
        const parentHref = parentElement.getAttribute("href");
        if (parentHref) {
          try {
            return new URL(parentHref, baseURL).href;
          } catch (e) {
            return parentHref;
          }
        }
      }
    }

    const attrValue = element.getAttribute(attribute);
    const dataAttr = attrValue || element.getAttribute("data-" + attribute);

    if (!dataAttr || dataAttr.trim() === "") {
      if (attribute === "src") {
        // No src attribute: fall back to a CSS background image, if any.
        const style = window.getComputedStyle(element as HTMLElement);
        const bgImage = style.backgroundImage;
        if (bgImage && bgImage !== "none") {
          const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/);
          if (!matches) {
            return null;
          }
          // Guarded like the other URL constructions in this method, so an
          // unresolvable value is returned as-is instead of throwing.
          try {
            return new URL(matches[1], baseURL).href;
          } catch (e) {
            console.warn("Error creating URL from", matches[1], e);
            return matches[1];
          }
        }
      }
      return null;
    }

    try {
      return new URL(dataAttr, baseURL).href;
    } catch (e) {
      console.warn("Error creating URL from", dataAttr, e);
      return dataAttr;
    }
  }
  return element.getAttribute(attribute);
};
/**
 * Looks for a table cell or row at or above `element`.
 *
 * Starting at `element`, inspects at most MAX_DEPTH nodes. A node that lives
 * inside a shadow root is replaced by the shadow host without being inspected
 * and without counting toward the depth budget. A frame element is replaced
 * by its content document's body; any other node by its parent element.
 *
 * @param element Element to start from.
 * @returns `{ type: "TD" | "TR", element }` for the first match, or null when
 *          none is found within the budget or a frame's content is inaccessible.
 */
private findTableAncestor = (
  element: Element
): { type: string; element: Element } | null => {
  const MAX_DEPTH = 5;
  let node: Element | null = element;
  let steps = 0;

  while (node && steps < MAX_DEPTH) {
    const root = node.getRootNode();
    if (root instanceof ShadowRoot) {
      // Leave the shadow tree first; this hop is not counted as a step.
      node = root.host;
      continue;
    }

    const tag = node.tagName;
    if (tag === "TD" || tag === "TR") {
      return { type: tag, element: node };
    }

    if (tag === "IFRAME" || tag === "FRAME") {
      try {
        const frame = node as HTMLIFrameElement | HTMLFrameElement;
        node = frame.contentDocument?.body || null;
      } catch (e) {
        return null;
      }
    } else {
      node = node.parentElement;
    }

    steps++;
  }

  return null;
};
/**
 * Computes the zero-based position of a cell.
 *
 * For a cell inside a shadow root the index is its position among all `td`
 * elements of that shadow root; otherwise it is the number of preceding
 * element siblings.
 *
 * @param td The cell element.
 * @returns The cell's index (-1 if a shadow-root cell is not found among the root's `td`s).
 */
private getCellIndex = (td: Element): number => {
  const root = td.getRootNode();
  if (root instanceof ShadowRoot) {
    const shadowCells = Array.from(root.querySelectorAll("td"));
    return shadowCells.indexOf(td as HTMLTableCellElement);
  }

  let position = 0;
  for (
    let previous = td.previousElementSibling;
    previous;
    previous = previous.previousElementSibling
  ) {
    position++;
  }
  return position;
};
/**
 * Reports whether any table field of `row` resolves to an element that is, or
 * sits beneath, a `TH`.
 *
 * For each field the selector is resolved relative to `row`, then the chain
 * from the match toward `row` is walked: nodes inside a shadow root jump to
 * the shadow host, frame elements jump to their content body, everything else
 * moves to its parent element.
 *
 * @param row         Row element the selectors are resolved against.
 * @param tableFields Table fields keyed by label; only `selector` is used.
 * @returns true as soon as a `TH` is met on any walk, otherwise false.
 */
private hasThElement = (
  row: Element,
  tableFields: Record<string, TableField>
): boolean => {
  for (const { selector } of Object.values(tableFields)) {
    const match = this.queryElement(row, selector);
    if (!match) continue;

    let node: Element | ShadowRoot | Document | null = match;
    while (node && node !== row) {
      const root = node.getRootNode();
      if (root instanceof ShadowRoot) {
        node = root.host;
        continue;
      }

      const tag = (node as Element).tagName;
      if (tag === "TH") return true;

      if (tag !== "IFRAME" && tag !== "FRAME") {
        node = (node as Element).parentElement;
        continue;
      }

      try {
        const frame = node as HTMLIFrameElement | HTMLFrameElement;
        node = frame.contentDocument?.body || null;
      } catch (e) {
        // Inaccessible frame content: give up on this field's walk.
        break;
      }
    }
  }

  return false;
};
/**
 * Decides which rows take part in extraction.
 *
 * If any row has a table field that resolves under a `TH` (see
 * `hasThElement`), all rows are returned untouched. Otherwise rows that
 * contain a `TH` (in their light DOM or in their own shadow root) are dropped.
 *
 * @param rows        Candidate row elements.
 * @param tableFields Table fields keyed by label.
 * @returns The original array, or a filtered copy without header rows.
 */
private filterRowsBasedOnTag = (
  rows: Element[],
  tableFields: Record<string, TableField>
): Element[] => {
  const fieldsTargetHeaders = rows.some((row) =>
    this.hasThElement(row, tableFields)
  );
  if (fieldsTargetHeaders) {
    return rows;
  }

  return rows.filter((row) => {
    if (row.getElementsByTagName("TH").length > 0) {
      return false;
    }
    const shadow = row.shadowRoot;
    return !shadow || shadow.querySelector("th") === null;
  });
};
/**
 * Computes the ratio of shared class names to distinct class names across
 * both lists (|A ∩ B| / |A ∪ B|), ignoring duplicates within a list.
 *
 * @param classList1 Class names of the first element.
 * @param classList2 Class names of the second element.
 * @returns A value in [0, 1]. When both lists are empty the result is 0
 *          (rather than the NaN that 0 / 0 would produce), so threshold
 *          comparisons behave predictably.
 */
private calculateClassSimilarity = (
  classList1: string[],
  classList2: string[]
): number => {
  const set1 = new Set(classList1);
  const set2 = new Set(classList2);

  let shared = 0;
  set1.forEach((name) => {
    if (set2.has(name)) shared++;
  });

  // |A ∪ B| = |A| + |B| - |A ∩ B|; no need to materialise the union.
  const unionSize = set1.size + set2.size - shared;
  if (unionSize === 0) {
    return 0;
  }
  return shared / unionSize;
};
/**
 * Finds elements that share `baseElement`'s tag name and whose class list is
 * at least `similarityThreshold` similar (see `calculateClassSimilarity`).
 *
 * Candidates are gathered from, in order: `document`; the descendants of the
 * shadow host when `baseElement` lives in a shadow root; and the documents of
 * every accessible `iframe` / `frame` in `document`. Each element appears at
 * most once in the result, in first-seen order — the shadow-host descendants
 * are normally already covered by the document-wide scan, so collecting
 * into a Set avoids returning them twice.
 *
 * @param baseElement         Reference element; never included in the result.
 * @param document            Document to search.
 * @param similarityThreshold Minimum similarity (inclusive), default 0.7.
 * @returns Matching elements; empty when `baseElement` has no classes.
 */
private findSimilarElements = (
  baseElement: Element,
  document: Document,
  similarityThreshold: number = 0.7
): Element[] => {
  const baseClasses = Array.from(baseElement.classList);
  if (baseClasses.length === 0) return [];

  const tagName = baseElement.tagName;
  // A Set keeps insertion order and silently drops repeats.
  const candidates = new Set<Element>();
  const collect = (found: HTMLCollectionOf<Element>): void => {
    Array.from(found).forEach((el) => candidates.add(el));
  };

  collect(document.getElementsByTagName(tagName));

  const root = baseElement.getRootNode();
  if (root instanceof ShadowRoot) {
    collect(root.host.getElementsByTagName(tagName));
  }

  const frames = [
    ...Array.from(document.getElementsByTagName("iframe")),
    ...Array.from(document.getElementsByTagName("frame")),
  ];
  for (const frame of frames) {
    try {
      const frameElement = frame as HTMLIFrameElement | HTMLFrameElement;
      const frameDoc =
        frameElement.contentDocument || frameElement.contentWindow?.document;
      if (frameDoc) {
        collect(frameDoc.getElementsByTagName(tagName));
      }
    } catch (e) {
      // Frame content could not be read; skip it and keep going.
      console.warn(
        `Cannot access ${frame.tagName.toLowerCase()} content:`,
        e
      );
    }
  }

  return Array.from(candidates).filter((element) => {
    if (element === baseElement) return false;
    const similarity = this.calculateClassSimilarity(
      baseClasses,
      Array.from(element.classList)
    );
    return similarity >= similarityThreshold;
  });
};
/**
 * Reshapes the incoming field collection into a label-keyed lookup.
 *
 * Every value of `fields` is treated as a `TextStep`; its `label` becomes the
 * key and its `selectorObj` supplies `selector` and `attribute`. The original
 * keys of `fields` are ignored. If two fields share a label the later one wins.
 *
 * @param fields Object whose values are `TextStep`-shaped entries.
 * @returns Map of label to `{ selector, attribute }`.
 */
private convertFields = (
  fields: any
): Record<string, { selector: string; attribute: string }> => {
  const byLabel: Record<string, { selector: string; attribute: string }> = {};

  (Object.values(fields) as TextStep[]).forEach((step) => {
    const { selector, attribute } = step.selectorObj;
    byLabel[step.label] = { selector, attribute };
  });

  return byLabel;
};
/**
 * Extracts list records from `iframeDocument`.
 *
 * Steps, in order:
 *  1. Convert `fields` to a label-keyed `{ selector, attribute }` map.
 *  2. Resolve `listSelector` to container elements. When exactly one container
 *     matches and `limit > 1`, class-similar elements (threshold 0.7) that do
 *     not already match `listSelector` are appended as extra containers.
 *  3. Per container, classify each field as a "table" field (its first match
 *     has a TD/TR ancestor per `findTableAncestor`) or a "non-table" field
 *     (everything else, including fields whose selector matched nothing).
 *  4. Table pass: per container, locate a table context, collect its TR rows,
 *     filter header rows, and build one record per row (at most `limit` rows
 *     per container).
 *  5. Non-table pass: build one record per container until `limit` records
 *     have been collected.
 *  6. Return table records followed by non-table records, truncated to `limit`.
 *
 * Records whose values are all empty strings are discarded. Fields that could
 * not be extracted are present in a kept record as "".
 *
 * @param iframeDocument Document to extract from.
 * @param listSelector   Selector identifying the repeating container elements.
 * @param fields         Field definitions; values are read as `TextStep` by `convertFields`.
 * @param limit          Maximum number of records to return (default 5).
 * @returns The extracted records; an empty array when no container matches or
 *          when any error is thrown during extraction (the error is logged).
 */
public extractListData = (
iframeDocument: Document,
listSelector: string,
fields: any,
limit: number = 5
): ExtractedListData[] => {
try {
// Convert fields to the format expected by the extraction logic
const convertedFields = this.convertFields(fields);
// Get all container elements matching the list selector
let containers = this.queryElementAll(iframeDocument, listSelector);
if (containers.length === 0) {
console.warn("No containers found for listSelector:", listSelector);
return [];
}
// Enhanced container discovery: find similar elements if we need more containers
if (limit > 1 && containers.length === 1) {
const baseContainer = containers[0];
const similarContainers = this.findSimilarElements(
baseContainer,
iframeDocument,
0.7
);
if (similarContainers.length > 0) {
// Keep only look-alikes that the list selector itself did not already match.
const newContainers = similarContainers.filter(
(container) => !container.matches(listSelector)
);
containers = [...containers, ...newContainers];
}
}
console.log("📦 Found containers:", containers.length);
// Analyze fields for table vs non-table context
// One { tableFields, nonTableFields } bucket per container, index-aligned with `containers`.
const containerFields: ContainerFields[] = containers.map(() => ({
tableFields: {},
nonTableFields: {},
}));
containers.forEach((container, containerIndex) => {
for (const [label, field] of Object.entries(convertedFields)) {
const sampleElement = this.queryElement(container, field.selector);
if (sampleElement) {
const ancestor = this.findTableAncestor(sampleElement);
if (ancestor) {
// cellIndex is only meaningful when the ancestor is a TD; -1 marks "row-level" fields.
containerFields[containerIndex].tableFields[label] = {
...field,
tableContext: ancestor.type,
cellIndex:
ancestor.type === "TD"
? this.getCellIndex(ancestor.element)
: -1,
};
} else {
containerFields[containerIndex].nonTableFields[label] = field;
}
} else {
// Selector matched nothing in this container: treated as a non-table field.
containerFields[containerIndex].nonTableFields[label] = field;
}
}
});
// Extract table data
const tableData: ExtractedListData[] = [];
for (
let containerIndex = 0;
containerIndex < containers.length;
containerIndex++
) {
const container = containers[containerIndex];
const { tableFields } = containerFields[containerIndex];
if (Object.keys(tableFields).length > 0) {
// The first table field's element is the starting point for locating the table.
const firstField = Object.values(tableFields)[0];
const firstElement = this.queryElement(
container,
firstField.selector
);
let tableContext: Element | null = firstElement;
// Find the table context
// Walk until a TABLE or the container itself is reached (or the chain runs out).
while (
tableContext &&
tableContext.tagName !== "TABLE" &&
tableContext !== container
) {
if (tableContext.getRootNode() instanceof ShadowRoot) {
tableContext = (tableContext.getRootNode() as ShadowRoot).host;
continue;
}
if (
tableContext.tagName === "IFRAME" ||
tableContext.tagName === "FRAME"
) {
try {
const frameElement = tableContext as
| HTMLIFrameElement
| HTMLFrameElement;
tableContext = frameElement.contentDocument?.body || null;
} catch (e) {
break;
}
} else {
tableContext = tableContext.parentElement;
}
}
if (tableContext) {
const rows: Element[] = [];
rows.push(...Array.from(tableContext.getElementsByTagName("TR")));
// If the context is itself a frame element, also collect rows from the frame's document.
if (
tableContext.tagName === "IFRAME" ||
tableContext.tagName === "FRAME"
) {
try {
const frameElement = tableContext as
| HTMLIFrameElement
| HTMLFrameElement;
const frameDoc =
frameElement.contentDocument ||
frameElement.contentWindow?.document;
if (frameDoc) {
rows.push(...Array.from(frameDoc.getElementsByTagName("TR")));
}
} catch (e) {
console.warn(
`Cannot access ${tableContext.tagName.toLowerCase()} rows:`,
e
);
}
}
const processedRows = this.filterRowsBasedOnTag(rows, tableFields);
// At most `limit` rows are read per container; the overall cap is applied when results are combined.
for (
let rowIndex = 0;
rowIndex < Math.min(processedRows.length, limit);
rowIndex++
) {
const record: ExtractedListData = {};
const currentRow = processedRows[rowIndex];
for (const [
label,
{ selector, attribute, cellIndex },
] of Object.entries(tableFields)) {
let element: Element | null = null;
if (cellIndex !== undefined && cellIndex >= 0) {
// Cell-level field: pick the cell at the recorded index, then resolve inside it.
let td: Element | null =
currentRow.children[cellIndex] || null;
if (!td && currentRow.shadowRoot) {
const shadowCells = currentRow.shadowRoot.children;
if (shadowCells && shadowCells.length > cellIndex) {
td = shadowCells[cellIndex];
}
}
if (td) {
// Attempt 1: the field's selector, resolved relative to the cell.
element = this.queryElement(td, selector);
// Attempt 2: if the selector's last segment targets "td:nth-child", the cell itself is the target.
if (
!element &&
selector
.split(/(?:>>|:>>)/)
.pop()
?.includes("td:nth-child")
) {
element = td;
}
// Attempt 3: retry with only the part of the selector before its first ".".
if (!element) {
const tagOnlySelector = selector.split(".")[0];
element = this.queryElement(td, tagOnlySelector);
}
// Attempt 4: descend from the cell, at each level moving into the first child
// that yields a value, and stop when no child does.
if (!element) {
let currentElement: Element | null = td;
while (
currentElement &&
currentElement.children.length > 0
) {
let foundContentChild = false;
for (const child of Array.from(
currentElement.children
)) {
if (this.extractValue(child, attribute)) {
currentElement = child;
foundContentChild = true;
break;
}
}
if (!foundContentChild) break;
}
element = currentElement;
}
}
} else {
// Row-level field: resolve the selector against the whole row.
element = this.queryElement(currentRow, selector);
}
if (element) {
const value = this.extractValue(element, attribute);
if (value !== null && value !== "") {
record[label] = value;
console.log(`✅ Extracted ${label}:`, value);
} else {
console.warn(
`❌ No value for ${label} in row ${rowIndex + 1}`
);
record[label] = "";
}
} else {
console.warn(
`❌ Element not found for ${label} with selector:`,
selector
);
record[label] = "";
}
}
// Keep the row only if at least one field produced a non-empty value.
if (Object.values(record).some((value) => value !== "")) {
tableData.push(record);
}
}
}
}
}
// Extract non-table data
const nonTableData: ExtractedListData[] = [];
for (
let containerIndex = 0;
containerIndex < containers.length;
containerIndex++
) {
if (nonTableData.length >= limit) break;
const container = containers[containerIndex];
const { nonTableFields } = containerFields[containerIndex];
if (Object.keys(nonTableFields).length > 0) {
const record: ExtractedListData = {};
for (const [label, { selector, attribute }] of Object.entries(
nonTableFields
)) {
// Only the last ">>" / ":>>" segment of the selector is resolved, relative to the container.
const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0];
const element = this.queryElement(container, relativeSelector);
if (element) {
const value = this.extractValue(element, attribute);
if (value !== null && value !== "") {
record[label] = value;
console.log(`✅ Extracted ${label}:`, value);
} else {
console.warn(
`❌ No value for ${label} in container ${containerIndex + 1}`
);
record[label] = "";
}
} else {
console.warn(
`❌ Element not found for ${label} with selector:`,
selector
);
record[label] = "";
}
}
// Keep the record only if at least one field produced a non-empty value.
if (Object.values(record).some((value) => value !== "")) {
nonTableData.push(record);
}
}
}
// Combine and limit results
// Table records come first, so they take priority when the combined list exceeds `limit`.
const extractedData = [...tableData, ...nonTableData].slice(0, limit);
console.log("🎉 Client extraction complete:", {
totalRecords: extractedData.length,
tableRecords: tableData.length,
nonTableRecords: nonTableData.length,
data: extractedData,
});
return extractedData;
} catch (error) {
// Any failure during extraction is logged and reported to the caller as "no data".
console.error("Error in client-side extractListData:", error);
return [];
}
};
}
// Module-level ClientListExtractor instance, created once when this module is
// evaluated and exported for importers to use.
export const clientListExtractor = new ClientListExtractor();

File diff suppressed because it is too large Load Diff