Merge pull request #659 from getmaxun/out-ss

feat: faster screenshot output preview data
This commit is contained in:
Karishma Shukla
2025-06-29 17:10:34 +05:30
committed by GitHub
7 changed files with 179 additions and 113 deletions

View File

@@ -1866,6 +1866,63 @@ export class RemoteBrowser {
) as Array<Record<string, string>>;
}
/**
 * Takes a screenshot of the current page immediately, without going through
 * the workflow interpreter, and reports the outcome over the socket.
 *
 * Emits `screenshotCaptureStarted` before capturing, then either
 * `directScreenshotCaptured` (image encoded as a base64 data URL) or
 * `screenshotError`. Failures are logged and emitted; they are never rethrown.
 *
 * @param settings Screenshot options. Falsy optional values fall back to
 *   `png`, 30000 ms, `allow`, `hide` and `device` respectively.
 * @returns Promise<void> that settles once the result or error was emitted.
 */
public captureDirectScreenshot = async (settings: {
  fullPage: boolean;
  type: 'png' | 'jpeg';
  timeout?: number;
  animations?: 'disabled' | 'allow';
  caret?: 'hide' | 'initial';
  scale?: 'css' | 'device';
}): Promise<void> => {
  const page = this.currentPage;

  // Nothing to capture: tell the client and bail out early.
  if (!page) {
    logger.error("No current page available for screenshot");
    this.socket.emit('screenshotError', {
      userId: this.userId,
      error: 'No active page available'
    });
    return;
  }

  try {
    this.socket.emit('screenshotCaptureStarted', {
      userId: this.userId,
      fullPage: settings.fullPage
    });

    // Resolve the image format once; it drives both the capture and the MIME type.
    const imageType = settings.type || 'png';

    const rawImage = await page.screenshot({
      fullPage: settings.fullPage,
      type: imageType,
      timeout: settings.timeout || 30000,
      animations: settings.animations || 'allow',
      caret: settings.caret || 'hide',
      scale: settings.scale || 'device'
    });

    const mimeType = `image/${imageType}`;
    const encoded = rawImage.toString('base64');

    this.socket.emit('directScreenshotCaptured', {
      userId: this.userId,
      screenshot: `data:${mimeType};base64,${encoded}`,
      mimeType,
      fullPage: settings.fullPage,
      timestamp: Date.now()
    });
  } catch (error) {
    logger.error('Failed to capture direct screenshot:', error);

    const message = error instanceof Error ? error.message : 'Unknown error occurred';
    this.socket.emit('screenshotError', {
      userId: this.userId,
      error: message
    });
  }
};
/**
* Registers all event listeners needed for the recording editor session.
* Should be called only once after the full initialization of the remote browser.
@@ -1874,6 +1931,16 @@ export class RemoteBrowser {
public registerEditorEvents = (): void => {
// For each event, include userId to make sure events are handled for the correct browser
logger.log('debug', `Registering editor events for user: ${this.userId}`);
this.socket.on(`captureDirectScreenshot:${this.userId}`, async (settings) => {
logger.debug(`Direct screenshot capture requested for user ${this.userId}`);
await this.captureDirectScreenshot(settings);
});
// For backward compatibility
this.socket.on('captureDirectScreenshot', async (settings) => {
await this.captureDirectScreenshot(settings);
});
// Listen for specific events for this user
this.socket.on(`rerender:${this.userId}`, async () => {

View File

@@ -856,7 +856,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
/* Make everything interactive */
* {
cursor: ${isInCaptureMode ? "crosshair" : "pointer"} !important;
cursor: "pointer" !important;
}
/* Additional CSS from resources */
@@ -1127,7 +1127,7 @@ export const DOMBrowserRenderer: React.FC<RRWebDOMBrowserRendererProps> = ({
left: 0,
right: 0,
bottom: 0,
cursor: "pointer",
cursor: "pointer !important",
pointerEvents: "none",
zIndex: 999,
borderRadius: "0px 0px 5px 5px",

View File

@@ -72,7 +72,7 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
startAction, finishAction
} = useActionContext();
const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep, updateListTextFieldLabel, removeListTextField, updateListStepLimit, deleteStepsByActionId, updateListStepData } = useBrowserSteps();
const { browserSteps, updateBrowserTextStepLabel, deleteBrowserStep, addScreenshotStep, updateListTextFieldLabel, removeListTextField, updateListStepLimit, deleteStepsByActionId, updateListStepData, updateScreenshotStepData } = useBrowserSteps();
const { id, socket } = useSocketStore();
const { t } = useTranslation();
@@ -183,6 +183,29 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
};
}, [socket, updateListStepData, isDOMMode]);
// Attach the image delivered by the server's `directScreenshotCaptured` event
// to the screenshot step that belongs to the current screenshot action.
//
// `browserSteps` is part of the dependency list because the handler reads it:
// the screenshot step is added right after the capture request is emitted, so
// a handler that closed over an older `browserSteps` array could fail to find
// the step and silently drop the image.
useEffect(() => {
  if (!socket) {
    return;
  }

  const handleDirectScreenshot = (data: { screenshot: string }) => {
    const screenshotSteps = browserSteps.filter(step =>
      step.type === 'screenshot' && step.actionId === currentScreenshotActionId
    );

    if (screenshotSteps.length > 0) {
      // Several steps may share the action id; the most recently added one gets the image.
      const latestStep = screenshotSteps[screenshotSteps.length - 1];
      updateScreenshotStepData(latestStep.id, data.screenshot);
    }

    // The action is finished whether or not a matching step was found.
    setCurrentScreenshotActionId('');
  };

  socket.on('directScreenshotCaptured', handleDirectScreenshot);

  return () => {
    socket.off('directScreenshotCaptured', handleDirectScreenshot);
  };
}, [socket, browserSteps, currentScreenshotActionId, updateScreenshotStepData, setCurrentScreenshotActionId]);
const extractDataClientSide = useCallback(
(
listSelector: string,
@@ -649,14 +672,15 @@ export const RightSidePanel: React.FC<RightSidePanelProps> = ({ onFinishCapture
}, [currentListActionId, browserSteps, stopGetList, deleteStepsByActionId, resetListState, setShowPaginationOptions, setShowLimitOptions, setCaptureStage, notify, t]);
const captureScreenshot = (fullPage: boolean) => {
const screenshotSettings: ScreenshotSettings = {
const screenshotSettings = {
fullPage,
type: 'png',
type: 'png' as const,
timeout: 30000,
animations: 'allow',
caret: 'hide',
scale: 'device',
animations: 'allow' as const,
caret: 'hide' as const,
scale: 'device' as const,
};
socket?.emit('captureDirectScreenshot', screenshotSettings);
socket?.emit('action', { action: 'screenshot', settings: screenshotSettings });
addScreenshotStep(fullPage, currentScreenshotActionId);
stopGetScreenshot();

View File

@@ -2,7 +2,11 @@ import React, { FC, useState } from 'react';
import { InterpretationButtons } from "../run/InterpretationButtons";
import { useSocketStore } from "../../context/socket";
export const SidePanelHeader = () => {
/** Props accepted by SidePanelHeader. */
interface SidePanelHeaderProps {
// Optional callback; SidePanelHeader hands it to InterpretationButtons as `onPreviewComplete`.
onPreviewClick?: () => void;
}
export const SidePanelHeader = ({ onPreviewClick }: SidePanelHeaderProps) => {
const [steppingIsDisabled, setSteppingIsDisabled] = useState(true);
@@ -14,7 +18,10 @@ export const SidePanelHeader = () => {
return (
<div style={{ width: 'inherit' }}>
<InterpretationButtons enableStepping={(isPaused) => setSteppingIsDisabled(!isPaused)} />
<InterpretationButtons
enableStepping={(isPaused) => setSteppingIsDisabled(!isPaused)}
onPreviewComplete={onPreviewClick}
/>
{/* <Button
variant='outlined'
disabled={steppingIsDisabled}

View File

@@ -10,6 +10,7 @@ import { useTranslation } from "react-i18next";
/** Props accepted by InterpretationButtons. */
interface InterpretationButtonsProps {
// Callback that receives the current paused flag.
enableStepping: (isPaused: boolean) => void;
// Optional callback; invoked from handlePlay when it is provided.
onPreviewComplete?: () => void;
}
interface InterpretationInfo {
@@ -22,7 +23,7 @@ const interpretationInfo: InterpretationInfo = {
isPaused: false,
};
export const InterpretationButtons = ({ enableStepping }: InterpretationButtonsProps) => {
export const InterpretationButtons = ({ enableStepping, onPreviewComplete }: InterpretationButtonsProps) => {
const { t } = useTranslation();
const [info, setInfo] = useState<InterpretationInfo>(interpretationInfo);
const [decisionModal, setDecisionModal] = useState<{
@@ -102,16 +103,20 @@ export const InterpretationButtons = ({ enableStepping }: InterpretationButtonsP
}, [socket, finishedHandler, breakpointHitHandler]);
const handlePlay = async () => {
if (!info.running) {
setInfo({ ...info, running: true });
const finished = await interpretCurrentRecording();
setInfo({ ...info, running: false });
if (finished) {
notify('info', t('interpretation_buttons.messages.run_finished'));
} else {
notify('error', t('interpretation_buttons.messages.run_failed'));
}
}
onPreviewComplete?.();
notify('info', t('interpretation_buttons.messages.run_finished'));
// Legacy code for running the interpretation
// if (!info.running) {
// setInfo({ ...info, running: true });
// // const finished = await interpretCurrentRecording();
// setInfo({ ...info, running: false });
// if (finished) {
// } else {
// notify('error', t('interpretation_buttons.messages.run_failed'));
// }
// }
};
// pause and stop logic (do not delete - we will bring this back!)

View File

@@ -3,8 +3,6 @@ import SwipeableDrawer from '@mui/material/SwipeableDrawer';
import Typography from '@mui/material/Typography';
import { Button, Grid, Box } from '@mui/material';
import { useCallback, useEffect, useRef, useState } from "react";
import { useSocketStore } from "../../context/socket";
import { Buffer } from 'buffer';
import { useBrowserDimensionsStore } from "../../context/browserDimensions";
import Table from '@mui/material/Table';
import TableBody from '@mui/material/TableBody';
@@ -28,8 +26,6 @@ interface InterpretationLogProps {
export const InterpretationLog: React.FC<InterpretationLogProps> = ({ isOpen, setIsOpen }) => {
const { t } = useTranslation();
const [log, setLog] = useState<string>('');
const [customValue, setCustomValue] = useState('');
const [captureListData, setCaptureListData] = useState<any[]>([]);
const [captureTextData, setCaptureTextData] = useState<any[]>([]);
@@ -44,11 +40,10 @@ export const InterpretationLog: React.FC<InterpretationLogProps> = ({ isOpen, se
const { browserSteps } = useBrowserSteps();
const [activeActionId, setActiveActionId] = useState<number | null>(null);
const { browserWidth, outputPreviewHeight, outputPreviewWidth } = useBrowserDimensionsStore();
const { socket } = useSocketStore();
const { currentWorkflowActionsState, shouldResetInterpretationLog, notify } = useGlobalInfoStore();
const { currentWorkflowActionsState, shouldResetInterpretationLog } = useGlobalInfoStore();
const [showPreviewData, setShowPreviewData] = useState<boolean>(false);
const toggleDrawer = (newOpen: boolean) => (event: React.KeyboardEvent | React.MouseEvent) => {
if (
@@ -61,43 +56,6 @@ export const InterpretationLog: React.FC<InterpretationLogProps> = ({ isOpen, se
setIsOpen(newOpen);
};
// Smoothly bring the end-of-log marker into view; does nothing while the ref is unset.
const scrollLogToBottom = () => {
  logEndRef.current?.scrollIntoView({ behavior: "smooth" });
};
// Append one message on a new line of the log, optionally prefixed with a
// locale-formatted timestamp, then scroll the log to its end.
const handleLog = useCallback((msg: string, date: boolean = true) => {
  setLog((previous) => {
    // The timestamp is taken inside the updater, i.e. when the state update runs.
    const stamp = date ? `[${new Date().toLocaleString()}] ` : '';
    return `${previous}\n${stamp}${msg}`;
  });
  scrollLogToBottom();
}, []);
// While an action is active, mirror the recorded text and list steps into the
// preview state and then re-evaluate which tab should be shown.
useEffect(() => {
  if (activeActionId === null) {
    return;
  }

  // All text steps are collapsed into a single row keyed by step label.
  const textSteps = browserSteps.filter(step => step.type === 'text');
  if (textSteps.length > 0) {
    const row: Record<string, string> = {};
    for (const step of textSteps) {
      row[step.label] = step.data;
    }
    setCaptureTextData([row]);
  }

  // List steps are stored as they are.
  const listSteps = browserSteps.filter(step => step.type === 'list');
  if (listSteps.length > 0) {
    setCaptureListData(listSteps);
  }

  updateActiveTab();
}, [activeActionId, browserSteps, t]);
const updateActiveTab = useCallback(() => {
const availableTabs = getAvailableTabs();
@@ -109,62 +67,49 @@ export const InterpretationLog: React.FC<InterpretationLogProps> = ({ isOpen, se
setActiveTab(availableTabs.findIndex(tab => tab.id === 'captureScreenshot'));
}
}, [captureListData.length, captureTextData.length, screenshotData.length]);
const handleBinaryCallback = useCallback(({ data, mimetype, type }: { data: any, mimetype: string, type: string }) => {
const base64String = Buffer.from(data).toString('base64');
const imageSrc = `data:${mimetype};base64,${base64String}`;
setLog((prevState) =>
prevState + '\n' + t('interpretation_log.data_sections.binary_received') + '\n'
+ t('interpretation_log.data_sections.mimetype') + mimetype + '\n'
+ t('interpretation_log.data_sections.image_below') + '\n'
+ t('interpretation_log.data_sections.separator'));
if (type === 'captureScreenshot') {
setScreenshotData(prev => [...prev, imageSrc]);
if (screenshotData.length === 0) {
const availableTabs = getAvailableTabs();
const tabIndex = availableTabs.findIndex(tab => tab.id === 'captureScreenshot');
if (tabIndex !== -1) setActiveTab(tabIndex);
}
useEffect(() => {
const textSteps = browserSteps.filter(step => step.type === 'text');
if (textSteps.length > 0) {
const textDataRow: Record<string, string> = {};
textSteps.forEach(step => {
textDataRow[step.label] = step.data;
});
setCaptureTextData([textDataRow]);
}
scrollLogToBottom();
}, [screenshotData.length, t]);
const listSteps = browserSteps.filter(step => step.type === 'list');
if (listSteps.length > 0) {
setCaptureListData(listSteps);
}
const handleActivePairId = useCallback((id: number) => {
setActiveActionId(id);
}, []);
const screenshotSteps = browserSteps.filter(step =>
step.type === 'screenshot'
) as Array<{ type: 'screenshot'; id: number; fullPage: boolean; actionId?: string; screenshotData?: string }>;
const handleCustomValueChange = (event: React.ChangeEvent<HTMLInputElement>) => {
setCustomValue(event.target.value);
};
const screenshotsWithData = screenshotSteps.filter(step => step.screenshotData);
if (screenshotsWithData.length > 0) {
const screenshots = screenshotsWithData.map(step => step.screenshotData!);
setScreenshotData(screenshots);
}
updateActiveTab();
}, [browserSteps, updateActiveTab]);
useEffect(() => {
if (shouldResetInterpretationLog) {
setLog('');
setCaptureListData([]);
setCaptureTextData([]);
setScreenshotData([]);
setActiveTab(0);
setCaptureListPage(0);
setScreenshotPage(0);
setActiveActionId(null);
setShowPreviewData(false);
}
}, [shouldResetInterpretationLog]);
// Subscribe to the `log`, `binaryCallback` and `activePairId` socket events;
// the cleanup detaches exactly the same handlers.
useEffect(() => {
  const unsubscribe = () => {
    socket?.off('log', handleLog);
    socket?.off('binaryCallback', handleBinaryCallback);
    socket?.off('activePairId', handleActivePairId);
  };

  socket?.on('log', handleLog);
  socket?.on('binaryCallback', handleBinaryCallback);
  socket?.on('activePairId', handleActivePairId);

  return unsubscribe;
}, [socket, handleLog, handleBinaryCallback, handleActivePairId]);
const getAvailableTabs = useCallback(() => {
const tabs = [];
@@ -181,7 +126,7 @@ export const InterpretationLog: React.FC<InterpretationLogProps> = ({ isOpen, se
}
return tabs;
}, [captureListData.length, captureTextData.length, screenshotData.length]);
}, [captureListData.length, captureTextData.length, screenshotData.length, showPreviewData]);
const availableTabs = getAvailableTabs();
@@ -264,7 +209,7 @@ export const InterpretationLog: React.FC<InterpretationLogProps> = ({ isOpen, se
{t('interpretation_log.titles.output_preview')}
</Typography>
{availableTabs.length > 0 ? (
{showPreviewData && availableTabs.length > 0 ? (
<>
{shouldShowTabs && (
<Box
@@ -488,7 +433,7 @@ export const InterpretationLog: React.FC<InterpretationLogProps> = ({ isOpen, se
<Typography variant="h6" gutterBottom align="left">
{t('interpretation_log.messages.successful_training')}
</Typography>
<SidePanelHeader />
<SidePanelHeader onPreviewClick={() => setShowPreviewData(true)} />
</>
) : (
<Typography variant="h6" gutterBottom align="left">

View File

@@ -6,7 +6,7 @@ export interface TextStep {
label: string;
data: string;
selectorObj: SelectorObject;
actionId?: string;
actionId?: string;
}
interface ScreenshotStep {
@@ -14,6 +14,7 @@ interface ScreenshotStep {
type: 'screenshot';
fullPage: boolean;
actionId?: string;
screenshotData?: string;
}
export interface ListStep {
@@ -26,7 +27,7 @@ export interface ListStep {
selector: string;
};
limit?: number;
actionId?: string;
actionId?: string;
}
export type BrowserStep = TextStep | ScreenshotStep | ListStep;
@@ -50,7 +51,9 @@ interface BrowserStepsContextType {
updateListStepLimit: (listId: number, limit: number) => void;
updateListStepData: (listId: number, extractedData: any[]) => void;
removeListTextField: (listId: number, fieldKey: string) => void;
deleteStepsByActionId: (actionId: string) => void;
deleteStepsByActionId: (actionId: string) => void;
updateScreenshotStepData: (id: number, screenshotData: string) => void;
}
const BrowserStepsContext = createContext<BrowserStepsContextType | undefined>(undefined);
@@ -178,6 +181,20 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({
});
};
// Store captured image data on the screenshot step with the given id.
// Every other step (including non-screenshot steps sharing that id) is kept as is.
const updateScreenshotStepData = (id: number, screenshotData: string) => {
  setBrowserSteps(prevSteps =>
    prevSteps.map(step =>
      step.type === 'screenshot' && step.id === id
        ? { ...step, screenshotData }
        : step
    )
  );
};
const updateListStepLimit = (listId: number, limit: number) => {
setBrowserSteps(prevSteps =>
prevSteps.map(step => {
@@ -219,7 +236,8 @@ export const BrowserStepsProvider: React.FC<{ children: React.ReactNode }> = ({
updateListStepLimit,
updateListStepData,
removeListTextField,
deleteStepsByActionId,
deleteStepsByActionId,
updateScreenshotStepData,
}}>
{children}
</BrowserStepsContext.Provider>