Merge pull request #19 from amhsirak/develop

feat: browser revamp
This commit is contained in:
Karishma Shukla
2024-07-16 23:05:39 +05:30
committed by GitHub
14 changed files with 362 additions and 75 deletions

View File

@@ -0,0 +1,226 @@
/* eslint-disable @typescript-eslint/no-unused-vars */
/**
 * Computes the layout area of an element from its offset dimensions.
 * @param {HTMLElement} element Element (or any object exposing `offsetHeight` and `offsetWidth`).
 * @returns {number} `offsetHeight * offsetWidth`.
 */
const area = (element) => {
  const { offsetHeight: height, offsetWidth: width } = element;
  return height * width;
};
/**
 * Finds the element with the largest area among those matching the selector.
 * @param {string} selector Selector passed to `document.querySelectorAll`.
 * @returns {HTMLElement|{offsetHeight: number, offsetWidth: number}} The largest match.
 * When nothing matches (or no match has a positive area) the zero-sized placeholder
 * object is returned instead of an element.
 */
function getBiggestElement(selector) {
  // The placeholder takes part in the comparison as the initial "largest" candidate.
  const placeholder = { offsetHeight: 0, offsetWidth: 0 };
  const contenders = [placeholder, ...Array.from(document.querySelectorAll(selector))];
  let winner = contenders[0];
  for (let i = 1; i < contenders.length; i += 1) {
    // Strict comparison: on a tie the earlier candidate is kept.
    if (area(contenders[i]) > area(winner)) {
      winner = contenders[i];
    }
  }
  return winner;
}
/**
 * Builds a structural selector, i.e. one that describes an element purely by the chain
 * of tag names leading to it (`BODY > DIV > UL > LI`).
 *
 * **The selector is intentionally not unique** - every element sitting at the same
 * structural position matches it.
 * @param {HTMLElement} element Element being described.
 * @returns {string} Tag names joined with the child combinator, starting at `BODY`
 * (or at the topmost ancestor when the element is not inside a `BODY`).
 */
function GetSelectorStructural(element) {
  // Collect tag names bottom-up until BODY or a parentless node is reached.
  const descendants = [];
  let node = element;
  while (node.tagName !== 'BODY' && node.parentElement) {
    descendants.unshift(node.tagName);
    node = node.parentElement;
  }
  // `node` is now the root of the path; append the collected tags top-down.
  let path = node.tagName;
  for (const tag of descendants) {
    path = `${path} > ${tag}`;
  }
  return path;
}
/**
 * Heuristic method to find collections of "interesting" items on the page.
 *
 * The page is sampled on a grid of viewport points at several scroll positions. The element
 * under every point is turned into its structural selector, and the selector whose
 * sufficiently large matches score best on the chosen metric wins. The winning elements are
 * then replaced by their ancestors for as long as this does not merge two of them.
 * @param {number} [maxCountPerPage=50] A selector is only accepted when it matches fewer
 * large elements than this.
 * @param {number} [minArea=20000] Elements whose area (`offsetHeight * offsetWidth`) is not
 * above this value are ignored.
 * @param {number} [scrolls=3] Number of scroll positions (one viewport height apart) to sample.
 * @param {string} [metricType='size_deviation'] `'total_area'` (sum of the areas) or
 * `'size_deviation'` (1 - relative spread between the largest and the smallest area).
 * Any other value leaves the default `body` selector in place.
 * @returns {Array<HTMLElement>} A collection of interesting DOM nodes
 * (online store products, plane tickets, list items... and many more?)
 */
function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, metricType = 'size_deviation') {
  // Remember the scroll position now; the returned closure puts it back later.
  const restoreScroll = (() => {
    const { scrollX, scrollY } = window;
    return () => {
      window.scrollTo(scrollX, scrollY);
    };
  })();

  /**
   * @typedef {Array<{x: number, y: number}>} Grid
   */

  /**
   * Returns an array of grid-aligned {x,y} points covering the viewport.
   * @param {number} [startX=0] Offset added to every x coordinate.
   * @param {number} [startY=0] Offset added to every y coordinate.
   * @param {number} [granularity=0.005] sets the number of generated points
   * (the higher the granularity, the more points); the step is `1 / granularity` pixels.
   * @returns {Grid} Array of {x, y} objects.
   */
  function getGrid(startX = 0, startY = 0, granularity = 0.005) {
    const width = window.innerWidth;
    const height = window.innerHeight;
    const step = 1 / granularity;
    const out = [];
    for (let x = 0; x < width; x += step) {
      for (let y = 0; y < height; y += step) {
        out.push({ x: startX + x, y: startY + y });
      }
    }
    return out;
  }

  let maxSelector = { selector: 'body', metric: 0 };

  const updateMaximumWithPoint = (point) => {
    const currentElement = document.elementFromPoint(point.x, point.y);
    // elementFromPoint yields null when nothing is rendered under the point.
    if (!currentElement) {
      return;
    }
    const selector = GetSelectorStructural(currentElement);
    const elements = Array.from(document.querySelectorAll(selector))
      .filter((element) => area(element) > minArea);

    // If the current selector targets less than three elements,
    // we consider it not interesting (would be a very underwhelming scraper)
    if (elements.length < 3) {
      return;
    }

    let metric = null;
    if (metricType === 'total_area') {
      metric = elements
        .reduce((p, x) => p + area(x), 0);
    } else if (metricType === 'size_deviation') {
      // This could use a proper "statistics" approach... but meh, so far so good!
      const sizes = elements
        .map((element) => area(element));
      const largest = Math.max(...sizes);
      const smallest = Math.min(...sizes);
      metric = (1 - (largest - smallest) / largest);
    }

    // console.debug(`Total ${metricType} is ${metric}.`)
    if (metric > maxSelector.metric && elements.length < maxCountPerPage) {
      maxSelector = { selector, metric };
    }
  };

  try {
    for (let scroll = 0; scroll < scrolls; scroll += 1) {
      window.scrollTo(0, scroll * window.innerHeight);
      const grid = getGrid();
      grid.forEach(updateMaximumWithPoint);
    }
  } finally {
    // Put the page back where the user left it, even if sampling failed.
    restoreScroll();
  }

  let out = Array.from(document.querySelectorAll(maxSelector.selector));

  const different = (x, i, a) => a.findIndex((e) => e === x) === i;
  // As long as we don't merge any two elements by substituting them for their parents,
  // we substitute.
  // - `every` (not `forEach`, which always returns undefined) makes the loop actually run.
  // - The length guard is required because both `every` checks are vacuously true on an
  //   empty array, which would otherwise loop forever.
  while (out.length > 0
    && out.every((x) => x.parentElement !== null)
    && out.map((x) => x.parentElement).every(different)) {
    out = out.map((x) => x.parentElement);
  }

  return out;
}
/**
 * Produces a "scrape" of the current page: one flat record per scraped container element.
 *
 * Each record holds
 *  - `img_<i>` keys with the URL of the i-th `<img>` inside the container (images without
 *    a usable URL are left out, the index still counts them), followed by
 *  - `record_<NNNN>` keys with the trimmed lines of the container's `innerText`.
 * @param {string|null} [selector=null] Selector of the containers to scrape. When falsy,
 * the containers are picked by `scrapableHeuristics()`.
 * @returns {Array<Object>} The uncurated records, in container order.
 */
function scrape(selector = null) {
  /**
   * Picks the URL for one image: the first token of the last `srcset` entry when `srcset`
   * is present, otherwise `src` - unless `src` contains a data URL, which yields `undefined`.
   */
  const pickImageUrl = (image) => {
    if (image.srcset) {
      const candidates = image.srcset.split(', ');
      return candidates[candidates.length - 1].split(' ')[0];
    }
    if (image.src.indexOf('data:') === -1) {
      return image.src;
    }
    return undefined;
  };

  const containers = selector
    ? Array.from(document.querySelectorAll(selector))
    : scrapableHeuristics();

  return containers.map((container) => {
    const fields = [];

    // Image columns come first so they precede the text columns in the record.
    Array.from(container.querySelectorAll('img')).forEach((image, position) => {
      const imgUrl = pickImageUrl(image);
      if (imgUrl) {
        fields.push([`img_${position}`, imgUrl]);
      }
    });

    // One zero-padded column per line of visible text.
    container.innerText.split('\n').forEach((line, position) => {
      fields.push([`record_${String(position).padStart(4, '0')}`, line.trim()]);
    });

    return Object.fromEntries(fields);
  });
}
/**
 * Groups named lists of elements into rows by their proximity in the DOM tree.
 *
 * The longest list (the first one on a tie) acts as the seed. For every seed element the
 * highest ancestor that still contains no other seed element is taken as the row boundary.
 * Each row then receives, for every list, the `innerText` of the first element of that list
 * located inside the boundary (`undefined` when there is none).
 * @param {Object.<string, object[]>} lists The named lists of HTML elements.
 * @returns {Array.<Object.<string, string>>} One object per seed element, keyed by list name.
 */
function scrapeSchema(lists) {
  const namedLists = Object.entries(lists);

  // Seed = first list whose length equals the maximum length.
  const longest = Math.max(...namedLists.map(([, items]) => items.length));
  const seedEntry = namedLists.find(([, items]) => items.length === longest);
  const seedName = seedEntry ? seedEntry[0] : undefined;
  const seeds = lists[seedName];

  // Climb from each seed while the parent subtree holds exactly one seed element.
  const boundaries = seeds.map((seed) => {
    let node = seed;
    while (node) {
      const seedsUnderParent = seeds
        .filter((other) => node.parentNode?.contains(other))
        .length;
      if (seedsUnderParent !== 1) {
        break;
      }
      node = node.parentNode;
    }
    return node;
  });

  return boundaries.map((boundary) => Object.fromEntries(
    namedLists.map(([name, items]) => [
      name,
      items.find((item) => boundary.contains(item))?.innerText,
    ]),
  ));
}

View File

@@ -0,0 +1,5 @@
/** Keys of the logical operators that take a single operand. */
export const unaryOperators = ['$not'] as const;
/** Keys of the logical operators that take a list of operands. */
export const naryOperators = ['$and', '$or'] as const;
/** Every logical operator key: the unary ones followed by the n-ary ones. */
export const operators = [...unaryOperators, ...naryOperators] as const;
/**
 * Meta condition keys, kept separate from the logical operators.
 * NOTE(review): what `$before` / `$after` refer to is not visible in this module.
 */
export const meta = ['$before', '$after'] as const;

View File

@@ -0,0 +1,58 @@
import { Page } from 'playwright';
import {
naryOperators, unaryOperators, operators, meta,
} from './logic';
/** Union of all logical operator keys, derived from the `operators` tuple. */
export type Operator = typeof operators[number];
/** Union of the unary operator keys, derived from the `unaryOperators` tuple. */
export type UnaryOperator = typeof unaryOperators[number];
/** Union of the n-ary operator keys, derived from the `naryOperators` tuple. */
export type NAryOperator = typeof naryOperators[number];
/** Union of the meta keys, derived from the `meta` tuple. */
export type Meta = typeof meta[number];
/** A list of selector strings. */
export type SelectorArray = string[];
/** Either a plain string or an object carrying a pattern under the `$regex` key. */
type RegexableString = string | { '$regex': string };
/**
 * The leaf conditions: `url`, `cookies` (keyed by string) and `selectors`,
 * plus one `RegexableString` entry per meta key.
 */
type BaseConditions = {
'url': RegexableString,
'cookies': Record<string, RegexableString>,
'selectors': SelectorArray, // (CSS/Playwright) selectors use their own logic, there is no reason (and several technical difficulties) to allow regular expression notation
} & Record<Meta, RegexableString>;
/**
 * Recursive condition tree. It is an intersection of partial types, so every key is
 * optional and the type itself does not prevent combining operator keys with base conditions.
 */
export type Where =
Partial<{ [key in NAryOperator]: Where[] }> & // either a logic operator (arity N)
Partial<{ [key in UnaryOperator]: Where }> & // or an unary operator
Partial<BaseConditions>; // or one of the base conditions
/** Union of the keys of `T` whose value type extends `Function`; all other keys map to `never` and drop out. */
type MethodNames<T> = {
[K in keyof T]: T[K] extends Function ? K : never;
}[keyof T];
/** Names of the actions that are not Playwright `Page` members. */
export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag';
/**
 * A single action: the name of a Playwright `Page` method or of a custom function,
 * with an optional, untyped argument list.
 */
export type What = {
action: MethodNames<Page> | CustomFunctions,
args?: any[]
};
/** Any subset of the base conditions. */
export type PageState = Partial<BaseConditions>;
/** Free-form bag of values keyed by string. */
export type ParamType = Record<string, any>;
/** Optional descriptive fields of a workflow file (`desc` is presumably a description). */
export type MetaData = {
name?: string,
desc?: string,
};
/** Pairs a `where` condition with the list of `what` actions; `id` is optional. */
export interface WhereWhatPair {
id?: string
where: Where
what: What[]
}
/** A list of where-what pairs. */
export type Workflow = WhereWhatPair[];
/** A workflow together with its optional metadata. */
export type WorkflowFile = {
meta?: MetaData,
workflow: Workflow
};

View File

@@ -24,6 +24,7 @@
"dotenv": "^16.0.0",
"express": "^4.17.2",
"fortawesome": "^0.0.1-security",
"html2canvas-pro": "^1.5.3",
"joi": "^17.6.0",
"loglevel": "^1.8.0",
"loglevel-plugin-remote": "^0.6.8",

View File

@@ -143,7 +143,7 @@ export class RemoteBrowser {
return;
}
this.client.on('Page.screencastFrame', ({ data: base64, sessionId }) => {
this.emitScreenshot(base64);
this.emitScreenshot(base64)
setTimeout(async () => {
try {
if (!this.client) {
@@ -180,7 +180,9 @@ export class RemoteBrowser {
*/
public makeAndEmitScreenshot = async (): Promise<void> => {
try {
const screenshot = await this.currentPage?.screenshot();
const screenshot = await this.currentPage?.screenshot(
{ type: 'jpeg', quality: 90, fullPage: true }
);
if (screenshot) {
this.emitScreenshot(screenshot.toString('base64'));
}
@@ -313,7 +315,7 @@ export class RemoteBrowser {
logger.log('warn', 'client is not initialized');
return;
}
await this.client.send('Page.startScreencast', { format: 'jpeg', quality: 75 });
await this.client.send('Page.startScreencast', { format: 'jpeg', quality: 90 });
logger.log('info', `Browser started with screencasting a page.`);
};

View File

@@ -99,8 +99,7 @@ router.get('/interpret', async (req, res) => {
await interpretWholeWorkflow();
return res.send('interpretation done');
} catch (e) {
return res.send('interpretation done');
return res.status(400);
return res.send('interpretation failed');
}
});

View File

@@ -484,6 +484,7 @@ export class WorkflowGenerator {
public generateDataForHighlighter = async (page: Page, coordinates: Coordinates) => {
const rect = await getRect(page, coordinates);
const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click);
//console.log('Backend Rectangle:', rect)
if (rect) {
this.socket.emit('highlighter', { rect, selector: displaySelector });
}

View File

@@ -68,24 +68,58 @@ export const getElementInformation = async (
const el = document.elementFromPoint(x, y) as HTMLElement;
if (el) {
const { parentElement } = el;
// Match the logic in recorder.ts for link clicks
const element = parentElement?.tagName === 'A' ? parentElement : el;
return {
let info: {
tagName: string;
hasOnlyText?: boolean;
innerText?: string;
url?: string;
imageUrl?: string;
} = {
tagName: element?.tagName ?? '',
hasOnlyText: element?.children?.length === 0 &&
element?.innerText?.length > 0,
};
if (element?.tagName === 'A') {
info.url = (element as HTMLAnchorElement).href;
info.innerText = element.innerText ?? '';
} else if (element?.tagName === 'IMG') {
info.imageUrl = (element as HTMLImageElement).src;
} else {
info.hasOnlyText = element?.children?.length === 0 &&
element?.innerText?.length > 0;
info.innerText = element?.innerText ?? '';
}
return info;
}
return null;
},
{ x: coordinates.x, y: coordinates.y },
);
if (elementInfo) {
if (elementInfo.tagName === 'A') {
if (elementInfo.innerText) {
console.log(`Link text: ${elementInfo.innerText}, URL: ${elementInfo.url}`);
} else {
console.log(`URL: ${elementInfo.url}`);
}
} else if (elementInfo.tagName === 'IMG') {
console.log(`Image URL: ${elementInfo.imageUrl}`);
} else {
console.log(`Element innerText: ${elementInfo.innerText}`);
}
}
return elementInfo;
} catch (error) {
const { message, stack } = error as Error;
logger.log('error', `Error while retrieving selector: ${message}`);
logger.log('error', `Stack: ${stack}`);
console.error('Error while retrieving selector:', message);
console.error('Stack:', stack);
}
}
};
/**
* Returns the best and unique css {@link Selectors} for the element on the page.

View File

@@ -14,22 +14,9 @@ export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, hei
if (!unmodifiedRect) {
return null;
} else {
// const unshiftedRect = mapRect(unmodifiedRect, width, height);
// console.log('unshiftedRect', unshiftedRect)
// const rect = {
// bottom: unshiftedRect.bottom + canvasRect.top,
// top: unshiftedRect.top + canvasRect.top,
// left: unshiftedRect.left + canvasRect.left,
// right: unshiftedRect.right + canvasRect.left,
// x: unshiftedRect.x + canvasRect.left,
// y: unshiftedRect.y + canvasRect.top,
// width: unshiftedRect.width,
// height: unshiftedRect.height,
// }
const rect = {
top: unmodifiedRect.top + canvasRect.top,
left: unmodifiedRect.left + canvasRect.left,
top: unmodifiedRect.top + canvasRect.top + window.scrollY,
left: unmodifiedRect.left + canvasRect.left + window.scrollX,
right: unmodifiedRect.right + canvasRect.left,
bottom: unmodifiedRect.bottom + canvasRect.top,
width: unmodifiedRect.width,
@@ -41,26 +28,6 @@ export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, hei
console.log('rectangle:', rect)
console.log('canvas rectangle:', canvasRect)
// make the highlighting rectangle stay in browser window boundaries
// if (rect.bottom > canvasRect.bottom) {
// rect.height = height - unshiftedRect.top;
// }
// if (rect.top < canvasRect.top) {
// rect.height = rect.height - (canvasRect.top - rect.top);
// rect.top = canvasRect.top;
// }
// if (rect.right > canvasRect.right) {
// rect.width = width - unshiftedRect.left;
// }
// if (rect.left < canvasRect.left) {
// rect.width = rect.width - (canvasRect.left - rect.left);
// rect.left = canvasRect.left;
// }
return (
<div>
<HighlighterOutline
@@ -88,9 +55,9 @@ const HighlighterOutline = styled.div<HighlighterOutlineProps>`
position: fixed !important;
background: #ff5d5b26 !important;
outline: 4px solid pink !important;
// border: 4px solid #ff5d5b !important;
//border: 4px solid #ff5d5b !important;
z-index: 2147483647 !important;
// border-radius: 5px;
//border-radius: 5px;
top: ${(p: HighlighterOutlineProps) => p.top}px;
left: ${(p: HighlighterOutlineProps) => p.left}px;
width: ${(p: HighlighterOutlineProps) => p.width}px;

View File

@@ -123,17 +123,15 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => {
}, [onMouseEvent]);
return (
// <canvas tabIndex={0} ref={canvasRef} height={height} width={width} />
<canvas
tabIndex={0}
ref={canvasRef}
height={720}
width={1280}
style={{ width: '1280px', height: '720px' }} // Ensure dimensions are explicitly set
/>
);
};
export default Canvas;
export default Canvas;

View File

@@ -44,7 +44,7 @@ export const NavBar = ({newRecording, recordingName, isRecording}:NavBarProps) =
justifyContent: 'flex-start',
}}>
<RecordingIcon/>
<div style={{padding: '11px'}}><ProjectName>Browser Recorder</ProjectName></div>
<div style={{padding: '11px'}}><ProjectName>Maxun</ProjectName></div>
</div>
<div style={{
display: 'flex',

View File

@@ -70,7 +70,7 @@ export const BrowserWindow = () => {
return (
<>
{(highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ?
< Highlighter
<Highlighter
unmodifiedRect={highlighterData?.rect}
displayedSelector={highlighterData?.selector}
width={width}
@@ -96,11 +96,9 @@ const drawImage = (image: string, canvas: HTMLCanvasElement): void => {
img.src = image;
img.onload = () => {
URL.revokeObjectURL(img.src);
//ctx?.clearRect(0, 0, canvas?.width || 0, VIEWPORT_H || 0);
// ctx?.drawImage(img, 0, 0, canvas.width , canvas.height);
ctx?.drawImage(img, 0, 0, 1280, 720); // Explicitly draw image at 1280 x 720
ctx?.drawImage(img, 0, 0, 1280, 720);
console.log('Image drawn on canvas:', img.width, img.height);
console.log('Image drawn on canvas:', canvas.width, canvas.height);
};
};
};

View File

@@ -7,15 +7,13 @@ import { SelectChangeEvent } from "@mui/material/Select/Select";
import { SimpleBox } from "../atoms/Box";
import Typography from "@mui/material/Typography";
import { useGlobalInfoStore } from "../../context/globalInfo";
import { PairDetail } from "../molecules/PairDetail";
import { PairForEdit } from "../../pages/RecordingPage";
interface RightSidePanelProps {
pairForEdit: PairForEdit;
changeBrowserDimensions: () => void;
}
export const RightSidePanel = ({pairForEdit, changeBrowserDimensions}: RightSidePanelProps) => {
export const RightSidePanel = ({pairForEdit}: RightSidePanelProps) => {
const [content, setContent] = useState<string>('action');
const [action, setAction] = React.useState<string>('');
@@ -48,9 +46,6 @@ export const RightSidePanel = ({pairForEdit, changeBrowserDimensions}: RightSide
backgroundColor: 'white',
alignItems: "center",
}}>
<Button onClick={() => {
changeBrowserDimensions();
}}>resize browser</Button>
<SimpleBox height={60} width='100%' background='lightGray' radius='0%'>
<Typography sx={{ padding: '10px' }}>
Last action:
@@ -58,11 +53,6 @@ export const RightSidePanel = ({pairForEdit, changeBrowserDimensions}: RightSide
</Typography>
</SimpleBox>
<Tabs value={content} onChange={handleChange} centered>
<Tab label="Actions" value="action" />
<Tab label="Pair detail" value="detail"/>
</Tabs>
{content === 'action' ? (
<React.Fragment>
<ActionDescription>Type of action:</ActionDescription>
@@ -87,7 +77,7 @@ export const RightSidePanel = ({pairForEdit, changeBrowserDimensions}: RightSide
}
</React.Fragment>
)
: <PairDetail pair={pairForEdit.pair} index={pairForEdit.index}/>
: null
}
</Paper>
);

View File

@@ -10,6 +10,7 @@ import { useBrowserDimensionsStore } from "../context/browserDimensions";
import { useGlobalInfoStore } from "../context/globalInfo";
import { editRecordingFromStorage } from "../api/storage";
import { WhereWhatPair } from "@wbr-project/wbr-interpret";
import styled from "styled-components";
interface RecordingPageProps {
recordingName?: string;
@@ -106,22 +107,29 @@ export const RecordingPage = ({ recordingName }: RecordingPageProps) => {
<div>
{isLoaded ?
<Grid container direction="row" spacing={0}>
{/* <Grid item xs={2} ref={workflowListRef} style={{ display: "flex", flexDirection: "row" }}>
<Grid item xs={2} ref={workflowListRef} style={{ display: "flex", flexDirection: "row" }}>
<LeftSidePanel
sidePanelRef={workflowListRef.current}
alreadyHasScrollbar={hasScrollbar}
recordingName={recordingName ? recordingName : ''}
handleSelectPairForEdit={handleSelectPairForEdit}
/>
</Grid> */}
</Grid>
<Grid id="browser-content" ref={browserContentRef} item xs>
<BrowserContent />
</Grid>
{/* <Grid item xs={2}>
<RightSidePanel pairForEdit={pairForEdit} changeBrowserDimensions={changeBrowserDimensions}/>
</Grid> */}
<Grid item xs={2}>
<RightSidePanel pairForEdit={pairForEdit} />
</Grid>
</Grid>
: <Loader />}
</div>
);
};
/** Relatively positioned container sized to the full viewport (100vw x 100vh) that hides overflowing content. */
const RecordingPageWrapper = styled.div`
position: relative;
width: 100vw;
height: 100vh;
overflow: hidden;
`;