From 87e97cec11732f9f81fbd618ee6e0e73c12afe34 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 25 Jun 2024 19:28:09 +0530 Subject: [PATCH 01/49] fix: project name --- src/components/molecules/NavBar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/molecules/NavBar.tsx b/src/components/molecules/NavBar.tsx index 011277c1..640f09cb 100644 --- a/src/components/molecules/NavBar.tsx +++ b/src/components/molecules/NavBar.tsx @@ -44,7 +44,7 @@ export const NavBar = ({newRecording, recordingName, isRecording}:NavBarProps) = justifyContent: 'flex-start', }}> -
Browser Recorder
+
Maxun
Date: Tue, 25 Jun 2024 20:15:31 +0530 Subject: [PATCH 02/49] feat: create selector around clicked element --- src/components/organisms/BrowserWindow.tsx | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 32ab9607..afefbb41 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -9,6 +9,7 @@ export const BrowserWindow = () => { const [canvasRef, setCanvasReference] = useState | undefined>(undefined); const [screenShot, setScreenShot] = useState(""); const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect, selector: string } | null>(null); + const [selectedElement, setSelectedElement] = useState<{ rect: DOMRect, selector: string } | null>(null); const { socket } = useSocketStore(); const { width, height } = useBrowserDimensionsStore(); @@ -55,17 +56,25 @@ export const BrowserWindow = () => { console.log('Highlighter Rect via socket:', data.rect) }, [highlighterData]) + const handleClick = useCallback(() => { + if (highlighterData) { + setSelectedElement(highlighterData); + } + }, [highlighterData]); + useEffect(() => { document.addEventListener('mousemove', onMouseMove, false); + document.addEventListener('click', handleClick); if (socket) { socket.on("highlighter", highlighterHandler); } //cleaning function return () => { document.removeEventListener('mousemove', onMouseMove); + document.removeEventListener('click', handleClick); socket?.off("highlighter", highlighterHandler); }; - }, [socket, onMouseMove]); + }, [socket, onMouseMove, handleClick]); return ( <> @@ -76,8 +85,19 @@ export const BrowserWindow = () => { width={width} height={height} canvasRect={canvasRef.current.getBoundingClientRect()} + isSelected={false} /> : null} + {selectedElement && canvasRef?.current ? + + : null} Date: Tue, 25 Jun 2024 20:16:30 +0530 Subject: [PATCH 03/49] feat: isSelected prop --- src/components/atoms/Highlighter.tsx | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/components/atoms/Highlighter.tsx b/src/components/atoms/Highlighter.tsx index 3199b083..ea1732d7 100644 --- a/src/components/atoms/Highlighter.tsx +++ b/src/components/atoms/Highlighter.tsx @@ -8,9 +8,10 @@ interface HighlighterProps { width: number; height: number; canvasRect: DOMRect; + isSelected: boolean; }; -export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, height, canvasRect }: HighlighterProps) => { +export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, height, canvasRect, isSelected }: HighlighterProps) => { if (!unmodifiedRect) { return null; } else { @@ -69,6 +70,10 @@ export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, hei left={rect.left} width={rect.width} height={rect.height} + style={{ + background: isSelected ? '#ff0000' : '#ff5d5b26', + outline: isSelected ? '2px solid red' : '2px solid pink', + }} /> Date: Tue, 25 Jun 2024 20:18:17 +0530 Subject: [PATCH 04/49] feat: use array for selected elements --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index afefbb41..5284196b 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -9,7 +9,7 @@ export const BrowserWindow = () => { const [canvasRef, setCanvasReference] = useState | undefined>(undefined); const [screenShot, setScreenShot] = useState(""); const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect, selector: string } | null>(null); - const [selectedElement, setSelectedElement] = useState<{ rect: DOMRect, selector: string } | null>(null); + const [selectedElement, setSelectedElement] = useState>([]); const { socket } = useSocketStore(); const { width, height } = useBrowserDimensionsStore(); From d5b4dd368a3bb124c2d4648dbb4fcfdf26b777de Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 25 Jun 2024 20:19:23 +0530 Subject: [PATCH 05/49] fix: handle array in handleClick --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 5284196b..cffe4b1c 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -58,7 +58,7 @@ export const BrowserWindow = () => { const handleClick = useCallback(() => { if (highlighterData) { - setSelectedElement(highlighterData); + setSelectedElement(prev => [...prev, highlighterData]); } }, [highlighterData]); From 6e927c3cb51405de5c5121511901461f9eafbb05 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 25 Jun 2024 22:39:29 +0530 Subject: [PATCH 06/49] feat: select clicked elements --- src/components/organisms/BrowserWindow.tsx | 46 ++++++++++++++++------ 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index cffe4b1c..c8fc1b1d 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -58,7 +58,7 @@ export const BrowserWindow = () => { const handleClick = useCallback(() => { if (highlighterData) { - setSelectedElement(prev => [...prev, highlighterData]); + setSelectedElements(prev => [...prev, highlighterData]); } }, [highlighterData]); @@ -76,6 +76,27 @@ export const BrowserWindow = () => { }; }, [socket, onMouseMove, handleClick]); + // Adjust selected elements' positions after scroll + useEffect(() => { + const handleScroll = () => { + if (canvasRef && canvasRef.current) { + const canvasRect = canvasRef.current.getBoundingClientRect(); + setSelectedElements(prev => prev.map(element => ({ + ...element, + rect: new DOMRect( + element.rect.x, + element.rect.y, + element.rect.width, + element.rect.height + ) + }))); + } + }; + + window.addEventListener('scroll', handleScroll); + return () => window.removeEventListener('scroll', handleScroll); + }, [canvasRef]); + return ( <> {(highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ? @@ -88,16 +109,19 @@ export const BrowserWindow = () => { isSelected={false} /> : null} - {selectedElement && canvasRef?.current ? - - : null} + {selectedElements.map((element, index) => ( + canvasRef?.current ? + + : null + ))} Date: Tue, 25 Jun 2024 22:40:05 +0530 Subject: [PATCH 07/49] fix: rename to use plural --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index c8fc1b1d..09bd453a 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -9,7 +9,7 @@ export const BrowserWindow = () => { const [canvasRef, setCanvasReference] = useState | undefined>(undefined); const [screenShot, setScreenShot] = useState(""); const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect, selector: string } | null>(null); - const [selectedElement, setSelectedElement] = useState>([]); + const [selectedElements, setSelectedElements] = useState>([]); const { socket } = useSocketStore(); const { width, height } = useBrowserDimensionsStore(); From 1d310c827b11e95363b5940fb2c8e03efe60ba86 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 25 Jun 2024 22:45:14 +0530 Subject: [PATCH 08/49] fix: make isSelected true --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 09bd453a..30492696 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -118,7 +118,7 @@ export const BrowserWindow = () => { width={width} height={height} canvasRect={canvasRef.current.getBoundingClientRect()} - isSelected={false} + isSelected={true} /> : null ))} From c56759bc49313f0e12fde18873c037dc683278fc Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 25 Jun 2024 22:46:52 +0530 Subject: [PATCH 09/49] chore: lint --- src/components/organisms/BrowserWindow.tsx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 30492696..16d0b4bd 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -110,8 +110,8 @@ export const BrowserWindow = () => { /> : null} {selectedElements.map((element, index) => ( - canvasRef?.current ? - { height={height} canvasRect={canvasRef.current.getBoundingClientRect()} isSelected={true} - /> - : null + /> + : null ))} Date: Tue, 25 Jun 2024 22:47:13 +0530 Subject: [PATCH 10/49] fix: spacing --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 16d0b4bd..e67ff986 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -100,7 +100,7 @@ export const BrowserWindow = () => { return ( <> {(highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ? - < Highlighter + Date: Tue, 25 Jun 2024 23:33:57 +0530 Subject: [PATCH 11/49] fix: remove scroll useffect --- src/components/organisms/BrowserWindow.tsx | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index e67ff986..13ec0a91 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -76,26 +76,6 @@ export const BrowserWindow = () => { }; }, [socket, onMouseMove, handleClick]); - // Adjust selected elements' positions after scroll - useEffect(() => { - const handleScroll = () => { - if (canvasRef && canvasRef.current) { - const canvasRect = canvasRef.current.getBoundingClientRect(); - setSelectedElements(prev => prev.map(element => ({ - ...element, - rect: new DOMRect( - element.rect.x, - element.rect.y, - element.rect.width, - element.rect.height - ) - }))); - } - }; - - window.addEventListener('scroll', handleScroll); - return () => window.removeEventListener('scroll', handleScroll); - }, [canvasRef]); return ( <> From c455a42af9fb25f48ead3e8fcb309a0eed8a3099 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 25 Jun 2024 23:35:14 +0530 Subject: [PATCH 12/49] chore: remove unwanted code --- src/components/atoms/canvas.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/atoms/canvas.tsx b/src/components/atoms/canvas.tsx index 5152cf5e..50819d34 100644 --- a/src/components/atoms/canvas.tsx +++ b/src/components/atoms/canvas.tsx @@ -123,7 +123,6 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { }, [onMouseEvent]); return ( - // Date: Mon, 1 Jul 2024 23:02:24 +0530 Subject: [PATCH 13/49] feat: get html --- server/src/browser-management/classes/RemoteBrowser.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index f5527581..39b5f95e 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -174,12 +174,14 @@ export class RemoteBrowser { } }; - /** + /** * Makes and emits a single screenshot to the client side. * @returns {Promise} */ public makeAndEmitScreenshot = async (): Promise => { try { + const html = await this.currentPage?.content(); + await this.currentPage?.setContent(html || '') const screenshot = await this.currentPage?.screenshot(); if (screenshot) { this.emitScreenshot(screenshot.toString('base64')); @@ -188,7 +190,7 @@ export class RemoteBrowser { const { message } = e as Error; logger.log('error', message); } - }; + }; /** * Updates the active socket instance. From 3060882d02cf819cc9d54b397d57331024067d4f Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 1 Jul 2024 23:03:02 +0530 Subject: [PATCH 14/49] feat: revert to playwright screenshot --- server/src/browser-management/classes/RemoteBrowser.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 39b5f95e..d39ddf7b 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -180,8 +180,6 @@ export class RemoteBrowser { */ public makeAndEmitScreenshot = async (): Promise => { try { - const html = await this.currentPage?.content(); - await this.currentPage?.setContent(html || '') const screenshot = await this.currentPage?.screenshot(); if (screenshot) { this.emitScreenshot(screenshot.toString('base64')); From 7c2245260d51052e837d25854e9e5c272432a658 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 3 Jul 2024 23:15:06 +0530 Subject: [PATCH 15/49] fix: interpretation failure message --- server/src/routes/record.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/routes/record.ts b/server/src/routes/record.ts index da62bf8e..66e40352 100644 --- a/server/src/routes/record.ts +++ b/server/src/routes/record.ts @@ -99,7 +99,7 @@ router.get('/interpret', async (req, res) => { await interpretWholeWorkflow(); return res.send('interpretation done'); } catch (e) { - return res.send('interpretation done'); + return res.send('interpretation failed'); return res.status(400); } }); From a108fb2e41da28377044113bcb770c148aab8032 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 3 Jul 2024 23:57:18 +0530 Subject: [PATCH 16/49] fix: remove return statement --- server/src/routes/record.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/routes/record.ts b/server/src/routes/record.ts index 66e40352..0a20f66c 100644 --- a/server/src/routes/record.ts +++ b/server/src/routes/record.ts @@ -100,7 +100,6 @@ router.get('/interpret', async (req, res) => { return res.send('interpretation done'); } catch (e) { return res.send('interpretation failed'); - return res.status(400); } }); From 7ec8290405330c6e26207196795ec125a24ed44a Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 6 Jul 2024 22:59:21 +0530 Subject: [PATCH 17/49] feat: generator --- server/src/workflow-management/classes/Generator.ts | 1 + 1 file changed, 1 insertion(+) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 2634635d..fa0a7cec 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -484,6 +484,7 @@ export class WorkflowGenerator { public generateDataForHighlighter = async (page: Page, coordinates: Coordinates) => { const rect = await getRect(page, coordinates); const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click); + console.log('Backend Rectangle:', rect) if (rect) { this.socket.emit('highlighter', { rect, selector: displaySelector }); } From 53476dce53f151b035ab7c29128cf8ecfc09b8cd Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 6 Jul 2024 23:02:17 +0530 Subject: [PATCH 18/49] chore: comment out br --- server/src/workflow-management/classes/Generator.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index fa0a7cec..234d7014 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -484,7 +484,7 @@ export class WorkflowGenerator { public generateDataForHighlighter = async (page: Page, coordinates: Coordinates) => { const rect = await getRect(page, coordinates); const displaySelector = await this.generateSelector(page, coordinates, ActionType.Click); - console.log('Backend Rectangle:', rect) + //console.log('Backend Rectangle:', rect) if (rect) { this.socket.emit('highlighter', { rect, selector: displaySelector }); } From 885f5b120e441056db29adca7b093729ebad978d Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 7 Jul 2024 17:25:48 +0530 Subject: [PATCH 19/49] chore(deps): install html2canvas-pro --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index 5a901005..3a30a35c 100644 --- a/package.json +++ b/package.json @@ -24,6 +24,7 @@ "dotenv": "^16.0.0", "express": "^4.17.2", "fortawesome": "^0.0.1-security", + "html2canvas-pro": "^1.5.3", "joi": "^17.6.0", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", From 7a0d4f303cc594e412cda8789dc560858741eee3 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 9 Jul 2024 22:58:18 +0530 Subject: [PATCH 20/49] feat: get html content --- .../src/browser-management/classes/RemoteBrowser.ts | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index d39ddf7b..01e71b35 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -143,7 +143,7 @@ export class RemoteBrowser { return; } this.client.on('Page.screencastFrame', ({ data: base64, sessionId }) => { - this.emitScreenshot(base64); + this.emitScreenshot(base64) setTimeout(async () => { try { if (!this.client) { @@ -174,21 +174,22 @@ export class RemoteBrowser { } }; - /** + /** * Makes and emits a single screenshot to the client side. * @returns {Promise} */ public makeAndEmitScreenshot = async (): Promise => { try { - const screenshot = await this.currentPage?.screenshot(); + const screenshot = await this.currentPage?.content(); if (screenshot) { - this.emitScreenshot(screenshot.toString('base64')); + const base64Html = Buffer.from(screenshot).toString('base64'); + this.emitScreenshot(base64Html); } } catch (e) { const { message } = e as Error; logger.log('error', message); } - }; + }; /** * Updates the active socket instance. @@ -337,7 +338,7 @@ export class RemoteBrowser { * @returns void */ private emitScreenshot = (payload: any): void => { - const dataWithMimeType = ('data:image/jpeg;base64,').concat(payload); + const dataWithMimeType = (',').concat(payload); this.socket.emit('screencast', dataWithMimeType); logger.log('debug', `Screenshot emitted`); }; From 70b34cba39f9bb2a258121bc598a9723d68ef5ef Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 9 Jul 2024 22:58:58 +0530 Subject: [PATCH 21/49] feat: html base64 --- server/src/browser-management/classes/RemoteBrowser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 01e71b35..4d553883 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -338,7 +338,7 @@ export class RemoteBrowser { * @returns void */ private emitScreenshot = (payload: any): void => { - const dataWithMimeType = (',').concat(payload); + const dataWithMimeType = ('').concat(payload); this.socket.emit('screencast', dataWithMimeType); logger.log('debug', `Screenshot emitted`); }; From 81749ed07d223304843c5fbf6d4547a88191b4dd Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 9 Jul 2024 22:59:55 +0530 Subject: [PATCH 22/49] feat: set data:text/html;base64 mime type --- server/src/browser-management/classes/RemoteBrowser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 4d553883..0dbf3ee1 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -338,7 +338,7 @@ export class RemoteBrowser { * @returns void */ private emitScreenshot = (payload: any): void => { - const dataWithMimeType = ('').concat(payload); + const dataWithMimeType = ('data:text/html;base64,').concat(payload); this.socket.emit('screencast', dataWithMimeType); logger.log('debug', `Screenshot emitted`); }; From 7e14323e1e2d4b9851b4e3ce2fd998830ed53826 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 9 Jul 2024 23:00:33 +0530 Subject: [PATCH 23/49] feat(exp): remove screencast options --- server/src/browser-management/classes/RemoteBrowser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 0dbf3ee1..b41752bc 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -314,7 +314,7 @@ export class RemoteBrowser { logger.log('warn', 'client is not initialized'); return; } - await this.client.send('Page.startScreencast', { format: 'jpeg', quality: 75 }); + await this.client.send('Page.startScreencast'); logger.log('info', `Browser started with screencasting a page.`); }; From 2db73d956b2016f168654bb6b94d831792c2f1e5 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 11 Jul 2024 17:27:49 +0530 Subject: [PATCH 24/49] fix: revert to screenshot --- server/src/browser-management/classes/RemoteBrowser.ts | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index b41752bc..c30a7fad 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -180,10 +180,9 @@ export class RemoteBrowser { */ public makeAndEmitScreenshot = async (): Promise => { try { - const screenshot = await this.currentPage?.content(); + const screenshot = await this.currentPage?.screenshot(); if (screenshot) { - const base64Html = Buffer.from(screenshot).toString('base64'); - this.emitScreenshot(base64Html); + this.emitScreenshot(screenshot.toString('base64')); } } catch (e) { const { message } = e as Error; From 465b87c7d7c36af858aa398ec68de14e9ce43451 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 11 Jul 2024 17:29:11 +0530 Subject: [PATCH 25/49] fix: revert to jpeg mimetype --- server/src/browser-management/classes/RemoteBrowser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index c30a7fad..b983963e 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -337,7 +337,7 @@ export class RemoteBrowser { * @returns void */ private emitScreenshot = (payload: any): void => { - const dataWithMimeType = ('data:text/html;base64,').concat(payload); + const dataWithMimeType = ('data:image/jpeg;base64,').concat(payload); this.socket.emit('screencast', dataWithMimeType); logger.log('debug', `Screenshot emitted`); }; From 0b1fd5dcb8f1471a189a74f48cfb607ec8ac773f Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 13 Jul 2024 17:06:58 +0530 Subject: [PATCH 26/49] feat: screecast options --- server/src/browser-management/classes/RemoteBrowser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index b983963e..556f3436 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -313,7 +313,7 @@ export class RemoteBrowser { logger.log('warn', 'client is not initialized'); return; } - await this.client.send('Page.startScreencast'); + await this.client.send('Page.startScreencast', { format: 'jpeg', quality: 90 }); logger.log('info', `Browser started with screencasting a page.`); }; From 76d50194e8b3c0f96342c1e34734597f50a249f7 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 13 Jul 2024 17:07:23 +0530 Subject: [PATCH 27/49] feat: screenshot options --- server/src/browser-management/classes/RemoteBrowser.ts | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 556f3436..d7376537 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -180,7 +180,9 @@ export class RemoteBrowser { */ public makeAndEmitScreenshot = async (): Promise => { try { - const screenshot = await this.currentPage?.screenshot(); + const screenshot = await this.currentPage?.screenshot( + { type: 'jpeg', quality: 90, fullPage: true } + ); if (screenshot) { this.emitScreenshot(screenshot.toString('base64')); } From dac1e3d0ac0469bd75cb5a325f76aaeaf128cec8 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 13 Jul 2024 22:21:42 +0530 Subject: [PATCH 28/49] chore: remove unused code --- src/components/atoms/canvas.tsx | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/components/atoms/canvas.tsx b/src/components/atoms/canvas.tsx index 50819d34..9a879106 100644 --- a/src/components/atoms/canvas.tsx +++ b/src/components/atoms/canvas.tsx @@ -128,11 +128,10 @@ const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { ref={canvasRef} height={720} width={1280} - style={{ width: '1280px', height: '720px' }} // Ensure dimensions are explicitly set /> ); }; -export default Canvas; +export default Canvas; \ No newline at end of file From eb33ce06429948d2a8dabc4f6624fd771181c1fb Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 13 Jul 2024 22:39:31 +0530 Subject: [PATCH 29/49] feat: remove pair detail --- src/components/organisms/RightSidePanel.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 3ad8dff5..a63b6333 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -87,7 +87,7 @@ export const RightSidePanel = ({pairForEdit, changeBrowserDimensions}: RightSide } ) - : + : null } ); From e06443c57257f69e0f3d9bcae148243179be1e1d Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 13 Jul 2024 22:41:15 +0530 Subject: [PATCH 30/49] feat: remove pair detail tav --- src/components/organisms/RightSidePanel.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index a63b6333..1ade84c2 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -60,7 +60,6 @@ export const RightSidePanel = ({pairForEdit, changeBrowserDimensions}: RightSide - {content === 'action' ? ( From 70a2fdb9ae7f8b0debf7973f6adbe883a7ffddda Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 13 Jul 2024 22:41:33 +0530 Subject: [PATCH 31/49] chore: remove unused import --- src/components/organisms/RightSidePanel.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 1ade84c2..7488dae1 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -7,7 +7,6 @@ import { SelectChangeEvent } from "@mui/material/Select/Select"; import { SimpleBox } from "../atoms/Box"; import Typography from "@mui/material/Typography"; import { useGlobalInfoStore } from "../../context/globalInfo"; -import { PairDetail } from "../molecules/PairDetail"; import { PairForEdit } from "../../pages/RecordingPage"; interface RightSidePanelProps { From 2b9adacc960124c9fdbdce1f89a5e90b3fc9eaed Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 13 Jul 2024 22:41:57 +0530 Subject: [PATCH 32/49] feat: remove tabs --- src/components/organisms/RightSidePanel.tsx | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 7488dae1..2681849b 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -57,10 +57,6 @@ export const RightSidePanel = ({pairForEdit, changeBrowserDimensions}: RightSide - - - - {content === 'action' ? ( Type of action: From a0606f5766e269dad081211a8efbdd568589f59b Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 13 Jul 2024 23:02:05 +0530 Subject: [PATCH 33/49] feat: rm resize browser --- src/components/organisms/RightSidePanel.tsx | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/components/organisms/RightSidePanel.tsx b/src/components/organisms/RightSidePanel.tsx index 2681849b..2769c0f1 100644 --- a/src/components/organisms/RightSidePanel.tsx +++ b/src/components/organisms/RightSidePanel.tsx @@ -11,10 +11,9 @@ import { PairForEdit } from "../../pages/RecordingPage"; interface RightSidePanelProps { pairForEdit: PairForEdit; - changeBrowserDimensions: () => void; } -export const RightSidePanel = ({pairForEdit, changeBrowserDimensions}: RightSidePanelProps) => { +export const RightSidePanel = ({pairForEdit}: RightSidePanelProps) => { const [content, setContent] = useState('action'); const [action, setAction] = React.useState(''); @@ -47,9 +46,6 @@ export const RightSidePanel = ({pairForEdit, changeBrowserDimensions}: RightSide backgroundColor: 'white', alignItems: "center", }}> - Last action: From 51f1755b14fea0553b84a3e318668ebb5198c810 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sat, 13 Jul 2024 23:03:05 +0530 Subject: [PATCH 34/49] fix: rm changeBrowserDimension prop --- src/pages/RecordingPage.tsx | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/src/pages/RecordingPage.tsx b/src/pages/RecordingPage.tsx index 7a2f03a8..b3bcedea 100644 --- a/src/pages/RecordingPage.tsx +++ b/src/pages/RecordingPage.tsx @@ -10,6 +10,7 @@ import { useBrowserDimensionsStore } from "../context/browserDimensions"; import { useGlobalInfoStore } from "../context/globalInfo"; import { editRecordingFromStorage } from "../api/storage"; import { WhereWhatPair } from "@wbr-project/wbr-interpret"; +import styled from "styled-components"; interface RecordingPageProps { recordingName?: string; @@ -106,22 +107,29 @@ export const RecordingPage = ({ recordingName }: RecordingPageProps) => {
{isLoaded ? - {/* + - */} + - {/* - - */} + + + : }
); }; + +const RecordingPageWrapper = styled.div` + position: relative; + width: 100vw; + height: 100vh; + overflow: hidden; +`; \ No newline at end of file From 7063cdf5aa522dc6eb5813f411aa6b6c0b0cde75 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 14 Jul 2024 00:46:39 +0530 Subject: [PATCH 35/49] chore: remove unused code --- src/components/organisms/BrowserWindow.tsx | 34 +++------------------- 1 file changed, 4 insertions(+), 30 deletions(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index 13ec0a91..c4c2e638 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -9,7 +9,6 @@ export const BrowserWindow = () => { const [canvasRef, setCanvasReference] = useState | undefined>(undefined); const [screenShot, setScreenShot] = useState(""); const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect, selector: string } | null>(null); - const [selectedElements, setSelectedElements] = useState>([]); const { socket } = useSocketStore(); const { width, height } = useBrowserDimensionsStore(); @@ -56,52 +55,29 @@ export const BrowserWindow = () => { console.log('Highlighter Rect via socket:', data.rect) }, [highlighterData]) - const handleClick = useCallback(() => { - if (highlighterData) { - setSelectedElements(prev => [...prev, highlighterData]); - } - }, [highlighterData]); - useEffect(() => { document.addEventListener('mousemove', onMouseMove, false); - document.addEventListener('click', handleClick); if (socket) { socket.on("highlighter", highlighterHandler); } //cleaning function return () => { document.removeEventListener('mousemove', onMouseMove); - document.removeEventListener('click', handleClick); socket?.off("highlighter", highlighterHandler); }; - }, [socket, onMouseMove, handleClick]); - + }, [socket, onMouseMove]); return ( <> {(highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ? - : null} - {selectedElements.map((element, index) => ( - canvasRef?.current ? - - : null - ))} { img.src = image; img.onload = () => { URL.revokeObjectURL(img.src); - //ctx?.clearRect(0, 0, canvas?.width || 0, VIEWPORT_H || 0); - // ctx?.drawImage(img, 0, 0, canvas.width , canvas.height); - ctx?.drawImage(img, 0, 0, 1280, 720); // Explicitly draw image at 1280 x 720 + ctx?.drawImage(img, 0, 0, 1280, 720); console.log('Image drawn on canvas:', img.width, img.height); console.log('Image drawn on canvas:', canvas.width, canvas.height); }; -}; +}; \ No newline at end of file From e8f09d1be016239aa5ef4e5aa4ff7aad5891044d Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 14 Jul 2024 00:47:11 +0530 Subject: [PATCH 36/49] chore: lint --- src/components/organisms/BrowserWindow.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/organisms/BrowserWindow.tsx b/src/components/organisms/BrowserWindow.tsx index c4c2e638..d81ff967 100644 --- a/src/components/organisms/BrowserWindow.tsx +++ b/src/components/organisms/BrowserWindow.tsx @@ -70,7 +70,7 @@ export const BrowserWindow = () => { return ( <> {(highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ? - < Highlighter + Date: Mon, 15 Jul 2024 19:29:17 +0530 Subject: [PATCH 37/49] feat: return innerText --- server/src/workflow-management/selector.ts | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 957cb3b9..06263950 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -74,18 +74,26 @@ export const getElementInformation = async ( tagName: element?.tagName ?? '', hasOnlyText: element?.children?.length === 0 && element?.innerText?.length > 0, - } + innerText: element?.innerText ?? '', // Add innerText to the returned object + }; } + return null; }, { x: coordinates.x, y: coordinates.y }, ); + + if (elementInfo) { + console.log(`Element : ${elementInfo.innerText}`); // Print innerText to the console + } + return elementInfo; } catch (error) { const { message, stack } = error as Error; - logger.log('error', `Error while retrieving selector: ${message}`); - logger.log('error', `Stack: ${stack}`); + console.error('Error while retrieving selector:', message); + console.error('Stack:', stack); } -} +}; + /** * Returns the best and unique css {@link Selectors} for the element on the page. From bcbe9a3062b6aacd1458957ca5ca6d592efe8a42 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 15 Jul 2024 20:53:55 +0530 Subject: [PATCH 38/49] feat: get link url & img url --- server/src/workflow-management/selector.ts | 40 ++++++++++++++++++---- 1 file changed, 33 insertions(+), 7 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index 06263950..0290ee9f 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -68,14 +68,30 @@ export const getElementInformation = async ( const el = document.elementFromPoint(x, y) as HTMLElement; if (el) { const { parentElement } = el; - // Match the logic in recorder.ts for link clicks const element = parentElement?.tagName === 'A' ? parentElement : el; - return { + + let info: { + tagName: string; + hasOnlyText?: boolean; + innerText?: string; + url?: string; + imageUrl?: string; + } = { tagName: element?.tagName ?? '', - hasOnlyText: element?.children?.length === 0 && - element?.innerText?.length > 0, - innerText: element?.innerText ?? '', // Add innerText to the returned object }; + + if (element?.tagName === 'A') { + info.url = (element as HTMLAnchorElement).href; + info.innerText = element.innerText ?? ''; + } else if (element?.tagName === 'IMG') { + info.imageUrl = (element as HTMLImageElement).src; + } else { + info.hasOnlyText = element?.children?.length === 0 && + element?.innerText?.length > 0; + info.innerText = element?.innerText ?? ''; + } + + return info; } return null; }, @@ -83,9 +99,19 @@ export const getElementInformation = async ( ); if (elementInfo) { - console.log(`Element : ${elementInfo.innerText}`); // Print innerText to the console + if (elementInfo.tagName === 'A') { + if (elementInfo.innerText) { + console.log(`Link text: ${elementInfo.innerText}, URL: ${elementInfo.url}`); + } else { + console.log(`URL: ${elementInfo.url}`); + } + } else if (elementInfo.tagName === 'IMG') { + console.log(`Image URL: ${elementInfo.imageUrl}`); + } else { + console.log(`Element innerText: ${elementInfo.innerText}`); + } } - + return elementInfo; } catch (error) { const { message, stack } = error as Error; From de0b4f3efac60b350d3907d411381e7700ce1ce0 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 15 Jul 2024 22:26:22 +0530 Subject: [PATCH 39/49] feat: operators --- mx-interpreter/types/logic.ts | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 mx-interpreter/types/logic.ts diff --git a/mx-interpreter/types/logic.ts b/mx-interpreter/types/logic.ts new file mode 100644 index 00000000..5d06abbe --- /dev/null +++ b/mx-interpreter/types/logic.ts @@ -0,0 +1,5 @@ +export const unaryOperators = ['$not'] as const; +export const naryOperators = ['$and', '$or'] as const; + +export const operators = [...unaryOperators, ...naryOperators] as const; +export const meta = ['$before', '$after'] as const; \ No newline at end of file From dd7a7918c6ae4e4d7b98135dace7e33b33594661 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 15 Jul 2024 22:32:37 +0530 Subject: [PATCH 40/49] feat: types for workflow & actions --- mx-interpreter/types/workflow.ts | 58 ++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) create mode 100644 mx-interpreter/types/workflow.ts diff --git a/mx-interpreter/types/workflow.ts b/mx-interpreter/types/workflow.ts new file mode 100644 index 00000000..ac9fda22 --- /dev/null +++ b/mx-interpreter/types/workflow.ts @@ -0,0 +1,58 @@ +import { Page } from 'playwright'; +import { + naryOperators, unaryOperators, operators, meta, +} from './logic'; + +export type Operator = typeof operators[number]; +export type UnaryOperator = typeof unaryOperators[number]; +export type NAryOperator = typeof naryOperators[number]; + +export type Meta = typeof meta[number]; + +export type SelectorArray = string[]; + +type RegexableString = string | { '$regex':string }; + +type BaseConditions = { + 'url': RegexableString, + 'cookies': Record, + 'selectors': SelectorArray, // (CSS/Playwright) selectors use their own logic, there is no reason (and several technical difficulties) to allow regular expression notation +} & Record; + +export type Where = +Partial<{ [key in NAryOperator]: Where[] }> & // either a logic operator (arity N) +Partial<{ [key in UnaryOperator]: Where }> & // or an unary operator +Partial; // or one of the base conditions + +type MethodNames = { + [K in keyof T]: T[K] extends Function ? K : never; +}[keyof T]; + +export type CustomFunctions = 'scrape' | 'scrapeSchema' | 'scroll' | 'screenshot' | 'script' | 'enqueueLinks' | 'flag'; + +export type What = { + action: MethodNames | CustomFunctions, + args?: any[] +}; + +export type PageState = Partial; + +export type ParamType = Record; + +export type MetaData = { + name?: string, + desc?: string, +}; + +export interface WhereWhatPair { + id?: string + where: Where + what: What[] +} + +export type Workflow = WhereWhatPair[]; + +export type WorkflowFile = { + meta?: MetaData, + workflow: Workflow +}; \ No newline at end of file From 627536a2f22ce534fca7ddd156b2453a600bd9b7 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 15 Jul 2024 22:33:09 +0530 Subject: [PATCH 41/49] chore: lint --- mx-interpreter/types/workflow.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/mx-interpreter/types/workflow.ts b/mx-interpreter/types/workflow.ts index ac9fda22..36c6d14d 100644 --- a/mx-interpreter/types/workflow.ts +++ b/mx-interpreter/types/workflow.ts @@ -11,7 +11,7 @@ export type Meta = typeof meta[number]; export type SelectorArray = string[]; -type RegexableString = string | { '$regex':string }; +type RegexableString = string | { '$regex': string }; type BaseConditions = { 'url': RegexableString, @@ -20,9 +20,9 @@ type BaseConditions = { } & Record; export type Where = -Partial<{ [key in NAryOperator]: Where[] }> & // either a logic operator (arity N) -Partial<{ [key in UnaryOperator]: Where }> & // or an unary operator -Partial; // or one of the base conditions + Partial<{ [key in NAryOperator]: Where[] }> & // either a logic operator (arity N) + Partial<{ [key in UnaryOperator]: Where }> & // or an unary operator + Partial; // or one of the base conditions type MethodNames = { [K in keyof T]: T[K] extends Function ? K : never; From b26a55d52da071db50cd8c2d7a5a808a5b736fdc Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 16 Jul 2024 00:06:08 +0530 Subject: [PATCH 42/49] feat: get biggest element --- mx-interpreter/browserSide/scraper.js | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 mx-interpreter/browserSide/scraper.js diff --git a/mx-interpreter/browserSide/scraper.js b/mx-interpreter/browserSide/scraper.js new file mode 100644 index 00000000..c64c4808 --- /dev/null +++ b/mx-interpreter/browserSide/scraper.js @@ -0,0 +1,13 @@ +/* eslint-disable @typescript-eslint/no-unused-vars */ + +const area = (element) => element.offsetHeight * element.offsetWidth; + +function getBiggestElement(selector) { + const elements = Array.from(document.querySelectorAll(selector)); + const biggest = elements.reduce( + (max, elem) => ( + area(elem) > area(max) ? elem : max), + { offsetHeight: 0, offsetWidth: 0 }, + ); + return biggest; +} From c04fafd726d2189d19db84f4f26fafb8458f1d1c Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 16 Jul 2024 00:25:47 +0530 Subject: [PATCH 43/49] feat: scrapable heuristics --- mx-interpreter/browserSide/scraper.js | 40 +++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/mx-interpreter/browserSide/scraper.js b/mx-interpreter/browserSide/scraper.js index c64c4808..d9e5a0d9 100644 --- a/mx-interpreter/browserSide/scraper.js +++ b/mx-interpreter/browserSide/scraper.js @@ -11,3 +11,43 @@ function getBiggestElement(selector) { ); return biggest; } + +/** + * Generates structural selector (describing element by its DOM tree location). + * + * **The generated selector is not guaranteed to be unique!** (In fact, this is + * the desired behaviour in here.) + * @param {HTMLElement} element Element being described. + * @returns {string} CSS-compliant selector describing the element's location in the DOM tree. + */ +function GetSelectorStructural(element) { + // Base conditions for the recursive approach. + if (element.tagName === 'BODY') { + return 'BODY'; + } + const selector = element.tagName; + if (element.parentElement) { + return `${GetSelectorStructural(element.parentElement)} > ${selector}`; + } + + return selector; +} + +/** + * Heuristic method to find collections of "interesting" items on the page. + * @returns {Array} A collection of interesting DOM nodes + * (online store products, plane tickets, list items... and many more?) + */ +function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, metricType = 'size_deviation') { + const restoreScroll = (() => { + const { scrollX, scrollY } = window; + return () => { + window.scrollTo(scrollX, scrollY); + }; + })(); + + /** +* @typedef {Array<{x: number, y: number}>} Grid +*/ + + \ No newline at end of file From 2db0d0c52f8c4e96e659d488e44140bbf8a0b71f Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 16 Jul 2024 00:26:21 +0530 Subject: [PATCH 44/49] feat: get grid --- mx-interpreter/browserSide/scraper.js | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/mx-interpreter/browserSide/scraper.js b/mx-interpreter/browserSide/scraper.js index d9e5a0d9..719d8f59 100644 --- a/mx-interpreter/browserSide/scraper.js +++ b/mx-interpreter/browserSide/scraper.js @@ -50,4 +50,23 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @typedef {Array<{x: number, y: number}>} Grid */ - \ No newline at end of file + /** + * Returns an array of grid-aligned {x,y} points. + * @param {number} [granularity=0.005] sets the number of generated points + * (the higher the granularity, the more points). + * @returns {Grid} Array of {x, y} objects. + */ + function getGrid(startX = 0, startY = 0, granularity = 0.005) { + const width = window.innerWidth; + const height = window.innerHeight; + + const out = []; + for (let x = 0; x < width; x += 1 / granularity) { + for (let y = 0; y < height; y += 1 / granularity) { + out.push({ x: startX + x, y: startY + y }); + } + } + return out; + } + + \ No newline at end of file From 639b6aecbd8b62d3065099f407181a64871f6216 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 16 Jul 2024 00:27:01 +0530 Subject: [PATCH 45/49] feat: update maximum with point --- mx-interpreter/browserSide/scraper.js | 58 ++++++++++++++++++++++++++- 1 file changed, 57 insertions(+), 1 deletion(-) diff --git a/mx-interpreter/browserSide/scraper.js b/mx-interpreter/browserSide/scraper.js index 719d8f59..9cede7aa 100644 --- a/mx-interpreter/browserSide/scraper.js +++ b/mx-interpreter/browserSide/scraper.js @@ -69,4 +69,60 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return out; } - \ No newline at end of file + let maxSelector = { selector: 'body', metric: 0 }; + + const updateMaximumWithPoint = (point) => { + const currentElement = document.elementFromPoint(point.x, point.y); + const selector = GetSelectorStructural(currentElement); + + const elements = Array.from(document.querySelectorAll(selector)) + .filter((element) => area(element) > minArea); + + // If the current selector targets less than three elements, + // we consider it not interesting (would be a very underwhelming scraper) + if (elements.length < 3) { + return; + } + + let metric = null; + + if (metricType === 'total_area') { + metric = elements + .reduce((p, x) => p + area(x), 0); + } else if (metricType === 'size_deviation') { + // This could use a proper "statistics" approach... but meh, so far so good! + const sizes = elements + .map((element) => area(element)); + + metric = (1 - (Math.max(...sizes) - Math.min(...sizes)) / Math.max(...sizes)); + } + + // console.debug(`Total ${metricType} is ${metric}.`) + if (metric > maxSelector.metric && elements.length < maxCountPerPage) { + maxSelector = { selector, metric }; + } + }; + + for (let scroll = 0; scroll < scrolls; scroll += 1) { + window.scrollTo(0, scroll * window.innerHeight); + + const grid = getGrid(); + + grid.forEach(updateMaximumWithPoint); + } + + restoreScroll(); + + let out = Array.from(document.querySelectorAll(maxSelector.selector)); + + const different = (x, i, a) => a.findIndex((e) => e === x) === i; + // as long as we don't merge any two elements by substituing them for their parents, + // we substitute. + while (out.map((x) => x.parentElement).every(different) + && out.forEach((x) => x.parentElement !== null)) { + out = out.map((x) => x.parentElement ?? x); + } + + return out; +} + From 2ad663e8693eca49d5479b436455b0e19534bf35 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 16 Jul 2024 00:27:43 +0530 Subject: [PATCH 46/49] feat: scrape result from current page --- mx-interpreter/browserSide/scraper.js | 48 +++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/mx-interpreter/browserSide/scraper.js b/mx-interpreter/browserSide/scraper.js index 9cede7aa..19267957 100644 --- a/mx-interpreter/browserSide/scraper.js +++ b/mx-interpreter/browserSide/scraper.js @@ -126,3 +126,51 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return out; } +/** + * Returns a "scrape" result from the current page. + * @returns {Array} *Curated* array of scraped information (with sparse rows removed) + */ +function scrape(selector = null) { + /** + * **crudeRecords** contains uncurated rundowns of "scrapable" elements + * @type {Array} + */ + const crudeRecords = (selector + ? Array.from(document.querySelectorAll(selector)) + : scrapableHeuristics()) + .map((record) => ({ + ...Array.from(record.querySelectorAll('img')) + .reduce((p, x, i) => { + let url = null; + if (x.srcset) { + const urls = x.srcset.split(', '); + [url] = urls[urls.length - 1].split(' '); + } + + /** + * Contains the largest elements from `srcset` - if `srcset` is not present, contains + * URL from the `src` attribute + * + * If the `src` attribute contains a data url, imgUrl contains `undefined`. + */ + let imgUrl; + if (x.srcset) { + imgUrl = url; + } else if (x.src.indexOf('data:') === -1) { + imgUrl = x.src; + } + + return ({ + ...p, + ...(imgUrl ? { [`img_${i}`]: imgUrl } : {}), + }); + }, {}), + ...record.innerText.split('\n') + .reduce((p, x, i) => ({ + ...p, + [`record_${String(i).padStart(4, '0')}`]: x.trim(), + }), {}), + })); + + return crudeRecords; +} \ No newline at end of file From 37ab9dcfe9e5e9ff7a9e529d40360a00aa6816ea Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 16 Jul 2024 00:28:05 +0530 Subject: [PATCH 47/49] feat: scrape schema --- mx-interpreter/browserSide/scraper.js | 50 +++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/mx-interpreter/browserSide/scraper.js b/mx-interpreter/browserSide/scraper.js index 19267957..568bad06 100644 --- a/mx-interpreter/browserSide/scraper.js +++ b/mx-interpreter/browserSide/scraper.js @@ -173,4 +173,54 @@ function scrape(selector = null) { })); return crudeRecords; +} + +/** + * Given an object with named lists of elements, + * groups the elements by their distance in the DOM tree. + * @param {Object.} lists The named lists of HTML elements. + * @returns {Array.>} + */ +function scrapeSchema(lists) { + function omap(object, f, kf = (x) => x) { + return Object.fromEntries( + Object.entries(object) + .map(([k, v]) => [kf(k), f(v)]), + ); + } + + function ofilter(object, f) { + return Object.fromEntries( + Object.entries(object) + .filter(([k, v]) => f(k, v)), + ); + } + + function getSeedKey(listObj) { + const maxLength = Math.max(...Object.values(omap(listObj, (x) => x.length))); + return Object.keys(ofilter(listObj, (_, v) => v.length === maxLength))[0]; + } + + function getMBEs(elements) { + return elements.map((element) => { + let candidate = element; + const isUniqueChild = (e) => elements + .filter((elem) => e.parentNode?.contains(elem)) + .length === 1; + + while (candidate && isUniqueChild(candidate)) { + candidate = candidate.parentNode; + } + + return candidate; + }); + } + + const seedName = getSeedKey(lists); + const MBEs = getMBEs(lists[seedName]); + + return MBEs.map((mbe) => omap( + lists, + (listOfElements) => listOfElements.find((elem) => mbe.contains(elem))?.innerText, + )); } \ No newline at end of file From b70b1be6e95105d3971bcb10963323b912aabbe5 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 16 Jul 2024 00:28:20 +0530 Subject: [PATCH 48/49] chore: lint --- mx-interpreter/browserSide/scraper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mx-interpreter/browserSide/scraper.js b/mx-interpreter/browserSide/scraper.js index 568bad06..c411f642 100644 --- a/mx-interpreter/browserSide/scraper.js +++ b/mx-interpreter/browserSide/scraper.js @@ -119,7 +119,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, // as long as we don't merge any two elements by substituing them for their parents, // we substitute. while (out.map((x) => x.parentElement).every(different) - && out.forEach((x) => x.parentElement !== null)) { + && out.forEach((x) => x.parentElement !== null)) { out = out.map((x) => x.parentElement ?? x); } From 234717b65e50f5da7c7296dc47119f01b7f55cfd Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 16 Jul 2024 20:06:42 +0530 Subject: [PATCH 49/49] chore: remove prev. rectangle --- src/components/atoms/Highlighter.tsx | 48 +++------------------------- 1 file changed, 5 insertions(+), 43 deletions(-) diff --git a/src/components/atoms/Highlighter.tsx b/src/components/atoms/Highlighter.tsx index ea1732d7..4949277d 100644 --- a/src/components/atoms/Highlighter.tsx +++ b/src/components/atoms/Highlighter.tsx @@ -8,29 +8,15 @@ interface HighlighterProps { width: number; height: number; canvasRect: DOMRect; - isSelected: boolean; }; -export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, height, canvasRect, isSelected }: HighlighterProps) => { +export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, height, canvasRect }: HighlighterProps) => { if (!unmodifiedRect) { return null; } else { - // const unshiftedRect = mapRect(unmodifiedRect, width, height); - // console.log('unshiftedRect', unshiftedRect) - // const rect = { - // bottom: unshiftedRect.bottom + canvasRect.top, - // top: unshiftedRect.top + canvasRect.top, - // left: unshiftedRect.left + canvasRect.left, - // right: unshiftedRect.right + canvasRect.left, - // x: unshiftedRect.x + canvasRect.left, - // y: unshiftedRect.y + canvasRect.top, - // width: unshiftedRect.width, - // height: unshiftedRect.height, - // } - const rect = { - top: unmodifiedRect.top + canvasRect.top, - left: unmodifiedRect.left + canvasRect.left, + top: unmodifiedRect.top + canvasRect.top + window.scrollY, + left: unmodifiedRect.left + canvasRect.left + window.scrollX, right: unmodifiedRect.right + canvasRect.left, bottom: unmodifiedRect.bottom + canvasRect.top, width: unmodifiedRect.width, @@ -42,26 +28,6 @@ export const Highlighter = ({ unmodifiedRect, displayedSelector = '', width, hei console.log('rectangle:', rect) console.log('canvas rectangle:', canvasRect) - // make the highlighting rectangle stay in browser window boundaries - // if (rect.bottom > canvasRect.bottom) { - // rect.height = height - unshiftedRect.top; - // } - - // if (rect.top < canvasRect.top) { - // rect.height = rect.height - (canvasRect.top - rect.top); - // rect.top = canvasRect.top; - // } - - // if (rect.right > canvasRect.right) { - // rect.width = width - unshiftedRect.left; - // } - - // if (rect.left < canvasRect.left) { - // rect.width = rect.width - (canvasRect.left - rect.left); - // rect.left = canvasRect.left; - // } - - return (
` position: fixed !important; background: #ff5d5b26 !important; outline: 4px solid pink !important; - // border: 4px solid #ff5d5b !important; + //border: 4px solid #ff5d5b !important; z-index: 2147483647 !important; - // border-radius: 5px; + //border-radius: 5px; top: ${(p: HighlighterOutlineProps) => p.top}px; left: ${(p: HighlighterOutlineProps) => p.left}px; width: ${(p: HighlighterOutlineProps) => p.width}px;