From 65e9cb10e9464020f137660549e0efc64f116bc7 Mon Sep 17 00:00:00 2001 From: LawyZheng Date: Thu, 14 Aug 2025 14:51:43 +0800 Subject: [PATCH] optimize scraping part 2 (#3185) --- skyvern/webeye/scraper/domUtils.js | 41 +++++++++++++++++++++++++++--- 1 file changed, 37 insertions(+), 4 deletions(-) diff --git a/skyvern/webeye/scraper/domUtils.js b/skyvern/webeye/scraper/domUtils.js index 566a199d..1d70b4c6 100644 --- a/skyvern/webeye/scraper/domUtils.js +++ b/skyvern/webeye/scraper/domUtils.js @@ -92,6 +92,7 @@ class Rect { } class DomUtils { + static visibleClientRectCache = new WeakMap(); // // Bounds the rect by the current viewport dimensions. If the rect is offscreen or has a height or // width < 3 then null is returned instead of a rect. @@ -113,7 +114,18 @@ class DomUtils { } } + // add cache to optimize performance static getVisibleClientRect(element, testChildren) { + // check cache + const cacheKey = `${testChildren}`; + if (DomUtils.visibleClientRectCache.has(element)) { + const elementCache = DomUtils.visibleClientRectCache.get(element); + if (elementCache.has(cacheKey)) { + _jsConsoleLog("hit cache to get the rect of element"); + return elementCache.get(cacheKey); + } + } + // Note: this call will be expensive if we modify the DOM in between calls. let clientRect; if (testChildren == null) testChildren = false; @@ -138,6 +150,8 @@ class DomUtils { return isInlineZeroFontSize; }; + let result = null; + for (clientRect of clientRects) { // If the link has zero dimensions, it may be wrapping visible but floated elements. Check for // this. @@ -172,8 +186,10 @@ class DomUtils { childClientRect.height < 3 ) continue; - return childClientRect; + result = childClientRect; + break; } + if (result) break; } else { clientRect = this.cropRectToVisible(clientRect); @@ -192,11 +208,23 @@ class DomUtils { if (computedStyle.getPropertyValue("visibility") !== "visible") continue; - return clientRect; + result = clientRect; + break; } } - return null; + // cache result + if (!DomUtils.visibleClientRectCache.has(element)) { + DomUtils.visibleClientRectCache.set(element, new Map()); + } + DomUtils.visibleClientRectCache.get(element).set(cacheKey, result); + + return result; + } + + // clear cache + static clearVisibleClientRectCache() { + DomUtils.visibleClientRectCache = new WeakMap(); } static getViewportTopLeft() { @@ -1453,7 +1481,6 @@ async function buildElementObject( text: getElementText(element), afterPseudoText: getPseudoContent(element, "::after"), children: [], - rect: DomUtils.getVisibleClientRect(element, true), // if purgeable is True, which means this element is only used for building the tree relationship purgeable: purgeable, // don't trim any attr of this element if keepAllAttr=True @@ -1763,9 +1790,15 @@ async function buildElementTree( function drawBoundingBoxes(elements) { // draw a red border around the elements + DomUtils.clearVisibleClientRectCache(); + elements.forEach((element) => { + const ele = getDOMElementBySkyvenElement(element); + element.rect = DomUtils.getVisibleClientRect(ele, true); + }); var groups = groupElementsVisually(elements); var hintMarkers = createHintMarkersForGroups(groups); addHintMarkersToPage(hintMarkers); + DomUtils.clearVisibleClientRectCache(); } async function buildElementsAndDrawBoundingBoxes(