diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 83d196fe..009cf9da 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -229,12 +229,25 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return MBEs.map((mbe) => omap( lists, - ({ selector }, key) => { - const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem)); - return elem ? elem.innerText : undefined; + ({ selector, attribute }, key) => { + const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem)); + if (!elem) return undefined; + + switch (attribute) { + case 'href': + return elem.getAttribute('href'); + case 'src': + return elem.getAttribute('src'); + case 'innerText': + return elem.innerText; + case 'textContent': + return elem.textContent; + default: + return elem.innerText; + } }, (key) => key // Use the original key in the output - )); + )); } })(window); \ No newline at end of file