feat(core): extraction based on attribute

This commit is contained in:
karishmas6
2024-08-04 03:53:59 +05:30
parent 062ded7c01
commit e943a8c253

View File

@@ -229,12 +229,25 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
return MBEs.map((mbe) => omap( return MBEs.map((mbe) => omap(
lists, lists,
({ selector }, key) => { ({ selector, attribute }, key) => {
const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem)); const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem));
return elem ? elem.innerText : undefined; if (!elem) return undefined;
switch (attribute) {
case 'href':
return elem.getAttribute('href');
case 'src':
return elem.getAttribute('src');
case 'innerText':
return elem.innerText;
case 'textContent':
return elem.textContent;
default:
return elem.innerText;
}
}, },
(key) => key // Use the original key in the output (key) => key // Use the original key in the output
)); ));
} }
})(window); })(window);