feat(core): extraction based on attribute

This commit is contained in:
karishmas6
2024-08-04 03:53:59 +05:30
parent 062ded7c01
commit e943a8c253

View File

@@ -229,12 +229,25 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
return MBEs.map((mbe) => omap(
lists,
({ selector }, key) => {
const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem));
return elem ? elem.innerText : undefined;
({ selector, attribute }, key) => {
const elem = Array.from(document.querySelectorAll(selector)).find((elem) => mbe.contains(elem));
if (!elem) return undefined;
switch (attribute) {
case 'href':
return elem.getAttribute('href');
case 'src':
return elem.getAttribute('src');
case 'innerText':
return elem.innerText;
case 'textContent':
return elem.textContent;
default:
return elem.innerText;
}
},
(key) => key // Use the original key in the output
));
));
}
})(window);