feat: when extracting href from a non-anchor element, use the parent <a> element's URL

This commit is contained in:
Rohit
2025-04-23 17:21:14 +05:30
parent 0ca306fdc5
commit 540c9ec709

View File

@@ -541,8 +541,21 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
} else if (attribute === 'innerHTML') {
return element.innerHTML.trim();
} else if (attribute === 'src' || attribute === 'href') {
const attrValue = element.getAttribute(attribute);
if (attribute === 'href' && element.tagName !== 'A') {
const parentElement = element.parentElement;
if (parentElement && parentElement.tagName === 'A') {
const parentHref = parentElement.getAttribute('href');
if (parentHref) {
try {
return new URL(parentHref, baseURL).href;
} catch (e) {
return parentHref;
}
}
}
}
const attrValue = element.getAttribute(attribute);
const dataAttr = attrValue || element.getAttribute('data-' + attribute);
if (!dataAttr || dataAttr.trim() === '') {