feat: handle relative src URLs in scrape schema

This commit is contained in:
amhsirak
2024-11-23 07:07:12 +05:30
parent 232f838902
commit 6d59a58e30

View File

@@ -235,10 +235,11 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
switch (attribute) { switch (attribute) {
case 'href': case 'href':
const relativeHref = elem.getAttribute('href'); // Get the href attribute const relativeHref = elem.getAttribute('href');
return relativeHref ? new URL(relativeHref, window.location.origin).href : null; // Convert to full URL return relativeHref ? new URL(relativeHref, window.location.origin).href : null;
case 'src': case 'src':
return elem.getAttribute('src'); const relativeSrc = elem.getAttribute('src');
return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null;
case 'innerText': case 'innerText':
return elem.innerText; return elem.innerText;
case 'textContent': case 'textContent':