feat: handle relative `src` URLs in scrape schema
This commit is contained in:
@@ -235,10 +235,11 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
|
|||||||
|
|
||||||
switch (attribute) {
|
switch (attribute) {
|
||||||
case 'href':
|
case 'href':
|
||||||
const relativeHref = elem.getAttribute('href'); // Get the href attribute
|
const relativeHref = elem.getAttribute('href');
|
||||||
return relativeHref ? new URL(relativeHref, window.location.origin).href : null; // Convert to full URL
|
return relativeHref ? new URL(relativeHref, window.location.origin).href : null;
|
||||||
case 'src':
|
case 'src':
|
||||||
return elem.getAttribute('src');
|
const relativeSrc = elem.getAttribute('src');
|
||||||
|
return relativeSrc ? new URL(relativeSrc, window.location.origin).href : null;
|
||||||
case 'innerText':
|
case 'innerText':
|
||||||
return elem.innerText;
|
return elem.innerText;
|
||||||
case 'textContent':
|
case 'textContent':
|
||||||
|
|||||||
Reference in New Issue
Block a user