feat: handle relative src URLs in scrape list

This commit is contained in:
amhsirak
2024-11-23 07:06:15 +05:30
parent 94aa8bcf42
commit 232f838902

View File

@@ -282,7 +282,9 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
} else if (attribute === 'innerHTML') { } else if (attribute === 'innerHTML') {
record[label] = fieldElement.innerHTML.trim(); record[label] = fieldElement.innerHTML.trim();
} else if (attribute === 'src') { } else if (attribute === 'src') {
record[label] = fieldElement.src; // Handle relative 'src' URLs
const src = fieldElement.getAttribute('src');
record[label] = src ? new URL(src, baseUrl).href : null;
} else if (attribute === 'href') { } else if (attribute === 'href') {
// Handle relative 'href' URLs // Handle relative 'href' URLs
const href = fieldElement.getAttribute('href'); const href = fieldElement.getAttribute('href');