Merge pull request #228 from getmaxun/list-url

fix: use `window.location.origin` as baseUrl for scrapeList
This commit is contained in:
Karishma Shukla
2024-12-09 21:14:43 +05:30
committed by GitHub

View File

@@ -283,13 +283,13 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
} else if (attribute === 'innerHTML') { } else if (attribute === 'innerHTML') {
record[label] = fieldElement.innerHTML.trim(); record[label] = fieldElement.innerHTML.trim();
} else if (attribute === 'src') { } else if (attribute === 'src') {
// Handle relative 'src' URLs // Handle relative 'src' URLs
const src = fieldElement.getAttribute('src'); const src = fieldElement.getAttribute('src');
record[label] = src ? new URL(src, baseUrl).href : null; record[label] = src ? new URL(src, window.location.origin).href : null;
} else if (attribute === 'href') { } else if (attribute === 'href') {
// Handle relative 'href' URLs // Handle relative 'href' URLs
const href = fieldElement.getAttribute('href'); const href = fieldElement.getAttribute('href');
record[label] = href ? new URL(href, baseUrl).href : null; record[label] = href ? new URL(href, window.location.origin).href : null;
} else { } else {
record[label] = fieldElement.getAttribute(attribute); record[label] = fieldElement.getAttribute(attribute);
} }
@@ -346,5 +346,5 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
return results; return results;
}; };
})(window); })(window);