From 94aa8bcf42d0266febb54516c12a98a562be47b3 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 23 Nov 2024 07:05:55 +0530 Subject: [PATCH] feat: handle relative href urls in scrape list --- maxun-core/src/browserSide/scraper.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 7ee64dc1..e5e1c697 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -284,7 +284,9 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, } else if (attribute === 'src') { record[label] = fieldElement.src; } else if (attribute === 'href') { - record[label] = fieldElement.href; + // Handle relative 'href' URLs + const href = fieldElement.getAttribute('href'); + record[label] = href ? new URL(href, baseUrl).href : null; } else { record[label] = fieldElement.getAttribute(attribute); }