From 5ed8e8ae427fc47e645c7b37dfb770df96f727dc Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 6 Dec 2024 03:20:30 +0530 Subject: [PATCH 1/3] fix: use window.location.origin instead of base url --- maxun-core/src/browserSide/scraper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 369a08be..467d0eb2 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -285,7 +285,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, } else if (attribute === 'src') { // Handle relative 'src' URLs const src = fieldElement.getAttribute('src'); - record[label] = src ? new URL(src, baseUrl).href : null; + record[label] = src ? new URL(src, window.location.origin).href : null; } else if (attribute === 'href') { // Handle relative 'href' URLs const href = fieldElement.getAttribute('href'); From 5985bc11f058c2e730b3daee180a6fa0ff710163 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 6 Dec 2024 03:20:48 +0530 Subject: [PATCH 2/3] fix: use window.location.origin instead of base url --- maxun-core/src/browserSide/scraper.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 467d0eb2..d3410de4 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -289,7 +289,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, } else if (attribute === 'href') { // Handle relative 'href' URLs const href = fieldElement.getAttribute('href'); - record[label] = href ? new URL(href, baseUrl).href : null; + record[label] = href ? new URL(href, window.location.origin).href : null; } else { record[label] = fieldElement.getAttribute(attribute); } From 429ddaa5719634b84f481c2360702c1bc59aa296 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Fri, 6 Dec 2024 03:21:18 +0530 Subject: [PATCH 3/3] chore: lint --- maxun-core/src/browserSide/scraper.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index d3410de4..09b6578b 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -283,9 +283,9 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, } else if (attribute === 'innerHTML') { record[label] = fieldElement.innerHTML.trim(); } else if (attribute === 'src') { - // Handle relative 'src' URLs - const src = fieldElement.getAttribute('src'); - record[label] = src ? new URL(src, window.location.origin).href : null; + // Handle relative 'src' URLs + const src = fieldElement.getAttribute('src'); + record[label] = src ? new URL(src, window.location.origin).href : null; } else if (attribute === 'href') { // Handle relative 'href' URLs const href = fieldElement.getAttribute('href'); @@ -346,5 +346,5 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return results; }; - + })(window); \ No newline at end of file