(function (window) {
  /**
   * Escapes a raw string so it can be embedded in a double-quoted CSS
   * attribute value (`[attr*="..."]`) without terminating the string early.
   *
   * @param {string} raw - Text to embed.
   * @returns {string} `raw` with every `"` and `\` backslash-escaped.
   */
  function escapeForAttributeValue(raw) {
    return String(raw).replace(/["\\]/g, '\\$&');
  }

  /**
   * Like `root.querySelector(selector)`, but yields `null` when the selector
   * is not valid CSS instead of throwing. Any other error is re-thrown.
   *
   * @param {Element} root - Element to search within.
   * @param {string} selector - Candidate CSS selector.
   * @returns {Element|null} First match, or `null` if none / invalid selector.
   */
  function safeQuerySelector(root, selector) {
    try {
      return root.querySelector(selector);
    } catch (err) {
      // Invalid selectors surface as an exception named "SyntaxError";
      // anything else is unexpected and must not be hidden.
      if (err && err.name === 'SyntaxError') {
        return null;
      }
      throw err;
    }
  }

  /**
   * Locates the element for one field inside a list item.
   *
   * Strict mode runs a single `querySelector` call (an invalid selector still
   * throws, so configuration mistakes stay visible). Flexible mode tries, in
   * order: the string as a CSS selector, the string as a substring of a class
   * attribute, and finally the string as the exact trimmed text of a descendant.
   *
   * @param {Element} item - List item to search within.
   * @param {string} selector - Field selector (or free text in flexible mode).
   * @param {boolean} flexible - Whether to use the fallback strategies.
   * @returns {Element|null} The matched element, or `null` when nothing matches.
   */
  function findFieldElement(item, selector, flexible) {
    if (!flexible) {
      return item.querySelector(selector);
    }

    return (
      safeQuerySelector(item, selector) ||
      safeQuerySelector(item, `[class*="${escapeForAttributeValue(selector)}"]`) ||
      Array.from(item.querySelectorAll('*')).find(
        (el) => typeof el.textContent === 'string' && el.textContent.trim() === selector
      ) ||
      null
    );
  }

  /**
   * Returns the trimmed text of an element.
   *
   * @param {Element} element - Element to read.
   * @param {boolean} preferRendered - Read `innerText` when the element has it.
   * @returns {string|null} Trimmed text, or `null` if no text property is a string.
   */
  function readTrimmedText(element, preferRendered) {
    // `innerText` is only defined on HTMLElement; SVG/MathML elements expose
    // just `textContent`, so fall back rather than calling `.trim()` on undefined.
    const text =
      preferRendered && typeof element.innerText === 'string'
        ? element.innerText
        : element.textContent;
    return typeof text === 'string' ? text.trim() : null;
  }

  /**
   * Extracts the configured value from a matched element.
   *
   * @param {Element} element - Matched element.
   * @param {string} [attribute] - `'href'`, `'src'`, `'textContent'` or
   *   `'innerText'`; any other value (including none) is treated as `'innerText'`.
   * @returns {string|null} The attribute value or trimmed text.
   */
  function readFieldValue(element, attribute) {
    switch (attribute) {
      case 'href':
      case 'src':
        return element.getAttribute(attribute);
      case 'textContent':
        return readTrimmedText(element, false);
      case 'innerText':
      default:
        return readTrimmedText(element, true);
    }
  }

  /**
   * Scrapes every list matching `config.listSelector` in the current document.
   *
   * The direct children of each matched list are treated as its items. For
   * every item, each entry of `config.fields` is resolved to an element and
   * the requested attribute/text is read from it; fields with no matching
   * element are reported as `null`.
   *
   * @param {Object} config - Scrape configuration.
   * @param {string} config.listSelector - CSS selector for the list containers.
   * @param {Object.<string, {selector: string, attribute: string}>} config.fields -
   *   Map of output field name to the selector and attribute to read.
   * @param {number} [config.limit] - Maximum number of items taken from each
   *   list; a falsy value (omitted, `0`) means no limit.
   * @param {boolean} [config.flexible=false] - Whether to use flexible matching
   *   for field selectors.
   * @returns {Array.<Array.<Object>>} Array of arrays of scraped items, one
   *   sub-array per list.
   */
  window.scrapeList = function (config) {
    const { listSelector, fields, limit, flexible = false } = config;

    const lists = Array.from(document.querySelectorAll(listSelector));

    return lists.map((list) => {
      const listItems = Array.from(list.children);
      const itemsToScrape = limit ? listItems.slice(0, limit) : listItems;

      return itemsToScrape.map((item) => {
        const scrapedItem = {};

        for (const [fieldName, fieldConfig] of Object.entries(fields)) {
          const element = findFieldElement(item, fieldConfig.selector, flexible);
          scrapedItem[fieldName] = element
            ? readFieldValue(element, fieldConfig.attribute)
            : null;
        }

        return scrapedItem;
      });
    });
  };
  // `window` is the global object in a browser page; falling back to
  // `globalThis` keeps this script loadable where no `window` binding exists.
})(typeof window !== 'undefined' ? window : globalThis);