From a7c7d538891b1ddb2ce8834bba5cfc558697190d Mon Sep 17 00:00:00 2001
From: karishmas6
Date: Tue, 16 Apr 2024 23:49:35 +0530
Subject: [PATCH] feat: basic scraping (wip)

---
Review note: the added file was revised during review (browser cleanup moved
into try/finally, JSDoc added, trailing newline added). The blob id on the
"index" line below is the one from the original submission and is stale.

 scraper/src/scraper.js | 42 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 42 insertions(+)
 create mode 100644 scraper/src/scraper.js

diff --git a/scraper/src/scraper.js b/scraper/src/scraper.js
new file mode 100644
index 00000000..f33a39f3
--- /dev/null
+++ b/scraper/src/scraper.js
@@ -0,0 +1,42 @@
+const { chromium } = require('playwright');
+
+/**
+ * Scrape the text content of every element matching each selector on a page.
+ *
+ * Launches Chromium through Playwright, navigates to `url`, and runs
+ * `document.querySelectorAll` in the page for every entry of `selectors`.
+ * The browser is closed before the function settles, on success and on error.
+ *
+ * @param {string} url - Address handed to `page.goto`.
+ * @param {string[]} selectors - Selectors passed to `document.querySelectorAll`.
+ * @returns {Promise<string[][]>} One inner array per selector, in input
+ *   order, holding the `textContent` of each matching element.
+ * @throws Rethrows whatever Playwright throws while opening the page,
+ *   navigating, or evaluating; the browser is closed first.
+ */
+async function scraper(url, selectors) {
+  const browser = await chromium.launch();
+
+  // Everything after launch sits inside try/finally so the browser process is
+  // released even when newPage(), goto() or evaluate() rejects.
+  try {
+    const page = await browser.newPage();
+    await page.goto(url);
+    // TODO: handle any required interactions (logins, captchas, etc.)
+
+    // The callback runs inside the page, so it can only use its argument and
+    // page globals such as `document`.
+    return await page.evaluate((selectorList) => {
+      const data = [];
+      for (const selector of selectorList) {
+        const elements = document.querySelectorAll(selector);
+        data.push(Array.from(elements, (el) => el.textContent));
+      }
+      return data;
+    }, selectors);
+  } finally {
+    await browser.close();
+  }
+}
+
+module.exports = scraper;