// Origin: added as scraper/src/scraper.js by commit 75d55f39 ("chore: move cli scraper").
//
// CLI usage:
//   node scraper.js <url> <selector> [<selector> ...] [waitSeconds]
//
// This file is an ES module (it relies on top-level `await`).

const DEFAULT_WAIT_SECONDS = 2;

/**
 * Load a page with a Playwright-backed crawler and collect the trimmed text
 * content of every element matching each of the given CSS selectors.
 *
 * @param {string} url - Page to load.
 * @param {string[]} selectors - CSS selectors to evaluate, in order.
 * @param {number} [waitForSeconds=2] - Seconds to pause after the page has
 *   loaded before reading it. Non-finite or non-positive values skip the pause.
 * @returns {Promise<string[]>} Text of all matched elements, grouped in
 *   selector order. Empty if the request handler never completed.
 */
async function scrapeData(url, selectors, waitForSeconds = DEFAULT_WAIT_SECONDS) {
  // Imported lazily so that argument validation (and the usage message) works
  // without loading the crawler. NOTE(review): assumes the project depends on
  // the `crawlee` package, which exports PlaywrightCrawler - confirm.
  const { PlaywrightCrawler } = await import('crawlee');

  // Guard against NaN / negative input so waitForTimeout never gets a bad value.
  const waitMs =
    Number.isFinite(waitForSeconds) && waitForSeconds > 0 ? waitForSeconds * 1000 : 0;

  let scrapedData = [];

  const crawler = new PlaywrightCrawler({
    requestHandler: async ({ page }) => {
      // The crawler has already navigated `page` to the request URL before
      // calling this handler, so no explicit page.goto() is needed here.
      if (waitMs > 0) {
        await page.waitForTimeout(waitMs);
      }

      console.log('Received selectors:', selectors);

      // Build into a fresh array so a retried request cannot leave duplicates.
      const collected = [];
      for (const selector of selectors) {
        const texts = await page.$$eval(selector, (elements) =>
          elements.map((el) => (el.textContent ?? '').trim()),
        );
        collected.push(...texts);
      }
      scrapedData = collected;

      console.log('Scraped data:', scrapedData); // Replace with desired saving method
    },
  });

  await crawler.run([{ url }]);
  return scrapedData;
}

/**
 * Split a `process.argv`-style array into the scraper's inputs.
 *
 * The first user argument is the URL and the rest are selectors. If the final
 * argument is a plain non-negative number it is taken as the wait time and
 * removed from the selector list (a bare number is not a valid CSS selector,
 * so this cannot swallow a real selector).
 *
 * @param {string[]} argv - Full argument vector, including the two leading
 *   runtime/script entries.
 * @returns {{ url: (string|undefined), selectors: string[], waitForSeconds: number }}
 */
function parseCliArgs(argv) {
  const [url, ...selectors] = argv.slice(2);
  let waitForSeconds = DEFAULT_WAIT_SECONDS;

  const lastArg = selectors[selectors.length - 1];
  if (lastArg !== undefined && /^\d+(\.\d+)?$/.test(lastArg)) {
    waitForSeconds = Number(selectors.pop());
  }

  return { url, selectors, waitForSeconds };
}

const { url, selectors, waitForSeconds } = parseCliArgs(process.argv);

if (url && selectors.length > 0) {
  await scrapeData(url, selectors, waitForSeconds);
} else {
  console.error('Please provide URL and selectors as arguments.');
}