From 32ec5b6f4508460db1b38a05f3f33ff1d7070ef8 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 18 Apr 2024 22:19:24 +0530 Subject: [PATCH] chore: move cli scraper --- scraper/src/main.js | 35 +---------------------------------- 1 file changed, 1 insertion(+), 34 deletions(-) diff --git a/scraper/src/main.js b/scraper/src/main.js index e78d88ed..ba598a7e 100644 --- a/scraper/src/main.js +++ b/scraper/src/main.js @@ -20,37 +20,4 @@ fastify.get('/', async (request, reply) => { await fastify.listen(3000, (err, address) => { if (err) throw err; console.log(`Server listening on ${fastify.server.address().port}`) -}); - -async function scrapeData(url, selectors, waitForSeconds = 2) { - const crawler = new PlaywrightCrawler({ - requestHandler: async ({ page }) => { - await page.goto(url); - - // Wait for specific time (optional) - await page.waitForTimeout(waitForSeconds * 1000); - - console.log('Received selectors:', selectors); - - const scrapedData = []; - for (const selector of selectors) { - const elementData = await page.$$eval(selector, elements => elements.map(el => el.textContent.trim())); - scrapedData.push(...elementData); - } - - console.log('Scraped data:', scrapedData); // Replace with desired saving method - }, - }); - - await crawler.run([{ url }]); -} - -const url = process.argv[2]; -const selectors = process.argv.slice(3); // Selectors are passed as subsequent arguments -const waitForSeconds = parseInt(process.argv[selectors.length + 2] || 2); // Optional wait time - -if (url && selectors.length > 0) { - await scrapeData(url, selectors, waitForSeconds); -} else { - console.error('Please provide URL and selectors as arguments.'); -} \ No newline at end of file +}); \ No newline at end of file