From e34ab021b0b248456cc18e5987a90588481f84f4 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Mon, 27 May 2024 20:56:07 +0530 Subject: [PATCH] chore(api): remove old code --- scraper/src/load.js | 19 ------------------- scraper/src/main.js | 43 ------------------------------------------ scraper/src/scraper.js | 25 ------------------------ scraper/tsconfig.json | 28 +++++++++++++-------------- 4 files changed, 14 insertions(+), 101 deletions(-) delete mode 100644 scraper/src/load.js delete mode 100644 scraper/src/main.js delete mode 100644 scraper/src/scraper.js diff --git a/scraper/src/load.js b/scraper/src/load.js deleted file mode 100644 index 5b824ba3..00000000 --- a/scraper/src/load.js +++ /dev/null @@ -1,19 +0,0 @@ -import { PlaywrightCrawler, Configuration } from "crawlee"; - -async function loadWebsite(url) { - let htmlContent = ''; - const crawler = new PlaywrightCrawler({ - requestHandler: async ({ page }) => { - await page.goto(url); - htmlContent = await page.content(); - } - }, - new Configuration({ - persistStorage: false, - })); - - await crawler.run([url]); - return htmlContent; -} - -export default loadWebsite; \ No newline at end of file diff --git a/scraper/src/main.js b/scraper/src/main.js deleted file mode 100644 index 29276f53..00000000 --- a/scraper/src/main.js +++ /dev/null @@ -1,43 +0,0 @@ -import Fastify from 'fastify' -import cors from '@fastify/cors' -import scrapeData from './scraper'; -import loadWebsite from './load'; - -const fastify = Fastify(); - -// Change this later -const corsOptions = { - origin: 'http://localhost:5173' -} - -await fastify.register(cors, corsOptions) - -fastify.get('/', async (request, reply) => { - reply.send('Vroom Vroom Vroom'); -}); - -fastify.post('/load-website', async (request, reply) => { - const { url } = request.body; - try { - const response = await loadWebsite(url); - reply.send(response); - console.log('Response is::', response) - } catch (error) { - reply.status(500).send({ error: error }); - } -}); - -fastify.post('/scrape', async (request, reply) => { - const { url, selectors } = request.body; - try { - const response = await scrapeData(url, selectors); - reply.send(response); - } catch (error) { - reply.status(500).send({ error: error.message }); - } -}); - -await fastify.listen(3000, (err, address) => { - if (err) throw err; - console.log(`Server listening on ${fastify.server.address().port}`) -}); \ No newline at end of file diff --git a/scraper/src/scraper.js b/scraper/src/scraper.js deleted file mode 100644 index 40c976a7..00000000 --- a/scraper/src/scraper.js +++ /dev/null @@ -1,25 +0,0 @@ -import { PlaywrightCrawler, Configuration } from 'crawlee'; - -async function scrapeData(url, selectors) { - const scrapedData = []; - const crawler = new PlaywrightCrawler({ - requestHandler: async ({ page, request }) => { - await page.goto(url); - console.log('Received selectors:', selectors); - for (const selector of selectors) { - const elementData = await page.$$eval(selector, elements => elements.map(el => el.textContent.trim())); - scrapedData.push(...elementData); - } - console.log('Scraped data:', scrapedData); - }, - }, - new Configuration({ - persistStorage: false, - })); - - await crawler.run([url]); - return scrapedData; - -} - -export default scrapeData; \ No newline at end of file diff --git a/scraper/tsconfig.json b/scraper/tsconfig.json index 5b76f5cd..120529f9 100644 --- a/scraper/tsconfig.json +++ b/scraper/tsconfig.json @@ -1,14 +1,14 @@ -{ - "extends": "@apify/tsconfig", - "compilerOptions": { - "module": "NodeNext", - "moduleResolution": "NodeNext", - "target": "ES2022", - "outDir": "dist", - "noUnusedLocals": false, - "lib": ["DOM"] - }, - "include": [ - "./src/**/*" - ] -} +// { +// "extends": "@apify/tsconfig", +// "compilerOptions": { +// "module": "NodeNext", +// "moduleResolution": "NodeNext", +// "target": "ES2022", +// "outDir": "dist", +// "noUnusedLocals": false, +// "lib": ["DOM"] +// }, +// "include": [ +// "./src/**/*" +// ] +// }