From d92db8920abbbe46811fb0a61101037c52497362 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 16 Apr 2024 22:42:26 +0530 Subject: [PATCH] chore: remove crawlee logic --- scraper/src/main.js | 4 +--- scraper/src/main.ts | 15 --------------- scraper/src/routes.ts | 21 --------------------- 3 files changed, 1 insertion(+), 39 deletions(-) delete mode 100644 scraper/src/main.ts delete mode 100644 scraper/src/routes.ts diff --git a/scraper/src/main.js b/scraper/src/main.js index cc1a2f69..6b2df41a 100644 --- a/scraper/src/main.js +++ b/scraper/src/main.js @@ -1,6 +1,4 @@ -const fastify = require('fastify')({ - logger: true -}); +const fastify = require('fastify')(); const scraper = require('./scraper'); // Change this later const corsOptions = { diff --git a/scraper/src/main.ts b/scraper/src/main.ts deleted file mode 100644 index 53ecb8a7..00000000 --- a/scraper/src/main.ts +++ /dev/null @@ -1,15 +0,0 @@ -// For more information, see https://crawlee.dev/ -import { PlaywrightCrawler, ProxyConfiguration } from 'crawlee'; - -import { router } from './routes.js'; - -const startUrls = ['https://crawlee.dev']; - -const crawler = new PlaywrightCrawler({ - // proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['...'] }), - requestHandler: router, - // Comment this option to scrape the full website. - maxRequestsPerCrawl: 20, -}); - -await crawler.run(startUrls); diff --git a/scraper/src/routes.ts b/scraper/src/routes.ts deleted file mode 100644 index e2bea3cd..00000000 --- a/scraper/src/routes.ts +++ /dev/null @@ -1,21 +0,0 @@ -import { createPlaywrightRouter } from 'crawlee'; - -export const router = createPlaywrightRouter(); - -router.addDefaultHandler(async ({ enqueueLinks, log }) => { - log.info(`enqueueing new URLs`); - await enqueueLinks({ - globs: ['https://crawlee.dev/**'], - label: 'detail', - }); -}); - -router.addHandler('detail', async ({ request, page, log, pushData }) => { - const title = await page.title(); - log.info(`${title}`, { url: request.loadedUrl }); - - await pushData({ - url: request.loadedUrl, - title, - }); -});