chore: remove crawlee logic

This commit is contained in:
karishmas6
2024-04-16 22:42:26 +05:30
parent ea3dd0185d
commit d92db8920a
3 changed files with 1 addition and 39 deletions

View File

@@ -1,6 +1,4 @@
const fastify = require('fastify')({
logger: true
});
const fastify = require('fastify')();
const scraper = require('./scraper');
// Change this later
const corsOptions = {

View File

@@ -1,15 +0,0 @@
// For more information, see https://crawlee.dev/
import { PlaywrightCrawler, ProxyConfiguration } from 'crawlee';
import { router } from './routes.js';
// URLs passed to the crawler's run() call below.
const seedUrls = ['https://crawlee.dev'];

// Crawler options are kept in their own object so the constructor call stays short.
const crawlerOptions = {
    // proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['...'] }),
    requestHandler: router,
    // Comment this option to scrape the full website.
    maxRequestsPerCrawl: 20,
};

const playwrightCrawler = new PlaywrightCrawler(crawlerOptions);

// Top-level await: the module finishes evaluating only after run() settles.
await playwrightCrawler.run(seedUrls);

View File

@@ -1,21 +0,0 @@
import { createPlaywrightRouter } from 'crawlee';
/**
 * Router instance created by crawlee's `createPlaywrightRouter`.
 * The two handlers defined below are registered on it.
 */
export const router = createPlaywrightRouter();

/**
 * Default handler: logs a message and enqueues links matching the
 * crawlee.dev glob, tagging them with the 'detail' label (the same name the
 * second handler is registered under).
 */
async function handleDefault(context) {
    const { enqueueLinks, log } = context;
    log.info(`enqueueing new URLs`);

    const linkOptions = {
        globs: ['https://crawlee.dev/**'],
        label: 'detail',
    };
    await enqueueLinks(linkOptions);
}

/**
 * 'detail' handler: reads the page title, logs it together with the loaded
 * URL, and pushes a `{ url, title }` record via `pushData`.
 */
async function handleDetail(context) {
    const { request, page, log, pushData } = context;
    const pageTitle = await page.title();
    log.info(`${pageTitle}`, { url: request.loadedUrl });

    const record = {
        url: request.loadedUrl,
        title: pageTitle,
    };
    await pushData(record);
}

// Registration order matches the original: default handler first, then 'detail'.
router.addDefaultHandler(handleDefault);
router.addHandler('detail', handleDetail);