chore: remove crawlee logic
This commit is contained in:
@@ -1,6 +1,4 @@
|
||||
const fastify = require('fastify')({
|
||||
logger: true
|
||||
});
|
||||
const fastify = require('fastify')();
|
||||
const scraper = require('./scraper');
|
||||
// Change this later
|
||||
const corsOptions = {
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
// For more information, see https://crawlee.dev/
|
||||
import { PlaywrightCrawler, ProxyConfiguration } from 'crawlee';
|
||||
|
||||
import { router } from './routes.js';
|
||||
|
||||
const startUrls = ['https://crawlee.dev'];
|
||||
|
||||
const crawler = new PlaywrightCrawler({
|
||||
// proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['...'] }),
|
||||
requestHandler: router,
|
||||
// Comment this option to scrape the full website.
|
||||
maxRequestsPerCrawl: 20,
|
||||
});
|
||||
|
||||
await crawler.run(startUrls);
|
||||
@@ -1,21 +0,0 @@
|
||||
import { createPlaywrightRouter } from 'crawlee';
|
||||
|
||||
export const router = createPlaywrightRouter();
|
||||
|
||||
router.addDefaultHandler(async ({ enqueueLinks, log }) => {
|
||||
log.info(`enqueueing new URLs`);
|
||||
await enqueueLinks({
|
||||
globs: ['https://crawlee.dev/**'],
|
||||
label: 'detail',
|
||||
});
|
||||
});
|
||||
|
||||
router.addHandler('detail', async ({ request, page, log, pushData }) => {
|
||||
const title = await page.title();
|
||||
log.info(`${title}`, { url: request.loadedUrl });
|
||||
|
||||
await pushData({
|
||||
url: request.loadedUrl,
|
||||
title,
|
||||
});
|
||||
});
|
||||
Reference in New Issue
Block a user