chore: remove crawlee logic
This commit is contained in:
@@ -1,6 +1,4 @@
|
|||||||
const fastify = require('fastify')({
|
const fastify = require('fastify')();
|
||||||
logger: true
|
|
||||||
});
|
|
||||||
const scraper = require('./scraper');
|
const scraper = require('./scraper');
|
||||||
// Change this later
|
// Change this later
|
||||||
const corsOptions = {
|
const corsOptions = {
|
||||||
|
|||||||
@@ -1,15 +0,0 @@
|
|||||||
// For more information, see https://crawlee.dev/
|
|
||||||
import { PlaywrightCrawler, ProxyConfiguration } from 'crawlee';
|
|
||||||
|
|
||||||
import { router } from './routes.js';
|
|
||||||
|
|
||||||
const startUrls = ['https://crawlee.dev'];
|
|
||||||
|
|
||||||
const crawler = new PlaywrightCrawler({
|
|
||||||
// proxyConfiguration: new ProxyConfiguration({ proxyUrls: ['...'] }),
|
|
||||||
requestHandler: router,
|
|
||||||
// Comment this option to scrape the full website.
|
|
||||||
maxRequestsPerCrawl: 20,
|
|
||||||
});
|
|
||||||
|
|
||||||
await crawler.run(startUrls);
|
|
||||||
@@ -1,21 +0,0 @@
|
|||||||
import { createPlaywrightRouter } from 'crawlee';
|
|
||||||
|
|
||||||
export const router = createPlaywrightRouter();
|
|
||||||
|
|
||||||
router.addDefaultHandler(async ({ enqueueLinks, log }) => {
|
|
||||||
log.info(`enqueueing new URLs`);
|
|
||||||
await enqueueLinks({
|
|
||||||
globs: ['https://crawlee.dev/**'],
|
|
||||||
label: 'detail',
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
router.addHandler('detail', async ({ request, page, log, pushData }) => {
|
|
||||||
const title = await page.title();
|
|
||||||
log.info(`${title}`, { url: request.loadedUrl });
|
|
||||||
|
|
||||||
await pushData({
|
|
||||||
url: request.loadedUrl,
|
|
||||||
title,
|
|
||||||
});
|
|
||||||
});
|
|
||||||
Reference in New Issue
Block a user