import Fastify from 'fastify'
import cors from '@fastify/cors'
import fetch from 'node-fetch';
import playwright from 'playwright'
import { PlaywrightCrawler } from 'crawlee';
// Fastify application instance.
const fastify = Fastify();

// CORS configuration for the local Vite dev server.
// TODO: make the allowed origin configurable before deploying.
const corsOptions = {
  origin: 'http://localhost:5173',
};

await fastify.register(cors, corsOptions);

// Landing route: confirms the API is up.
fastify.get('/', async (request, reply) => {
  reply.send('Welcome to the Playwright Scraper API');
});

// Start the HTTP server.
// Fastify's `listen` only returns a promise when NO callback is passed —
// the original mixed `await` with a callback, so the `await` resolved
// `undefined` and errors in the callback bypassed the promise chain.
// Fastify v4 also requires an options object instead of a bare port.
// A startup failure now rejects here and surfaces as a top-level throw,
// matching the original `if (err) throw err` intent.
await fastify.listen({ port: 3000 });
console.log(`Server listening on ${fastify.server.address().port}`)
/**
 * Scrape the text content of every element matching the given CSS selectors.
 *
 * @param {string} url - Page to visit.
 * @param {string[]} selectors - CSS selectors whose elements' text is collected.
 * @param {number} [waitForSeconds=2] - Extra settle time after navigation, in seconds.
 * @returns {Promise<string[]>} Trimmed text content of every matched element,
 *   in selector order. (The original only logged the data; returning it is
 *   backward-compatible and lets callers consume the result.)
 */
async function scrapeData(url, selectors, waitForSeconds = 2) {
  // Collected outside the handler so the function can return it after the run.
  const scrapedData = [];

  const crawler = new PlaywrightCrawler({
    requestHandler: async ({ page }) => {
      // NOTE: PlaywrightCrawler has already navigated `page` to the request
      // URL before invoking this handler, so the original explicit
      // `page.goto(url)` performed a redundant second navigation and was removed.

      // Give dynamic content a moment to render (optional settle time).
      await page.waitForTimeout(waitForSeconds * 1000);

      console.log('Received selectors:', selectors);

      for (const selector of selectors) {
        // Trimmed text of every element matching this selector.
        const elementData = await page.$$eval(selector, elements =>
          elements.map(el => el.textContent.trim()),
        );
        scrapedData.push(...elementData);
      }

      console.log('Scraped data:', scrapedData); // Replace with desired saving method
    },
  });

  await crawler.run([{ url }]);
  return scrapedData;
}
/**
 * Parse CLI arguments of the form: node script.mjs <url> <selector...> [waitSeconds]
 *
 * The trailing argument is treated as the wait time only when it is purely
 * numeric AND at least one selector would remain; otherwise every argument
 * after the URL is a selector and the wait defaults to 2 seconds.
 *
 * Fixes the original parsing, which read `argv[selectors.length + 2]` — the
 * LAST argv entry — so the final selector was always consumed as the wait
 * time while still being passed along as a selector (and `parseInt` was
 * called without a radix).
 *
 * @param {string[]} argv - The full process.argv array.
 * @returns {{ url: string | undefined, selectors: string[], waitForSeconds: number }}
 */
function parseCliArgs(argv) {
  const url = argv[2];
  const rest = argv.slice(3);
  const last = rest.at(-1);
  // Only claim the last token as the wait time when it is all digits and
  // removing it still leaves at least one selector.
  const lastIsWait = rest.length > 1 && /^\d+$/.test(last ?? '');
  return {
    url,
    selectors: lastIsWait ? rest.slice(0, -1) : rest,
    waitForSeconds: lastIsWait ? Number.parseInt(last, 10) : 2,
  };
}

const { url, selectors, waitForSeconds } = parseCliArgs(process.argv);

if (url && selectors.length > 0) {
  await scrapeData(url, selectors, waitForSeconds);
} else {
  console.error('Please provide URL and selectors as arguments.');
}
|