Files
parcer/scraper/src/scraper.js

25 lines
791 B
JavaScript
Raw Normal View History

2024-04-20 00:47:50 +05:30
import { PlaywrightCrawler, Configuration } from 'crawlee';
2024-04-18 22:20:06 +05:30
2024-04-20 00:51:10 +05:30
/**
 * Scrape the trimmed text content of every element matching each selector
 * on the given page.
 *
 * @param {string} url - Absolute URL of the page to scrape.
 * @param {string[]} selectors - CSS selectors whose matched elements' text is collected.
 * @returns {Promise<string[]>} Trimmed `textContent` of every matched element,
 *   concatenated in selector order.
 */
async function scrapeData(url, selectors) {
  const scrapedData = [];

  const crawler = new PlaywrightCrawler(
    {
      requestHandler: async ({ page }) => {
        // NOTE: PlaywrightCrawler has already navigated `page` to the request's
        // URL before invoking this handler, so the previous explicit
        // `await page.goto(url)` caused a redundant second navigation and is
        // removed. The unused `request` destructuring is dropped as well.
        console.log('Received selectors:', selectors);
        for (const selector of selectors) {
          const elementData = await page.$$eval(selector, (elements) =>
            elements.map((el) => el.textContent.trim()),
          );
          scrapedData.push(...elementData);
        }
        console.log('Scraped data:', scrapedData);
      },
    },
    // Disable on-disk storage so repeated runs don't accumulate request queues.
    new Configuration({
      persistStorage: false,
    }),
  );

  await crawler.run([url]);
  return scrapedData;
}
export default scrapeData;