feat: basic scraping (wip)
This commit is contained in:
29
scraper/src/scraper.js
Normal file
29
scraper/src/scraper.js
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
const { chromium } = require('playwright');
|
||||||
|
|
||||||
|
async function scraper(url, selectors) {
|
||||||
|
const browser = await chromium.launch();
|
||||||
|
const page = await browser.newPage();
|
||||||
|
|
||||||
|
try {
|
||||||
|
await page.goto(url);
|
||||||
|
// Handle any required interactions (logins, captchas, etc.)
|
||||||
|
|
||||||
|
const scrapedData = await page.evaluate((selectors) => {
|
||||||
|
const data = [];
|
||||||
|
for (const selector of selectors) {
|
||||||
|
const elements = Array.from(document.querySelectorAll(selector));
|
||||||
|
const elementData = elements.map((el) => el.textContent);
|
||||||
|
data.push(elementData);
|
||||||
|
}
|
||||||
|
return data;
|
||||||
|
}, selectors);
|
||||||
|
|
||||||
|
await browser.close();
|
||||||
|
return scrapedData;
|
||||||
|
} catch (error) {
|
||||||
|
await browser.close();
|
||||||
|
throw error;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
module.exports = scraper;
|
||||||
Reference in New Issue
Block a user