diff --git a/scraper/src/scraper.js b/scraper/src/scraper.js
index 24bd2941..903645d6 100644
--- a/scraper/src/scraper.js
+++ b/scraper/src/scraper.js
@@ -1,31 +1,115 @@
 import playwright from 'playwright';
 
-async function scraper(url, selectors) {
-const browser = await playwright.chromium.launch({
-  headless: false // setting this to true will not run the UI
-});
-  const page = await browser.newPage();
-
-  try {
-    await page.goto(url);
-    // Handle any required interactions (logins, captchas, etc.)
-
-    const scrapedData = await page.evaluate((selectors) => {
-      const data = [];
-      for (const selector of selectors) {
-        const elements = Array.from(document.querySelectorAll(selector));
-        const elementData = elements.map((el) => el.textContent);
-        data.push(elementData);
-      }
-      return data;
-    }, selectors);
-
-    await browser.close();
-    return scrapedData;
-  } catch (error) {
-    await browser.close();
-    throw error;
-  }
-}
-
-export default scraper;
\ No newline at end of file
+/**
+ * Check that a value is an absolute http(s) URL string.
+ *
+ * @param {unknown} value - Candidate taken from the request body.
+ * @returns {boolean} True when `value` parses as an http: or https: URL.
+ */
+function isHttpUrl(value) {
+  if (typeof value !== 'string') {
+    return false;
+  }
+  try {
+    const { protocol } = new URL(value);
+    return protocol === 'http:' || protocol === 'https:';
+  } catch {
+    return false;
+  }
+}
+
+/**
+ * Check that a value is an array of non-blank CSS selector strings.
+ *
+ * @param {unknown} value - Candidate taken from the request body.
+ * @returns {boolean} True when `value` is an array of non-blank strings.
+ */
+function isSelectorList(value) {
+  return (
+    Array.isArray(value) &&
+    value.every((entry) => typeof entry === 'string' && entry.trim() !== '')
+  );
+}
+
+/**
+ * Load `url` in `browser` and collect the text of every matching element.
+ * Takes ownership of `browser`: it is closed on success and on failure.
+ *
+ * @param {import('playwright').Browser} browser - Already-launched browser.
+ * @param {string} url - Page to open.
+ * @param {string[]} selectors - CSS selectors to query on the page.
+ * @returns {Promise<string[][]>} One array of `textContent` values per
+ *   selector, in the order the selectors were given.
+ */
+async function scrapeWith(browser, url, selectors) {
+  try {
+    const page = await browser.newPage();
+    await page.goto(url);
+    // Handle any required interactions (logins, captchas, etc.)
+
+    return await page.evaluate(
+      (selectorList) =>
+        selectorList.map((selector) =>
+          Array.from(document.querySelectorAll(selector), (el) => el.textContent),
+        ),
+      selectors,
+    );
+  } finally {
+    // Single close path, so a failing newPage()/goto()/evaluate() cannot
+    // leave a Chromium process behind.
+    await browser.close();
+  }
+}
+
+/**
+ * Scrape the text content of the elements matching each selector on a page.
+ *
+ * @param {string} url - Page to open.
+ * @param {string[]} selectors - CSS selectors to query on the page.
+ * @returns {Promise<string[][]>} One array of `textContent` values per selector.
+ * @throws Rethrows any Playwright launch, navigation or evaluation error.
+ */
+async function scraper(url, selectors) {
+  const browser = await playwright.chromium.launch({ headless: true });
+  return scrapeWith(browser, url, selectors);
+}
+
+/**
+ * Fastify plugin that registers `POST /scrape`.
+ *
+ * The route expects a JSON body `{ url, selectors }` and replies with the
+ * scraped text, 400 for a malformed body, or 500 when scraping fails.
+ *
+ * @param {object} fastify - Fastify instance supplied by `fastify.register()`.
+ */
+export async function scrapeRoutes(fastify) {
+  fastify.post('/scrape', async (request, reply) => {
+    const { url, selectors } = request.body || {};
+
+    if (!isHttpUrl(url) || !isSelectorList(selectors)) {
+      return reply.status(400).send({
+        error: 'Body must contain an http(s) "url" and a "selectors" string array',
+      });
+    }
+    // NOTE(review): `url` is caller-controlled, so this route can be pointed
+    // at internal hosts (SSRF). Add an allow-list before exposing it publicly.
+
+    let browser;
+    try {
+      browser = await playwright.chromium.launch({ headless: true });
+    } catch (error) {
+      console.error('Error launching browser:', error);
+      return reply.status(500).send({ error: 'Failed to initiate scraping' });
+    }
+
+    try {
+      const scrapedData = await scrapeWith(browser, url, selectors);
+      return reply.send(scrapedData);
+    } catch (error) {
+      console.error('Error scraping:', error);
+      return reply.status(500).send({ error: 'Failed to scrape data' });
+    }
+  });
+}
+
+export default scraper;