// Provenance: added as scraper/src/load.js by commit 2c1a9a4c
// ("feat: load website html").
import { PlaywrightCrawler, Configuration } from "crawlee";

/**
 * Load a single web page in a Playwright-driven browser and return its HTML.
 *
 * @param {string} url - Address of the page to load.
 * @returns {Promise<string>} The markup reported by `page.content()` for the
 *   loaded page. If the request handler never runs to completion, the initial
 *   empty string is returned unchanged.
 */
async function loadWebsite(url) {
  let htmlContent = '';

  const crawler = new PlaywrightCrawler(
    {
      // PlaywrightCrawler navigates `page` to the request URL before it calls
      // this handler, so a second `page.goto(url)` here would only load the
      // page twice. Read the already-loaded document directly.
      requestHandler: async ({ page }) => {
        htmlContent = await page.content();
      },
    },
    // Dedicated configuration for this crawl with storage persistence
    // switched off.
    new Configuration({
      persistStorage: false,
    }),
  );

  await crawler.run([url]);
  return htmlContent;
}

export default loadWebsite;