feat: load website html
This commit is contained in:
19
scraper/src/load.js
Normal file
19
scraper/src/load.js
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
import { PlaywrightCrawler, Configuration } from "crawlee";
|
||||||
|
|
||||||
|
/**
 * Load a web page in a Playwright-driven browser via Crawlee and return its HTML.
 *
 * @param {string} url - Address of the page to load.
 * @returns {Promise<string>} Serialized HTML of the loaded page, or an empty
 *   string if the request handler never stored a result (for example because
 *   the request failed).
 */
async function loadWebsite(url) {
    // Filled in by the request handler once the page has been loaded.
    let htmlContent = '';

    const crawler = new PlaywrightCrawler({
        // Crawlee has already navigated `page` to the request URL by the time
        // this handler runs, so calling `page.goto(url)` here again would load
        // the page a second time. Just read the content.
        requestHandler: async ({ page }) => {
            htmlContent = await page.content();
        },
    },
    new Configuration({
        // Do not persist Crawlee storage between runs.
        persistStorage: false,
    }));

    await crawler.run([url]);
    return htmlContent;
}
|
||||||
|
|
||||||
|
export default loadWebsite;
|
||||||
Reference in New Issue
Block a user