fix: return scrapedData after crawlee.run
This commit is contained in:
@@ -1,28 +1,25 @@
|
|||||||
import { PlaywrightCrawler } from 'crawlee';
|
import { PlaywrightCrawler } from 'crawlee';
|
||||||
|
|
||||||
/**
 * Scrape trimmed text content from a page for each of the given CSS selectors.
 *
 * Results are collected into a function-scoped accumulator because Crawlee
 * ignores the return value of `requestHandler` — returning data from inside
 * the handler (as an earlier revision did) silently discards it.
 *
 * @param {string} url - Page to visit.
 * @param {string[]} selectors - CSS selectors whose matching elements' text is collected.
 * @param {number} [waitForSeconds=2] - Extra settle time after navigation, for dynamically rendered content.
 * @returns {Promise<string[]>} Flat list of trimmed text values, in selector order.
 */
async function scrapeData(url, selectors, waitForSeconds = 2) {
  // Accumulator must live at function scope so it survives the crawler run.
  const scrapedData = [];

  const crawler = new PlaywrightCrawler({
    requestHandler: async ({ page }) => {
      await page.goto(url);

      // Wait for specific time (optional) — lets client-rendered content appear.
      await page.waitForTimeout(waitForSeconds * 1000);

      console.log('Received selectors:', selectors);

      for (const selector of selectors) {
        const elementData = await page.$$eval(selector, (elements) =>
          elements.map((el) => el.textContent.trim()),
        );
        scrapedData.push(...elementData);
      }

      console.log('Scraped data:', scrapedData);
    },
  });

  await crawler.run([{ url }]);

  return scrapedData;
}

export default scrapeData;
|
||||||
Reference in New Issue
Block a user