From 92bb30d8b7edf7234d9c8a913ad77ad26de4bcc5 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Thu, 1 Aug 2024 19:16:13 +0530 Subject: [PATCH] fix(core): script injection for scrape | scrapeSchema --- maxun-core/src/interpret.ts | 48 ++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 9ac32b0e..c7cedc02 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -277,25 +277,39 @@ export default class Interpreter extends EventEmitter { await page.close(); }, scrape: async (selector?: string) => { - const scrapeResults: Record[] = await page - // eslint-disable-next-line - // @ts-ignore - .evaluate((s) => scrape(s ?? null), selector); + // Check if 'scrape' function is available in the page context + const isScrapeAvailable = await page.evaluate(() => typeof window.scrape === 'function'); + + if (!isScrapeAvailable) { + // Inject the script that defines the 'scrape' function + await page.addScriptTag({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); + } + + const scrapeResults: Record[] = await page.evaluate((s) => window.scrape(s ?? null), selector); await this.options.serializableCallback(scrapeResults); - }, - scrapeSchema: async (schema: Record) => { + }, + + scrapeSchema: async (schema: Record) => { + // Check if 'scrapeSchema' function is available in the page context + const isScrapeSchemaAvailable = await page.evaluate(() => typeof window.scrapeSchema === 'function'); + + if (!isScrapeSchemaAvailable) { + // Inject the script that defines the 'scrapeSchema' function + await page.addScriptTag({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); + } + const handleLists = await Promise.all( - Object.values(schema).map((selector) => page.$$(selector)), + Object.values(schema).map((selector) => page.$$(selector)), ); - + const namedHandleLists = Object.fromEntries( - Object.keys(schema).map((key, i) => [key, handleLists[i]]), + Object.keys(schema).map((key, i) => [key, handleLists[i]]), ); - - const scrapeResult = await page.evaluate((n) => scrapeSchema(n), namedHandleLists); - - this.options.serializableCallback(scrapeResult); - }, + + const scrapeResult = await page.evaluate((n) => window.scrapeSchema(n), namedHandleLists); + await this.options.serializableCallback(scrapeResult); + }, + scroll: async (pages?: number) => { await page.evaluate(async (pagesInternal) => { for (let i = 1; i <= (pagesInternal ?? 1); i += 1) { @@ -433,9 +447,9 @@ export default class Interpreter extends EventEmitter { this.initializedWorkflow = Preprocessor.initWorkflow(this.workflow, params); // @ts-ignore - if (await page.evaluate(() => !window.scrape)) { - page.context().addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); - } + // if (await page.evaluate(() => !window.scrape)) { + // page.context().addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); + // } this.stopper = () => { this.stopper = null;