fix(core): script injection for scrape | scrapeSchema

This commit is contained in:
karishmas6
2024-08-01 19:16:13 +05:30
parent 7d57e5adc0
commit 92bb30d8b7

View File

@@ -277,25 +277,39 @@ export default class Interpreter extends EventEmitter {
await page.close(); await page.close();
}, },
/**
 * Runs the browser-side `scrape` helper on the current page and forwards the
 * result to `options.serializableCallback`.
 *
 * The helper is injected on demand: the page is first asked whether
 * `window.scrape` is a function, and `browserSide/scraper.js` is added as a
 * script tag only when it is not.
 *
 * @param selector Optional selector handed to the browser-side `scrape`;
 *                 `null` is passed when it is omitted.
 */
scrape: async (selector?: string) => {
  // Check if 'scrape' function is available in the page context.
  const isScrapeAvailable = await page.evaluate(() => typeof window.scrape === 'function');
  if (!isScrapeAvailable) {
    // Inject the script that defines the 'scrape' function.
    await page.addScriptTag({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
  }

  const scrapeResults: Record<string, string>[] = await page.evaluate(
    (s) => window.scrape(s ?? null),
    selector,
  );
  await this.options.serializableCallback(scrapeResults);
},
/**
 * Resolves every selector in `schema` to its element handles, passes them to
 * the browser-side `scrapeSchema` helper, and forwards the result to
 * `options.serializableCallback`.
 *
 * The helper is injected on demand: `browserSide/scraper.js` is added as a
 * script tag only when `window.scrapeSchema` is not already a function.
 *
 * @param schema Map from output field name to the selector used to look up
 *               that field's elements.
 */
scrapeSchema: async (schema: Record<string, string>) => {
  // Check if 'scrapeSchema' function is available in the page context.
  const isScrapeSchemaAvailable = await page.evaluate(() => typeof window.scrapeSchema === 'function');
  if (!isScrapeSchemaAvailable) {
    // Inject the script that defines the 'scrapeSchema' function.
    await page.addScriptTag({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
  }

  // Query all selectors concurrently; result order follows Object.values(schema).
  const handleLists = await Promise.all(
    Object.values(schema).map((selector) => page.$$(selector)),
  );
  // Object.keys and Object.values share the same order, so index i pairs each
  // key with the handles found for its selector.
  const namedHandleLists = Object.fromEntries(
    Object.keys(schema).map((key, i) => [key, handleLists[i]]),
  );

  const scrapeResult = await page.evaluate((n) => window.scrapeSchema(n), namedHandleLists);
  // Await the callback so a rejection surfaces here instead of becoming a
  // floating promise; this matches how `scrape` reports its results.
  await this.options.serializableCallback(scrapeResult);
},
scroll: async (pages?: number) => { scroll: async (pages?: number) => {
await page.evaluate(async (pagesInternal) => { await page.evaluate(async (pagesInternal) => {
for (let i = 1; i <= (pagesInternal ?? 1); i += 1) { for (let i = 1; i <= (pagesInternal ?? 1); i += 1) {
@@ -433,9 +447,9 @@ export default class Interpreter extends EventEmitter {
this.initializedWorkflow = Preprocessor.initWorkflow(this.workflow, params); this.initializedWorkflow = Preprocessor.initWorkflow(this.workflow, params);
// @ts-ignore // @ts-ignore
if (await page.evaluate(() => !<any>window.scrape)) { // if (await page.evaluate(() => !<any>window.scrape)) {
page.context().addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); // page.context().addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
} // }
this.stopper = () => { this.stopper = () => {
this.stopper = null; this.stopper = null;