fix: use addInitScript THE RIGHT WAY!!!

This commit is contained in:
karishmas6
2024-08-02 19:05:41 +05:30
parent 12e40f089d
commit 2aa7cdc4bc

View File

@@ -1,5 +1,5 @@
/* eslint-disable no-await-in-loop, no-restricted-syntax */ /* eslint-disable no-await-in-loop, no-restricted-syntax */
import { Page, PageScreenshotOptions } from 'playwright'; import { Page, PageScreenshotOptions, BrowserContextOptions } from 'playwright';
import path from 'path'; import path from 'path';
import { EventEmitter } from 'events'; import { EventEmitter } from 'events';
@@ -277,26 +277,28 @@ export default class Interpreter extends EventEmitter {
await page.close(); await page.close();
}, },
scrape: async (selector?: string) => { scrape: async (selector?: string) => {
await this.ensureScriptsLoaded(page);
// Check if 'scrape' function is available in the page context // Check if 'scrape' function is available in the page context
const isScrapeAvailable = await page.evaluate(() => typeof window.scrape === 'function'); // const isScrapeAvailable = await page.evaluate(() => typeof window.scrape === 'function');
if (!isScrapeAvailable) { // if (!isScrapeAvailable) {
// Inject the script that defines the 'scrape' function // // Inject the script that defines the 'scrape' function
await page.addScriptTag({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); // await page.addScriptTag({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
} // }
const scrapeResults: Record<string, string>[] = await page.evaluate((s) => window.scrape(s ?? null), selector); const scrapeResults: Record<string, string>[] = await page.evaluate((s) => window.scrape(s ?? null), selector);
await this.options.serializableCallback(scrapeResults); await this.options.serializableCallback(scrapeResults);
}, },
scrapeSchema: async (schema: Record<string, string>) => { scrapeSchema: async (schema: Record<string, string>) => {
await this.ensureScriptsLoaded(page);
// Check if 'scrapeSchema' function is available in the page context // Check if 'scrapeSchema' function is available in the page context
const isScrapeSchemaAvailable = await page.evaluate(() => typeof window.scrapeSchema === 'function'); // const isScrapeSchemaAvailable = await page.evaluate(() => typeof window.scrapeSchema === 'function');
if (!isScrapeSchemaAvailable) { // if (!isScrapeSchemaAvailable) {
// Inject the script that defines the 'scrapeSchema' function // // Inject the script that defines the 'scrapeSchema' function
await page.addScriptTag({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); // await page.addScriptTag({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
} // }
const handleLists = await Promise.all( const handleLists = await Promise.all(
Object.values(schema).map((selector) => page.$$(selector)), Object.values(schema).map((selector) => page.$$(selector)),
@@ -429,6 +431,13 @@ export default class Interpreter extends EventEmitter {
} }
} }
private async ensureScriptsLoaded(page: Page) {
const isScriptLoaded = await page.evaluate(() => typeof window.scrape === 'function' && typeof window.scrapeSchema === 'function');
if (!isScriptLoaded) {
await page.addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
}
}
/** /**
* Spawns a browser context and runs given workflow. * Spawns a browser context and runs given workflow.
* \ * \
@@ -446,15 +455,16 @@ export default class Interpreter extends EventEmitter {
*/ */
this.initializedWorkflow = Preprocessor.initWorkflow(this.workflow, params); this.initializedWorkflow = Preprocessor.initWorkflow(this.workflow, params);
await this.ensureScriptsLoaded(page);
// @ts-ignore // @ts-ignore
// if (await page.evaluate(() => !<any>window.scrape)) { // if (await page.evaluate(() => !<any>window.scrape)) {
// page.context().addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') }); // page.context().addInitScript({ path: path.join(__dirname, 'browserSide', 'scraper.js') });
// } // }
// Enable CSP bypass for the page // await page.context({
await page.context().setExtraHTTPHeaders({ // bypassCSP: true,
'Content-Security-Policy': '' // })
});
this.stopper = () => { this.stopper = () => {
this.stopper = null; this.stopper = null;