From 2800e566e91e6971192379fa414a92749d67202e Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Sun, 27 Oct 2024 01:09:00 +0530 Subject: [PATCH] feat: set proxy --- maxun-core/src/interpret.ts | 73 ++++++++++++++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 5 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index a84df57b..366b16ee 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -1,5 +1,5 @@ /* eslint-disable no-await-in-loop, no-restricted-syntax */ -import { Page, PageScreenshotOptions } from 'playwright'; +import { Page, PageScreenshotOptions, Browser, BrowserContext } from 'playwright'; import { PlaywrightBlocker } from '@cliqz/adblocker-playwright'; import fetch from 'cross-fetch'; import path from 'path'; @@ -15,6 +15,7 @@ import { arrayToObject } from './utils/utils'; import Concurrency from './utils/concurrency'; import Preprocessor from './preprocessor'; import log, { Level } from './utils/logger'; +import { ProxyConfig } from './proxy'; /** * Defines optional intepreter options (passed in constructor) @@ -29,6 +30,8 @@ interface InterpreterOptions { activeId: Function, debugMessage: Function, }> + proxy?: ProxyConfig | null; + onProxyError?: (error: Error, proxy: ProxyConfig) => Promise; } @@ -50,6 +53,12 @@ export default class Interpreter extends EventEmitter { private blocker: PlaywrightBlocker | null = null; + private browser: Browser | null = null; + + private contexts: BrowserContext[] = []; + + private currentProxy: ProxyConfig | null = null; + constructor(workflow: WorkflowFile, options?: Partial) { super(); this.workflow = workflow.workflow; @@ -61,8 +70,15 @@ export default class Interpreter extends EventEmitter { binaryCallback: () => { log('Received binary data, thrashing them.', Level.WARN); }, debug: false, debugChannel: {}, + proxy: null, + onProxyError: async (error: Error, proxy: ProxyConfig) => { + this.log(`Proxy error: ${error.message}`, Level.ERROR); + return null; + }, + ...options, }; + this.currentProxy = this.options.proxy; this.concurrency = new Concurrency(this.options.maxConcurrency); this.log = (...args) => log(...args); @@ -89,6 +105,41 @@ export default class Interpreter extends EventEmitter { }) } + public updateProxy(proxyConfig: ProxyConfig | null): void { + this.currentProxy = proxyConfig; + this.log(`Proxy configuration updated`, Level.LOG); + } + + private async createProxyContext(browser: Browser): Promise { + if (!this.currentProxy) { + return browser.newContext(); + } + + try { + const context = await browser.newContext({ + proxy: this.currentProxy + }); + this.contexts.push(context); + return context; + } catch (error) { + if (this.options.onProxyError) { + const newProxy = await this.options.onProxyError(error as Error, this.currentProxy); + if (newProxy) { + this.currentProxy = newProxy; + return this.createProxyContext(browser); + } + } + throw error; + } + } + + // create a new page with proxy + private async createProxyPage(context: BrowserContext): Promise { + const page = await context.newPage(); + await page.setViewportSize({ width: 900, height: 400 }); + return page; + } + private async applyAdBlocker(page: Page): Promise { if (this.blocker) { await this.blocker.enableBlockingInPage(page); @@ -280,13 +331,13 @@ export default class Interpreter extends EventEmitter { // @ts-ignore (elements) => elements.map((a) => a.href).filter((x) => x), ); - const context = page.context(); + const context = await this.createProxyContext(page.context().browser()!); for (const link of links) { // eslint-disable-next-line this.concurrency.addJob(async () => { try { - const newPage = await context.newPage(); + const newPage = await this.createProxyPage(context); await newPage.setViewportSize({ width: 900, height: 400 }); await newPage.goto(link); await newPage.waitForLoadState('networkidle'); @@ -592,16 +643,28 @@ export default class Interpreter extends EventEmitter { */ this.initializedWorkflow = Preprocessor.initWorkflow(this.workflow, params); - await this.ensureScriptsLoaded(page); + // Create a new context with proxy configuration + const context = await this.createProxyContext(page.context().browser()!); + + // Create a new page with proxy + const proxyPage = await this.createProxyPage(context); + + // Copy over the current page's URL and state + await proxyPage.goto(page.url()); + + await this.ensureScriptsLoaded(proxyPage); this.stopper = () => { this.stopper = null; }; - this.concurrency.addJob(() => this.runLoop(page, this.initializedWorkflow!)); + this.concurrency.addJob(() => this.runLoop(proxyPage, this.initializedWorkflow!)); await this.concurrency.waitForCompletion(); + await Promise.all(this.contexts.map(ctx => ctx.close())); + this.contexts = []; + this.stopper = null; }