Merge branch 'develop' into db-fixes
This commit is contained in:
@@ -1,6 +1,4 @@
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { chromium } from "playwright-extra";
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { requireAPIKey } from "../middlewares/api";
|
||||
import Robot from "../models/Robot";
|
||||
import Run from "../models/Run";
|
||||
@@ -20,8 +18,6 @@ import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } fr
|
||||
import { sendWebhook } from "../routes/webhook";
|
||||
import { convertPageToHTML, convertPageToMarkdown } from '../markdownify/scrape';
|
||||
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
const router = Router();
|
||||
|
||||
const formatRecording = (recordingData: any) => {
|
||||
@@ -676,6 +672,16 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
|
||||
};
|
||||
}
|
||||
|
||||
browser = browserPool.getRemoteBrowser(plainRun.browserId);
|
||||
if (!browser) {
|
||||
throw new Error('Could not access browser');
|
||||
}
|
||||
|
||||
let currentPage = await browser.getCurrentPage();
|
||||
if (!currentPage) {
|
||||
throw new Error('Could not create a new page');
|
||||
}
|
||||
|
||||
if (recording.recording_meta.type === 'scrape') {
|
||||
logger.log('info', `Executing scrape robot for API run ${id}`);
|
||||
|
||||
@@ -705,7 +711,7 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
|
||||
const SCRAPE_TIMEOUT = 120000;
|
||||
|
||||
if (formats.includes('markdown')) {
|
||||
const markdownPromise = convertPageToMarkdown(url);
|
||||
const markdownPromise = convertPageToMarkdown(url, currentPage);
|
||||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||
setTimeout(() => reject(new Error(`Markdown conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||
});
|
||||
@@ -714,7 +720,7 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
|
||||
}
|
||||
|
||||
if (formats.includes('html')) {
|
||||
const htmlPromise = convertPageToHTML(url);
|
||||
const htmlPromise = convertPageToHTML(url, currentPage);
|
||||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||
setTimeout(() => reject(new Error(`HTML conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||
});
|
||||
@@ -862,16 +868,6 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
|
||||
|
||||
plainRun.status = 'running';
|
||||
|
||||
browser = browserPool.getRemoteBrowser(plainRun.browserId);
|
||||
if (!browser) {
|
||||
throw new Error('Could not access browser');
|
||||
}
|
||||
|
||||
let currentPage = await browser.getCurrentPage();
|
||||
if (!currentPage) {
|
||||
throw new Error('Could not create a new page');
|
||||
}
|
||||
|
||||
const workflow = AddGeneratedFlags(recording.recording);
|
||||
|
||||
browser.interpreter.setRunId(plainRun.runId);
|
||||
|
||||
156
server/src/browser-management/browserConnection.ts
Normal file
156
server/src/browser-management/browserConnection.ts
Normal file
@@ -0,0 +1,156 @@
|
||||
import { chromium } from 'playwright-core';
|
||||
import type { Browser } from 'playwright-core';
|
||||
import logger from '../logger';
|
||||
|
||||
/**
|
||||
* Configuration for connection retry logic
|
||||
*/
|
||||
const CONNECTION_CONFIG = {
|
||||
maxRetries: 3,
|
||||
retryDelay: 2000,
|
||||
connectionTimeout: 30000,
|
||||
};
|
||||
|
||||
/**
|
||||
* Get the WebSocket endpoint from the browser service health check
|
||||
* @returns Promise<string> - The WebSocket endpoint URL with browser ID
|
||||
*/
|
||||
async function getBrowserServiceEndpoint(): Promise<string> {
|
||||
const healthPort = process.env.BROWSER_HEALTH_PORT || '3002';
|
||||
const healthHost = process.env.BROWSER_WS_HOST || 'localhost';
|
||||
const healthEndpoint = `http://${healthHost}:${healthPort}/health`;
|
||||
|
||||
try {
|
||||
logger.debug(`Fetching WebSocket endpoint from: ${healthEndpoint}`);
|
||||
const response = await fetch(healthEndpoint);
|
||||
const data = await response.json();
|
||||
|
||||
if (data.status === 'healthy' && data.wsEndpoint) {
|
||||
logger.debug(`Got WebSocket endpoint: ${data.wsEndpoint}`);
|
||||
return data.wsEndpoint;
|
||||
}
|
||||
|
||||
throw new Error('Health check did not return a valid wsEndpoint');
|
||||
} catch (error: any) {
|
||||
logger.error(`Failed to fetch endpoint from health check: ${error.message}`);
|
||||
throw new Error(
|
||||
`Browser service is not accessible at ${healthEndpoint}. ` +
|
||||
`Make sure the browser service is running (docker-compose up browser)`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Launch a local browser as fallback when browser service is unavailable
|
||||
* @returns Promise<Browser> - Locally launched browser instance
|
||||
*/
|
||||
async function launchLocalBrowser(): Promise<Browser> {
|
||||
logger.warn('Attempting to launch local browser');
|
||||
logger.warn('Note: This requires Chromium binaries to be installed (npx playwright install chromium)');
|
||||
|
||||
try {
|
||||
const browser = await chromium.launch({
|
||||
headless: true,
|
||||
args: [
|
||||
'--disable-blink-features=AutomationControlled',
|
||||
'--disable-web-security',
|
||||
'--disable-features=IsolateOrigins,site-per-process',
|
||||
'--disable-site-isolation-trials',
|
||||
'--disable-extensions',
|
||||
'--no-sandbox',
|
||||
'--disable-dev-shm-usage',
|
||||
'--disable-gpu',
|
||||
'--force-color-profile=srgb',
|
||||
'--force-device-scale-factor=2',
|
||||
'--ignore-certificate-errors',
|
||||
'--mute-audio'
|
||||
],
|
||||
});
|
||||
|
||||
logger.info('Successfully launched local browser');
|
||||
return browser;
|
||||
} catch (error: any) {
|
||||
logger.error(`Failed to launch local browser: ${error.message}`);
|
||||
throw new Error(
|
||||
`Could not launch local browser. ` +
|
||||
`Please either:\n` +
|
||||
` 1. Start the browser service: docker-compose up browser\n` +
|
||||
` 2. Install Chromium binaries: npx playwright@1.57.0 install chromium`
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Connect to the remote browser service with retry logic, with fallback to local browser
|
||||
* @param retries - Number of connection attempts (default: 3)
|
||||
* @returns Promise<Browser> - Connected browser instance (remote or local)
|
||||
* @throws Error if both remote connection and local launch fail
|
||||
*/
|
||||
export async function connectToRemoteBrowser(retries?: number): Promise<Browser> {
|
||||
const maxRetries = retries ?? CONNECTION_CONFIG.maxRetries;
|
||||
|
||||
try {
|
||||
const wsEndpoint = await getBrowserServiceEndpoint();
|
||||
logger.info(`Connecting to browser service at ${wsEndpoint}...`);
|
||||
|
||||
for (let attempt = 1; attempt <= maxRetries; attempt++) {
|
||||
try {
|
||||
logger.debug(`Connection attempt ${attempt}/${maxRetries}`);
|
||||
|
||||
const browser = await chromium.connect(wsEndpoint, {
|
||||
timeout: CONNECTION_CONFIG.connectionTimeout,
|
||||
});
|
||||
|
||||
logger.info('Successfully connected to browser service');
|
||||
return browser;
|
||||
} catch (error: any) {
|
||||
logger.warn(
|
||||
`Connection attempt ${attempt}/${maxRetries} failed: ${error.message}`
|
||||
);
|
||||
|
||||
if (attempt === maxRetries) {
|
||||
logger.error(
|
||||
`Failed to connect to browser service after ${maxRetries} attempts`
|
||||
);
|
||||
throw new Error(`Remote connection failed: ${error.message}`);
|
||||
}
|
||||
|
||||
logger.debug(`Waiting ${CONNECTION_CONFIG.retryDelay}ms before retry...`);
|
||||
await new Promise(resolve => setTimeout(resolve, CONNECTION_CONFIG.retryDelay));
|
||||
}
|
||||
}
|
||||
|
||||
throw new Error('Failed to connect to browser service');
|
||||
} catch (error: any) {
|
||||
logger.warn(`Browser service connection failed: ${error.message}`);
|
||||
logger.warn('Falling back to local browser launch...');
|
||||
|
||||
return await launchLocalBrowser();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if browser service is healthy
|
||||
* @returns Promise<boolean> - true if service is healthy
|
||||
*/
|
||||
export async function checkBrowserServiceHealth(): Promise<boolean> {
|
||||
try {
|
||||
const healthPort = process.env.BROWSER_HEALTH_PORT || '3002';
|
||||
const healthHost = process.env.BROWSER_WS_HOST || 'localhost';
|
||||
const healthEndpoint = `http://${healthHost}:${healthPort}/health`;
|
||||
|
||||
const response = await fetch(healthEndpoint);
|
||||
const data = await response.json();
|
||||
|
||||
if (data.status === 'healthy') {
|
||||
logger.info('Browser service health check passed');
|
||||
return true;
|
||||
}
|
||||
|
||||
logger.warn('Browser service health check failed:', data);
|
||||
return false;
|
||||
} catch (error: any) {
|
||||
logger.error('Browser service health check error:', error.message);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
@@ -2,11 +2,9 @@ import {
|
||||
Page,
|
||||
Browser,
|
||||
CDPSession,
|
||||
BrowserContext,
|
||||
} from 'playwright';
|
||||
BrowserContext
|
||||
} from 'playwright-core';
|
||||
import { Socket } from "socket.io";
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { PlaywrightBlocker } from '@cliqz/adblocker-playwright';
|
||||
import fetch from 'cross-fetch';
|
||||
import logger from '../../logger';
|
||||
@@ -17,6 +15,7 @@ import { getDecryptedProxyConfig } from '../../routes/proxy';
|
||||
import { getInjectableScript } from 'idcac-playwright';
|
||||
import { FingerprintInjector } from "fingerprint-injector";
|
||||
import { FingerprintGenerator } from "fingerprint-generator";
|
||||
import { connectToRemoteBrowser } from '../browserConnection';
|
||||
|
||||
declare global {
|
||||
interface Window {
|
||||
@@ -39,8 +38,6 @@ interface ProcessedSnapshot {
|
||||
baseUrl: string;
|
||||
}
|
||||
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
const MEMORY_CONFIG = {
|
||||
gcInterval: 20000, // Check memory more frequently (20s instead of 60s)
|
||||
maxHeapSize: 1536 * 1024 * 1024, // 1.5GB
|
||||
@@ -460,26 +457,10 @@ export class RemoteBrowser {
|
||||
const initializationPromise = (async () => {
|
||||
while (!success && retryCount < MAX_RETRIES) {
|
||||
try {
|
||||
this.browser = <Browser>(await chromium.launch({
|
||||
headless: true,
|
||||
args: [
|
||||
"--disable-blink-features=AutomationControlled",
|
||||
"--disable-web-security",
|
||||
"--disable-features=IsolateOrigins,site-per-process",
|
||||
"--disable-site-isolation-trials",
|
||||
"--disable-extensions",
|
||||
"--no-sandbox",
|
||||
"--disable-dev-shm-usage",
|
||||
"--disable-gpu",
|
||||
"--force-color-profile=srgb",
|
||||
"--force-device-scale-factor=2",
|
||||
"--ignore-certificate-errors",
|
||||
"--mute-audio"
|
||||
],
|
||||
}));
|
||||
this.browser = await connectToRemoteBrowser();
|
||||
|
||||
if (!this.browser || this.browser.isConnected() === false) {
|
||||
throw new Error('Browser failed to launch or is not connected');
|
||||
throw new Error('Browser failed to launch or is not connected');
|
||||
}
|
||||
|
||||
this.emitLoadingProgress(20, 0);
|
||||
|
||||
@@ -7,7 +7,7 @@ import { Socket } from 'socket.io';
|
||||
import logger from "../logger";
|
||||
import { Coordinates, ScrollDeltas, KeyboardInput, DatePickerEventData } from '../types';
|
||||
import { browserPool } from "../server";
|
||||
import { Page } from "playwright";
|
||||
import { Page } from "playwright-core";
|
||||
import { CustomActions } from "../../../src/shared/types";
|
||||
import { WhereWhatPair } from "maxun-core";
|
||||
import { RemoteBrowser } from './classes/RemoteBrowser';
|
||||
|
||||
@@ -1,17 +1,35 @@
|
||||
import { chromium } from "playwright";
|
||||
import { connectToRemoteBrowser } from "../browser-management/browserConnection";
|
||||
import { parseMarkdown } from "./markdown";
|
||||
import logger from "../logger";
|
||||
|
||||
async function gotoWithFallback(page: any, url: string) {
|
||||
try {
|
||||
return await page.goto(url, {
|
||||
waitUntil: "networkidle",
|
||||
timeout: 100000,
|
||||
});
|
||||
} catch (err) {
|
||||
// fallback: JS-heavy or unstable sites
|
||||
return await page.goto(url, {
|
||||
waitUntil: "domcontentloaded",
|
||||
timeout: 100000,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetches a webpage, strips scripts/styles/images/etc,
|
||||
* returns clean Markdown using parser.
|
||||
* @param url - The URL to convert
|
||||
* @param existingPage - Optional existing Playwright page instance to reuse
|
||||
*/
|
||||
export async function convertPageToMarkdown(url: string): Promise<string> {
|
||||
const browser = await chromium.launch();
|
||||
const browser = await connectToRemoteBrowser();
|
||||
const page = await browser.newPage();
|
||||
|
||||
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
||||
|
||||
await page.addInitScript(() => {
|
||||
const cleanedHtml = await page.evaluate(() => {
|
||||
const selectors = [
|
||||
"script",
|
||||
"style",
|
||||
@@ -42,14 +60,16 @@ export async function convertPageToMarkdown(url: string): Promise<string> {
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// Re-extract HTML after cleanup
|
||||
const cleanedHtml = await page.evaluate(() => {
|
||||
return document.documentElement.outerHTML;
|
||||
});
|
||||
|
||||
await browser.close();
|
||||
if (shouldCloseBrowser && browser) {
|
||||
logger.log('info', `[Scrape] Closing browser instance created for markdown conversion`);
|
||||
await browser.close();
|
||||
} else {
|
||||
logger.log('info', `[Scrape] Keeping existing browser instance open after markdown conversion`);
|
||||
}
|
||||
|
||||
// Convert cleaned HTML → Markdown
|
||||
const markdown = await parseMarkdown(cleanedHtml, url);
|
||||
@@ -59,14 +79,16 @@ export async function convertPageToMarkdown(url: string): Promise<string> {
|
||||
/**
|
||||
* Fetches a webpage, strips scripts/styles/images/etc,
|
||||
* returns clean HTML.
|
||||
* @param url - The URL to convert
|
||||
* @param existingPage - Optional existing Playwright page instance to reuse
|
||||
*/
|
||||
export async function convertPageToHTML(url: string): Promise<string> {
|
||||
const browser = await chromium.launch();
|
||||
const browser = await connectToRemoteBrowser();
|
||||
const page = await browser.newPage();
|
||||
|
||||
await page.goto(url, { waitUntil: "networkidle", timeout: 100000 });
|
||||
|
||||
await page.addInitScript(() => {
|
||||
const cleanedHtml = await page.evaluate(() => {
|
||||
const selectors = [
|
||||
"script",
|
||||
"style",
|
||||
@@ -97,14 +119,16 @@ export async function convertPageToHTML(url: string): Promise<string> {
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
// Re-extract HTML after cleanup
|
||||
const cleanedHtml = await page.evaluate(() => {
|
||||
return document.documentElement.outerHTML;
|
||||
});
|
||||
|
||||
await browser.close();
|
||||
if (shouldCloseBrowser && browser) {
|
||||
logger.log('info', `[Scrape] Closing browser instance created for HTML conversion`);
|
||||
await browser.close();
|
||||
} else {
|
||||
logger.log('info', `[Scrape] Keeping existing browser instance open after HTML conversion`);
|
||||
}
|
||||
|
||||
// Return cleaned HTML directly
|
||||
return cleanedHtml;
|
||||
|
||||
@@ -13,7 +13,7 @@ import { WorkflowFile } from 'maxun-core';
|
||||
import Run from './models/Run';
|
||||
import Robot from './models/Robot';
|
||||
import { browserPool } from './server';
|
||||
import { Page } from 'playwright';
|
||||
import { Page } from 'playwright-core';
|
||||
import { capture } from './utils/analytics';
|
||||
import { addGoogleSheetUpdateTask, googleSheetUpdateTasks, processGoogleSheetUpdates } from './workflow-management/integrations/gsheet';
|
||||
import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from './workflow-management/integrations/airtable';
|
||||
@@ -192,7 +192,7 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
||||
|
||||
logger.log('info', `Browser ${browserId} found and ready for execution`);
|
||||
|
||||
try {
|
||||
try {
|
||||
// Find the recording
|
||||
const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
|
||||
|
||||
@@ -200,6 +200,30 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
||||
throw new Error(`Recording for run ${data.runId} not found`);
|
||||
}
|
||||
|
||||
let currentPage = browser.getCurrentPage();
|
||||
|
||||
const pageWaitStart = Date.now();
|
||||
let lastPageLogTime = 0;
|
||||
let pageAttempts = 0;
|
||||
const MAX_PAGE_ATTEMPTS = 15;
|
||||
|
||||
while (!currentPage && (Date.now() - pageWaitStart) < BROWSER_PAGE_TIMEOUT && pageAttempts < MAX_PAGE_ATTEMPTS) {
|
||||
const currentTime = Date.now();
|
||||
pageAttempts++;
|
||||
|
||||
if (currentTime - lastPageLogTime > 5000) {
|
||||
logger.log('info', `Page not ready for browser ${browserId}, waiting... (${Math.round((currentTime - pageWaitStart) / 1000)}s elapsed)`);
|
||||
lastPageLogTime = currentTime;
|
||||
}
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
currentPage = browser.getCurrentPage();
|
||||
}
|
||||
|
||||
if (!currentPage) {
|
||||
throw new Error(`No current page available for browser ${browserId} after ${BROWSER_PAGE_TIMEOUT/1000}s timeout`);
|
||||
}
|
||||
|
||||
if (recording.recording_meta.type === 'scrape') {
|
||||
logger.log('info', `Executing scrape robot for run ${data.runId}`);
|
||||
|
||||
@@ -224,7 +248,7 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
||||
const SCRAPE_TIMEOUT = 120000;
|
||||
|
||||
if (formats.includes('markdown')) {
|
||||
const markdownPromise = convertPageToMarkdown(url);
|
||||
const markdownPromise = convertPageToMarkdown(url, currentPage);
|
||||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||
setTimeout(() => reject(new Error(`Markdown conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||
});
|
||||
@@ -233,7 +257,7 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
||||
}
|
||||
|
||||
if (formats.includes('html')) {
|
||||
const htmlPromise = convertPageToHTML(url);
|
||||
const htmlPromise = convertPageToHTML(url, currentPage);
|
||||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||
setTimeout(() => reject(new Error(`HTML conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||
});
|
||||
@@ -347,30 +371,6 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
|
||||
}
|
||||
};
|
||||
|
||||
let currentPage = browser.getCurrentPage();
|
||||
|
||||
const pageWaitStart = Date.now();
|
||||
let lastPageLogTime = 0;
|
||||
let pageAttempts = 0;
|
||||
const MAX_PAGE_ATTEMPTS = 15;
|
||||
|
||||
while (!currentPage && (Date.now() - pageWaitStart) < BROWSER_PAGE_TIMEOUT && pageAttempts < MAX_PAGE_ATTEMPTS) {
|
||||
const currentTime = Date.now();
|
||||
pageAttempts++;
|
||||
|
||||
if (currentTime - lastPageLogTime > 5000) {
|
||||
logger.log('info', `Page not ready for browser ${browserId}, waiting... (${Math.round((currentTime - pageWaitStart) / 1000)}s elapsed)`);
|
||||
lastPageLogTime = currentTime;
|
||||
}
|
||||
|
||||
await new Promise(resolve => setTimeout(resolve, 1000));
|
||||
currentPage = browser.getCurrentPage();
|
||||
}
|
||||
|
||||
if (!currentPage) {
|
||||
throw new Error(`No current page available for browser ${browserId} after ${BROWSER_PAGE_TIMEOUT/1000}s timeout`);
|
||||
}
|
||||
|
||||
logger.log('info', `Starting workflow execution for run ${data.runId}`);
|
||||
|
||||
await run.update({
|
||||
|
||||
@@ -1,19 +0,0 @@
|
||||
import { Router } from 'express';
|
||||
import logger from "../logger";
|
||||
// import { loadIntegrations, saveIntegrations } from '../workflow-management/integrations/gsheet';
|
||||
import { requireSignIn } from '../middlewares/auth';
|
||||
|
||||
export const router = Router();
|
||||
|
||||
router.post('/upload-credentials', requireSignIn, async (req, res) => {
|
||||
try {
|
||||
const { fileName, credentials, spreadsheetId, range } = req.body;
|
||||
if (!fileName || !credentials || !spreadsheetId || !range) {
|
||||
return res.status(400).json({ message: 'Credentials, Spreadsheet ID, and Range are required.' });
|
||||
}
|
||||
// *** TEMPORARILY WE STORE CREDENTIALS HERE ***
|
||||
} catch (error: any) {
|
||||
logger.log('error', `Error saving credentials: ${error.message}`);
|
||||
return res.status(500).json({ message: 'Failed to save credentials.', error: error.message });
|
||||
}
|
||||
});
|
||||
@@ -1,10 +1,8 @@
|
||||
import { Router, Request, Response } from 'express';
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { connectToRemoteBrowser } from '../browser-management/browserConnection';
|
||||
import User from '../models/User';
|
||||
import { encrypt, decrypt } from '../utils/auth';
|
||||
import { requireSignIn } from '../middlewares/auth';
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
export const router = Router();
|
||||
|
||||
@@ -86,11 +84,7 @@ router.get('/test', requireSignIn, async (req: Request, res: Response) => {
|
||||
}),
|
||||
};
|
||||
|
||||
const browser = await chromium.launch({
|
||||
headless: true,
|
||||
proxy: proxyOptions,
|
||||
args:["--ignore-certificate-errors"]
|
||||
});
|
||||
const browser = await connectToRemoteBrowser();
|
||||
const page = await browser.newPage();
|
||||
await page.goto('https://example.com');
|
||||
await browser.close();
|
||||
|
||||
@@ -13,14 +13,11 @@ import {
|
||||
destroyRemoteBrowser,
|
||||
canCreateBrowserInState,
|
||||
} from '../browser-management/controller';
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import logger from "../logger";
|
||||
import { requireSignIn } from '../middlewares/auth';
|
||||
import { pgBossClient } from '../storage/pgboss';
|
||||
|
||||
export const router = Router();
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
export interface AuthenticatedRequest extends Request {
|
||||
user?: any;
|
||||
|
||||
@@ -1,8 +1,6 @@
|
||||
import { Router } from 'express';
|
||||
import logger from "../logger";
|
||||
import { createRemoteBrowserForRun, destroyRemoteBrowser, getActiveBrowserIdByState } from "../browser-management/controller";
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { browserPool } from "../server";
|
||||
import { v4 as uuid } from "uuid";
|
||||
import moment from 'moment-timezone';
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import {BrowserType, LaunchOptions} from "playwright";
|
||||
import {BrowserType, LaunchOptions} from "playwright-core";
|
||||
|
||||
/**
|
||||
* Interpreter settings properties including recording parameters.
|
||||
|
||||
@@ -2,7 +2,7 @@ import { Action, ActionType, Coordinates, TagName, DatePickerEventData } from ".
|
||||
import { WhereWhatPair, WorkflowFile } from 'maxun-core';
|
||||
import logger from "../../logger";
|
||||
import { Socket } from "socket.io";
|
||||
import { Page } from "playwright";
|
||||
import { Page } from "playwright-core";
|
||||
import {
|
||||
getElementInformation,
|
||||
getRect,
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import Interpreter, { WorkflowFile } from "maxun-core";
|
||||
import logger from "../../logger";
|
||||
import { Socket } from "socket.io";
|
||||
import { Page } from "playwright";
|
||||
import { Page } from "playwright-core";
|
||||
import { InterpreterSettings } from "../../types";
|
||||
import { decrypt } from "../../utils/auth";
|
||||
import Run from "../../models/Run";
|
||||
|
||||
@@ -1,6 +1,4 @@
|
||||
import { v4 as uuid } from "uuid";
|
||||
import { chromium } from 'playwright-extra';
|
||||
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
|
||||
import { io, Socket } from "socket.io-client";
|
||||
import { createRemoteBrowserForRun, destroyRemoteBrowser } from '../../browser-management/controller';
|
||||
import logger from '../../logger';
|
||||
@@ -12,11 +10,10 @@ import { getDecryptedProxyConfig } from "../../routes/proxy";
|
||||
import { BinaryOutputService } from "../../storage/mino";
|
||||
import { capture } from "../../utils/analytics";
|
||||
import { WorkflowFile } from "maxun-core";
|
||||
import { Page } from "playwright";
|
||||
import { Page } from "playwright-core";
|
||||
import { sendWebhook } from "../../routes/webhook";
|
||||
import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } from "../integrations/airtable";
|
||||
import { convertPageToMarkdown, convertPageToHTML } from "../../markdownify/scrape";
|
||||
chromium.use(stealthPlugin());
|
||||
|
||||
async function createWorkflowAndStoreMetadata(id: string, userId: string) {
|
||||
try {
|
||||
@@ -220,6 +217,16 @@ async function executeRun(id: string, userId: string) {
|
||||
}
|
||||
}
|
||||
|
||||
browser = browserPool.getRemoteBrowser(plainRun.browserId);
|
||||
if (!browser) {
|
||||
throw new Error('Could not access browser');
|
||||
}
|
||||
|
||||
let currentPage = await browser.getCurrentPage();
|
||||
if (!currentPage) {
|
||||
throw new Error('Could not create a new page');
|
||||
}
|
||||
|
||||
if (recording.recording_meta.type === 'scrape') {
|
||||
logger.log('info', `Executing scrape robot for scheduled run ${id}`);
|
||||
|
||||
@@ -266,7 +273,7 @@ async function executeRun(id: string, userId: string) {
|
||||
|
||||
// Markdown conversion
|
||||
if (formats.includes("markdown")) {
|
||||
const markdownPromise = convertPageToMarkdown(url);
|
||||
const markdownPromise = convertPageToMarkdown(url, currentPage);
|
||||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||
setTimeout(() => reject(new Error(`Markdown conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||
});
|
||||
@@ -275,7 +282,7 @@ async function executeRun(id: string, userId: string) {
|
||||
}
|
||||
|
||||
if (formats.includes("html")) {
|
||||
const htmlPromise = convertPageToHTML(url);
|
||||
const htmlPromise = convertPageToHTML(url, currentPage);
|
||||
const timeoutPromise = new Promise<never>((_, reject) => {
|
||||
setTimeout(() => reject(new Error(`HTML conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
|
||||
});
|
||||
@@ -412,16 +419,6 @@ async function executeRun(id: string, userId: string) {
|
||||
logger.log('warn', `Failed to send run-started notification for run ${plainRun.runId}: ${socketError.message}`);
|
||||
}
|
||||
|
||||
browser = browserPool.getRemoteBrowser(plainRun.browserId);
|
||||
if (!browser) {
|
||||
throw new Error('Could not access browser');
|
||||
}
|
||||
|
||||
let currentPage = await browser.getCurrentPage();
|
||||
if (!currentPage) {
|
||||
throw new Error('Could not create a new page');
|
||||
}
|
||||
|
||||
const workflow = AddGeneratedFlags(recording.recording);
|
||||
|
||||
// Set run ID for real-time data persistence
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
import { Page } from "playwright";
|
||||
import { Page } from "playwright-core";
|
||||
import { Coordinates } from "../types";
|
||||
import { WhereWhatPair, WorkflowFile } from "maxun-core";
|
||||
import logger from "../logger";
|
||||
|
||||
Reference in New Issue
Block a user