Merge branch 'develop' into db-fixes

This commit is contained in:
Rohit
2025-11-30 19:45:54 +05:30
committed by GitHub
49 changed files with 1880 additions and 759 deletions

View File

@@ -1,6 +1,4 @@
import { Router, Request, Response } from 'express';
import { chromium } from "playwright-extra";
import stealthPlugin from 'puppeteer-extra-plugin-stealth';
import { requireAPIKey } from "../middlewares/api";
import Robot from "../models/Robot";
import Run from "../models/Run";
@@ -20,8 +18,6 @@ import { addAirtableUpdateTask, airtableUpdateTasks, processAirtableUpdates } fr
import { sendWebhook } from "../routes/webhook";
import { convertPageToHTML, convertPageToMarkdown } from '../markdownify/scrape';
chromium.use(stealthPlugin());
const router = Router();
const formatRecording = (recordingData: any) => {
@@ -676,6 +672,16 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
};
}
browser = browserPool.getRemoteBrowser(plainRun.browserId);
if (!browser) {
throw new Error('Could not access browser');
}
let currentPage = await browser.getCurrentPage();
if (!currentPage) {
throw new Error('Could not create a new page');
}
if (recording.recording_meta.type === 'scrape') {
logger.log('info', `Executing scrape robot for API run ${id}`);
@@ -705,7 +711,7 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
const SCRAPE_TIMEOUT = 120000;
if (formats.includes('markdown')) {
const markdownPromise = convertPageToMarkdown(url);
const markdownPromise = convertPageToMarkdown(url, currentPage);
const timeoutPromise = new Promise<never>((_, reject) => {
setTimeout(() => reject(new Error(`Markdown conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
});
@@ -714,7 +720,7 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
}
if (formats.includes('html')) {
const htmlPromise = convertPageToHTML(url);
const htmlPromise = convertPageToHTML(url, currentPage);
const timeoutPromise = new Promise<never>((_, reject) => {
setTimeout(() => reject(new Error(`HTML conversion timed out after ${SCRAPE_TIMEOUT/1000}s`)), SCRAPE_TIMEOUT);
});
@@ -862,16 +868,6 @@ async function executeRun(id: string, userId: string, requestedFormats?: string[
plainRun.status = 'running';
browser = browserPool.getRemoteBrowser(plainRun.browserId);
if (!browser) {
throw new Error('Could not access browser');
}
let currentPage = await browser.getCurrentPage();
if (!currentPage) {
throw new Error('Could not create a new page');
}
const workflow = AddGeneratedFlags(recording.recording);
browser.interpreter.setRunId(plainRun.runId);