2025-05-28 14:22:43 +05:30
import { v4 as uuid } from "uuid" ;
2024-09-13 14:28:52 +05:30
import { io , Socket } from "socket.io-client" ;
2024-09-13 14:27:39 +05:30
import { createRemoteBrowserForRun , destroyRemoteBrowser } from '../../browser-management/controller' ;
2024-09-11 11:53:12 +05:30
import logger from '../../logger' ;
2025-10-21 00:43:08 +05:30
import { browserPool , io as serverIo } from "../../server" ;
2025-11-29 12:43:19 +05:30
import { addGoogleSheetUpdateTask , googleSheetUpdateTasks , processGoogleSheetUpdates } from "../integrations/gsheet" ;
2024-10-10 02:54:27 +05:30
import Robot from "../../models/Robot" ;
import Run from "../../models/Run" ;
import { getDecryptedProxyConfig } from "../../routes/proxy" ;
2024-10-15 22:22:03 +05:30
import { BinaryOutputService } from "../../storage/mino" ;
2024-10-29 03:46:13 +05:30
import { capture } from "../../utils/analytics" ;
2024-12-08 18:10:53 +05:30
import { WorkflowFile } from "maxun-core" ;
2025-11-30 17:41:44 +05:30
import { Page } from "playwright-core" ;
2025-05-27 23:37:36 +05:30
import { sendWebhook } from "../../routes/webhook" ;
2025-11-29 12:43:19 +05:30
import { addAirtableUpdateTask , airtableUpdateTasks , processAirtableUpdates } from "../integrations/airtable" ;
2025-12-05 21:58:23 +05:30
import { convertPageToMarkdown , convertPageToHTML , convertPageToScreenshot } from "../../markdownify/scrape" ;
2024-09-12 21:01:46 +05:30
2024-10-12 15:44:22 +05:30
/**
 * Prepares a scheduled run for the robot whose `recording_meta.id` equals `id`:
 * creates a remote browser, inserts a `Run` row with status `scheduled`, and
 * emits a `run-scheduled` event to the user's room on the `/queued-run` namespace.
 *
 * @param id - Value matched against `recording_meta.id` when looking up the robot.
 * @param userId - User the run is created for (proxy lookup, browser, socket room).
 * @returns `{ browserId, runId }` on success, or `{ success: false, error }` when the
 *          robot is not found or any step throws. Errors are reported through the
 *          return value; the function does not rethrow them.
 */
async function createWorkflowAndStoreMetadata(id: string, userId: string) {
  // Remembered so the catch block can release the browser if a later step fails.
  let createdBrowserId: string | undefined;

  try {
    const recording = await Robot.findOne({
      where: {
        'recording_meta.id': id
      },
      raw: true
    });

    if (!recording || !recording.recording_meta || !recording.recording_meta.id) {
      return {
        success: false,
        error: 'Recording not found'
      };
    }

    const proxyConfig = await getDecryptedProxyConfig(userId);
    // NOTE(review): proxyOptions is assembled but never passed to anything below.
    // Left in place because the lookup above can throw and thereby abort scheduling.
    let proxyOptions: any = {};

    if (proxyConfig.proxy_url) {
      proxyOptions = {
        server: proxyConfig.proxy_url,
        ...(proxyConfig.proxy_username && proxyConfig.proxy_password && {
          username: proxyConfig.proxy_username,
          password: proxyConfig.proxy_password,
        }),
      };
    }

    const browserId = createRemoteBrowserForRun(userId);
    createdBrowserId = browserId;
    const runId = uuid();

    const run = await Run.create({
      status: 'scheduled',
      name: recording.recording_meta.name,
      robotId: recording.id,
      robotMetaId: recording.recording_meta.id,
      startedAt: new Date().toLocaleString(),
      finishedAt: '',
      browserId,
      interpreterSettings: { maxConcurrency: 1, maxRepeats: 1, debug: true },
      log: '',
      runId,
      runByScheduleId: uuid(),
      serializableOutput: {},
      binaryOutput: {},
      retryCount: 0
    });

    const plainRun = run.toJSON();

    // Notification is best-effort: a socket failure must not fail the scheduling.
    try {
      const runScheduledData = {
        runId: plainRun.runId,
        robotMetaId: plainRun.robotMetaId,
        robotName: plainRun.name,
        status: 'scheduled',
        startedAt: plainRun.startedAt,
        runByUserId: plainRun.runByUserId,
        runByScheduleId: plainRun.runByScheduleId,
        runByAPI: plainRun.runByAPI || false,
        browserId: plainRun.browserId
      };
      serverIo.of('/queued-run').to(`user-${userId}`).emit('run-scheduled', runScheduledData);
      logger.log('info', `Scheduled run notification sent for run: ${plainRun.runId} to user-${userId}`);
    } catch (socketError: any) {
      logger.log('warn', `Failed to send run-scheduled notification for run ${plainRun.runId}: ${socketError.message}`);
    }

    return {
      browserId,
      runId: plainRun.runId,
    };
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    logger.log('error', `Error while scheduling a run with id: ${id}: ${message}`);

    // If the browser was already created, no run will ever pick it up: release it.
    if (createdBrowserId) {
      try {
        await destroyRemoteBrowser(createdBrowserId, userId);
      } catch (cleanupError) {
        const cleanupMessage = cleanupError instanceof Error ? cleanupError.message : String(cleanupError);
        logger.log('warn', `Failed to destroy browser ${createdBrowserId} after scheduling error: ${cleanupMessage}`);
      }
    }

    return {
      success: false,
      error: message,
    };
  }
}
2025-11-29 12:43:19 +05:30
/**
 * Settles with `promise`, or rejects if `promise` has not settled within `timeoutMs`.
 *
 * The timer is cleared as soon as the race settles so that a finished operation
 * does not leave a pending timer behind for the remainder of the timeout.
 *
 * @param promise - The operation to wait for.
 * @param timeoutMs - Maximum time to wait, in milliseconds.
 * @param operation - Label used as the prefix of the timeout error message.
 * @returns A promise that mirrors `promise` or rejects with
 *          `Error("<operation> timed out after <timeoutMs>ms")`.
 */
function withTimeout<T>(promise: Promise<T>, timeoutMs: number, operation: string): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;

  const timeout = new Promise<never>((_, reject) => {
    timer = setTimeout(
      () => reject(new Error(`${operation} timed out after ${timeoutMs}ms`)),
      timeoutMs
    );
  });

  return Promise.race([promise, timeout]).finally(() => {
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  });
}
2025-10-21 00:43:08 +05:30
/**
 * Queues Google Sheets and Airtable update tasks for a run and starts both
 * processors without waiting for them to finish.
 *
 * Each processor is wrapped in a 65 second timeout; processor failures are
 * logged and never propagated to the caller.
 *
 * @param runId - Run identifier; also used as the task key.
 * @param robotMetaId - Stored as `robotId` on both queued tasks.
 */
async function triggerIntegrationUpdates(runId: string, robotMetaId: string): Promise<void> {
  try {
    addGoogleSheetUpdateTask(runId, {
      robotId: robotMetaId,
      runId: runId,
      status: 'pending',
      retries: 5,
    });

    addAirtableUpdateTask(runId, {
      robotId: robotMetaId,
      runId: runId,
      status: 'pending',
      retries: 5,
    });

    // Fire-and-forget: rejections are handled by the attached catch handlers.
    void withTimeout(processAirtableUpdates(), 65000, 'Airtable update')
      .catch(err => logger.log('error', `Airtable update error: ${err.message}`));
    void withTimeout(processGoogleSheetUpdates(), 65000, 'Google Sheets update')
      .catch(err => logger.log('error', `Google Sheets update error: ${err.message}`));
  } catch (err: any) {
    logger.log('error', `Failed to update integrations for run: ${runId}: ${err.message}`);
  }
}
2024-12-08 18:10:53 +05:30
/**
 * Returns a deep copy of `workflow` in which every entry of `workflow.workflow`
 * has a `{ action: 'flag', args: ['generated'] }` step prepended to its `what` list.
 *
 * The copy is made through a JSON round-trip, so the input object is not mutated.
 *
 * @param workflow - The workflow file to copy and annotate.
 * @returns The annotated copy.
 */
function AddGeneratedFlags(workflow: WorkflowFile) {
  const copy = JSON.parse(JSON.stringify(workflow));
  for (const pair of copy.workflow) {
    pair.what.unshift({
      action: 'flag',
      args: ['generated'],
    });
  }
  return copy;
}
2025-03-06 02:41:11 +05:30
/**
 * Races `work` against a timer and rejects with `new Error(message)` if the timer
 * fires first. The timer is cleared once the race settles.
 */
function raceWithDeadline<T>(work: Promise<T>, timeoutMs: number, message: string): Promise<T> {
  let timer: ReturnType<typeof setTimeout> | undefined;
  const deadline = new Promise<never>((_, reject) => {
    timer = setTimeout(() => reject(new Error(message)), timeoutMs);
  });
  return Promise.race([work, deadline]).finally(() => {
    if (timer !== undefined) {
      clearTimeout(timer);
    }
  });
}

/**
 * Executes the run identified by `id` on its already-created remote browser.
 *
 * Flow:
 *  1. Guard clauses: missing run, `aborted`/`aborting`/`queued` status, retry count >= 3,
 *     missing robot. These return `{ success: false, error }`.
 *  2. Robots whose `recording_meta.type` is `'scrape'` convert the page to the requested
 *     formats (markdown / html / screenshots), each bounded by a 120 s timeout.
 *  3. All other robots are interpreted through `browser.interpreter.InterpretRecording`,
 *     bounded by a 600 s timeout, followed by output upload, analytics, socket
 *     notifications, webhooks and integration updates.
 *
 * @param id - `runId` of the run to execute.
 * @param userId - User owning the run; used for socket rooms and browser teardown.
 * @returns `true` on success, `false` when an error was caught and the run was marked
 *          failed, or `{ success: false, error }` when a guard clause stopped execution.
 */
async function executeRun(id: string, userId: string) {
  let browser: any = null;

  try {
    const run = await Run.findOne({ where: { runId: id } });
    if (!run) {
      return {
        success: false,
        error: 'Run not found'
      };
    }

    const plainRun = run.toJSON();

    if (run.status === 'aborted' || run.status === 'aborting') {
      logger.log('info', `Scheduled Run ${id} has status ${run.status}, skipping execution`);
      return {
        success: false,
        error: `Run has status ${run.status}`
      };
    }

    if (run.status === 'queued') {
      logger.log('info', `Scheduled Run ${id} has status 'queued', skipping stale execution - will be handled by recovery`);
      return {
        success: false,
        error: 'Run is queued and will be handled by recovery'
      };
    }

    const retryCount = plainRun.retryCount || 0;
    if (retryCount >= 3) {
      logger.log('warn', `Scheduled Run ${id} has exceeded max retries (${retryCount}/3), marking as failed`);
      const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId, userId }, raw: true });

      await run.update({
        status: 'failed',
        finishedAt: new Date().toLocaleString(),
        log: plainRun.log
          ? `${plainRun.log}\nMax retries exceeded (3/3) - Run failed after multiple attempts.`
          : `Max retries exceeded (3/3) - Run failed after multiple attempts.`
      });

      try {
        const failureSocketData = {
          runId: plainRun.runId,
          robotMetaId: plainRun.robotMetaId,
          robotName: recording ? recording.recording_meta.name : 'Unknown Robot',
          status: 'failed',
          finishedAt: new Date().toLocaleString()
        };
        serverIo.of(run.browserId).emit('run-completed', failureSocketData);
        serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', failureSocketData);
      } catch (socketError: any) {
        logger.log('warn', `Failed to emit failure event in main catch: ${socketError.message}`);
      }

      return {
        success: false,
        error: 'Max retries exceeded'
      };
    }

    const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
    if (!recording) {
      return {
        success: false,
        error: 'Recording not found'
      };
    }

    browser = browserPool.getRemoteBrowser(plainRun.browserId);
    if (!browser) {
      throw new Error('Could not access browser');
    }

    let currentPage = await browser.getCurrentPage();
    if (!currentPage) {
      throw new Error('Could not create a new page');
    }

    if (recording.recording_meta.type === 'scrape') {
      logger.log('info', `Executing scrape robot for scheduled run ${id}`);
      const formats = recording.recording_meta.formats || ['markdown'];

      await run.update({
        status: 'running',
        log: `Converting page to: ${formats.join(', ')}`
      });

      try {
        const runStartedData = {
          runId: plainRun.runId,
          robotMetaId: plainRun.robotMetaId,
          robotName: recording.recording_meta.name,
          status: 'running',
          startedAt: plainRun.startedAt
        };
        serverIo.of('/queued-run').to(`user-${userId}`).emit('run-started', runStartedData);
        logger.log(
          'info',
          `Markdown robot run started notification sent for run: ${plainRun.runId} to user-${userId}`
        );
      } catch (socketError: any) {
        logger.log(
          'warn',
          `Failed to send run-started notification for markdown robot run ${plainRun.runId}: ${socketError.message}`
        );
      }

      try {
        const url = recording.recording_meta.url;
        if (!url) {
          throw new Error('No URL specified for markdown robot');
        }

        let markdown = '';
        let html = '';
        const serializableOutput: any = {};
        const binaryOutput: any = {};

        const SCRAPE_TIMEOUT = 120000;

        // Markdown conversion
        if (formats.includes("markdown")) {
          markdown = await raceWithDeadline(
            convertPageToMarkdown(url, currentPage),
            SCRAPE_TIMEOUT,
            `Markdown conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`
          );
          serializableOutput.markdown = [{ content: markdown }];
        }

        // HTML conversion
        if (formats.includes("html")) {
          html = await raceWithDeadline(
            convertPageToHTML(url, currentPage),
            SCRAPE_TIMEOUT,
            `HTML conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`
          );
          serializableOutput.html = [{ content: html }];
        }

        // Screenshot - visible viewport
        if (formats.includes("screenshot-visible")) {
          const screenshotBuffer = await raceWithDeadline(
            convertPageToScreenshot(url, currentPage, false),
            SCRAPE_TIMEOUT,
            `Screenshot conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`
          );
          if (!binaryOutput['screenshot-visible']) {
            binaryOutput['screenshot-visible'] = {
              data: screenshotBuffer.toString('base64'),
              mimeType: 'image/png'
            };
          }
        }

        // Screenshot - full page
        if (formats.includes("screenshot-fullpage")) {
          const screenshotBuffer = await raceWithDeadline(
            convertPageToScreenshot(url, currentPage, true),
            SCRAPE_TIMEOUT,
            `Screenshot conversion timed out after ${SCRAPE_TIMEOUT / 1000}s`
          );
          if (!binaryOutput['screenshot-fullpage']) {
            binaryOutput['screenshot-fullpage'] = {
              data: screenshotBuffer.toString('base64'),
              mimeType: 'image/png'
            };
          }
        }

        await run.update({
          status: 'success',
          finishedAt: new Date().toLocaleString(),
          log: `${formats.join(', ')} conversion completed successfully`,
          serializableOutput,
          binaryOutput,
        });

        // Replace the inline base64 payloads with whatever the storage service returns.
        let uploadedBinaryOutput: Record<string, string> = {};
        if (Object.keys(binaryOutput).length > 0) {
          const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
          uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, binaryOutput);
          await run.update({ binaryOutput: uploadedBinaryOutput });
        }

        logger.log('info', `Markdown robot execution completed for scheduled run ${id}`);

        // Run-completed socket notifications
        try {
          const completionData = {
            runId: plainRun.runId,
            robotMetaId: plainRun.robotMetaId,
            robotName: recording.recording_meta.name,
            status: 'success',
            finishedAt: new Date().toLocaleString()
          };
          serverIo.of(plainRun.browserId).emit('run-completed', completionData);
          serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', completionData);
        } catch (socketError: any) {
          logger.log(
            'warn',
            `Failed to send run-completed notification for markdown robot run ${id}: ${socketError.message}`
          );
        }

        // Webhook payload
        const webhookPayload: any = {
          robot_id: plainRun.robotMetaId,
          run_id: plainRun.runId,
          robot_name: recording.recording_meta.name,
          status: 'success',
          started_at: plainRun.startedAt,
          finished_at: new Date().toLocaleString(),
          metadata: {
            browser_id: plainRun.browserId,
            user_id: userId,
          }
        };

        if (formats.includes('markdown')) webhookPayload.markdown = markdown;
        if (formats.includes('html')) webhookPayload.html = html;
        if (uploadedBinaryOutput['screenshot-visible']) webhookPayload.screenshot_visible = uploadedBinaryOutput['screenshot-visible'];
        if (uploadedBinaryOutput['screenshot-fullpage']) webhookPayload.screenshot_fullpage = uploadedBinaryOutput['screenshot-fullpage'];

        try {
          await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
          logger.log(
            'info',
            `Webhooks sent successfully for markdown robot scheduled run ${plainRun.runId}`
          );
        } catch (webhookError: any) {
          logger.log(
            'warn',
            `Failed to send webhooks for markdown robot run ${plainRun.runId}: ${webhookError.message}`
          );
        }

        capture("maxun-oss-run-created-scheduled", {
          runId: plainRun.runId,
          user_id: userId,
          status: "success",
          robot_type: "scrape",
          formats
        });

        await destroyRemoteBrowser(plainRun.browserId, userId);

        return true;
      } catch (error: any) {
        logger.log('error', `${formats.join(', ')} conversion failed for scheduled run ${id}: ${error.message}`);

        await run.update({
          status: 'failed',
          finishedAt: new Date().toLocaleString(),
          log: `${formats.join(', ')} conversion failed: ${error.message}`,
        });

        try {
          const failureData = {
            runId: plainRun.runId,
            robotMetaId: plainRun.robotMetaId,
            robotName: recording.recording_meta.name,
            status: 'failed',
            finishedAt: new Date().toLocaleString()
          };
          serverIo.of(plainRun.browserId).emit('run-completed', failureData);
          serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', failureData);
        } catch (socketError: any) {
          logger.log(
            'warn',
            `Failed to send run-failed notification for markdown robot run ${id}: ${socketError.message}`
          );
        }

        capture("maxun-oss-run-created-scheduled", {
          runId: plainRun.runId,
          user_id: userId,
          status: "failed",
          robot_type: "scrape",
          formats
        });

        await destroyRemoteBrowser(plainRun.browserId, userId);

        // NOTE(review): rethrowing hands control to the outer catch, which sends the
        // failure webhook but also repeats the status update, socket emit, analytics
        // capture and browser teardown performed above. Confirm this is intended.
        throw error;
      }
    }

    plainRun.status = 'running';

    try {
      const runStartedData = {
        runId: plainRun.runId,
        robotMetaId: plainRun.robotMetaId,
        robotName: recording ? recording.recording_meta.name : 'Unknown Robot',
        status: 'running',
        startedAt: plainRun.startedAt
      };

      serverIo.of('/queued-run').to(`user-${userId}`).emit('run-started', runStartedData);
      logger.log('info', `Run started notification sent for run: ${plainRun.runId} to user-${userId}`);
    } catch (socketError: any) {
      logger.log('warn', `Failed to send run-started notification for run ${plainRun.runId}: ${socketError.message}`);
    }

    const workflow = AddGeneratedFlags(recording.recording);

    // Set run ID for real-time data persistence
    browser.interpreter.setRunId(id);

    const INTERPRETATION_TIMEOUT = 600000;

    // `browser` is untyped, so the interpreter result is untyped as well.
    const interpretationInfo = await raceWithDeadline<any>(
      browser.interpreter.InterpretRecording(
        workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings
      ),
      INTERPRETATION_TIMEOUT,
      `Workflow interpretation timed out after ${INTERPRETATION_TIMEOUT / 1000}s`
    );

    const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
    const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput);

    // Re-read the row to pick up the serializable output stored on it during interpretation.
    const finalRun = await Run.findByPk(run.id);
    const categorizedOutput = {
      scrapeSchema: finalRun?.serializableOutput?.scrapeSchema || {},
      scrapeList: finalRun?.serializableOutput?.scrapeList || {},
      crawl: finalRun?.serializableOutput?.crawl || {},
      search: finalRun?.serializableOutput?.search || {}
    };

    await destroyRemoteBrowser(plainRun.browserId, userId);

    await run.update({
      status: 'success',
      finishedAt: new Date().toLocaleString(),
      log: interpretationInfo.log.join('\n'),
      binaryOutput: uploadedBinaryOutput
    });

    // Get metrics from persisted data for analytics and webhooks
    let totalSchemaItemsExtracted = 0;
    let totalListItemsExtracted = 0;
    let extractedScreenshotsCount = 0;

    // Schema results count per array element, or as one item when a plain object.
    Object.values(categorizedOutput.scrapeSchema).forEach((schemaResult: any) => {
      if (Array.isArray(schemaResult)) {
        totalSchemaItemsExtracted += schemaResult.length;
      } else if (schemaResult && typeof schemaResult === 'object') {
        totalSchemaItemsExtracted += 1;
      }
    });

    Object.values(categorizedOutput.scrapeList).forEach((listResult: any) => {
      if (Array.isArray(listResult)) {
        totalListItemsExtracted += listResult.length;
      }
    });

    if (run.binaryOutput) {
      extractedScreenshotsCount = Object.keys(run.binaryOutput).length;
    }

    const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted;

    capture(
      'maxun-oss-run-created-scheduled',
      {
        runId: id,
        created_at: new Date().toISOString(),
        status: 'success',
        totalRowsExtracted,
        schemaItemsExtracted: totalSchemaItemsExtracted,
        listItemsExtracted: totalListItemsExtracted,
        extractedScreenshotsCount,
        is_llm: (recording.recording_meta as any).isLLM,
      }
    );

    try {
      const completionData = {
        runId: plainRun.runId,
        robotMetaId: plainRun.robotMetaId,
        robotName: recording.recording_meta.name,
        status: 'success',
        finishedAt: new Date().toLocaleString()
      };
      serverIo.of(plainRun.browserId).emit('run-completed', completionData);
      serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', completionData);
    } catch (emitError: any) {
      logger.log('warn', `Failed to emit success event: ${emitError.message}`);
    }

    const webhookPayload = {
      robot_id: plainRun.robotMetaId,
      run_id: plainRun.runId,
      robot_name: recording.recording_meta.name,
      status: 'success',
      started_at: plainRun.startedAt,
      finished_at: new Date().toLocaleString(),
      extracted_data: {
        // Every schema entry is delivered as an array, wrapping single objects.
        captured_texts: Object.keys(categorizedOutput.scrapeSchema || {}).length > 0
          ? Object.entries(categorizedOutput.scrapeSchema).reduce((acc, [name, value]) => {
            acc[name] = Array.isArray(value) ? value : [value];
            return acc;
          }, {} as Record<string, any[]>)
          : {},
        captured_lists: categorizedOutput.scrapeList,
        crawl_data: categorizedOutput.crawl,
        search_data: categorizedOutput.search,
        captured_texts_count: totalSchemaItemsExtracted,
        captured_lists_count: totalListItemsExtracted,
        screenshots_count: extractedScreenshotsCount
      },
      metadata: {
        browser_id: plainRun.browserId,
        user_id: userId,
      }
    };

    try {
      await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload);
      logger.log('info', `Webhooks sent successfully for completed run ${plainRun.runId}`);
    } catch (webhookError: any) {
      logger.log('error', `Failed to send webhooks for run ${plainRun.runId}: ${webhookError.message}`);
    }

    await triggerIntegrationUpdates(plainRun.runId, plainRun.robotMetaId);
    return true;
  } catch (error: any) {
    logger.log('error', `Error while running a robot with id: ${id} - ${error.message}`);

    const run = await Run.findOne({ where: { runId: id } });
    if (run) {
      if (browser) {
        try {
          if (browser.interpreter) {
            await browser.interpreter.clearState();
          }
          await destroyRemoteBrowser(run.browserId, userId);
        } catch (cleanupError: any) {
          logger.error(`Failed to cleanup browser in error handler: ${cleanupError.message}`);
        }
      }

      await run.update({
        status: 'failed',
        finishedAt: new Date().toLocaleString(),
      });

      const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true });

      // Trigger webhooks for run failure
      const failedWebhookPayload = {
        robot_id: run.robotMetaId,
        run_id: run.runId,
        robot_name: recording ? recording.recording_meta.name : 'Unknown Robot',
        status: 'failed',
        started_at: run.startedAt,
        finished_at: new Date().toLocaleString(),
        error: {
          message: error.message,
          stack: error.stack,
          type: error.name || 'ExecutionError'
        },
        metadata: {
          browser_id: run.browserId,
          user_id: userId,
        }
      };

      try {
        await sendWebhook(run.robotMetaId, 'run_failed', failedWebhookPayload);
        logger.log('info', `Failure webhooks sent successfully for run ${run.runId}`);
      } catch (webhookError: any) {
        logger.log('error', `Failed to send failure webhooks for run ${run.runId}: ${webhookError.message}`);
      }

      try {
        const failureSocketData = {
          runId: run.runId,
          robotMetaId: run.robotMetaId,
          robotName: recording ? recording.recording_meta.name : 'Unknown Robot',
          status: 'failed',
          finishedAt: new Date().toLocaleString()
        };
        serverIo.of(run.browserId).emit('run-completed', failureSocketData);
        serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', failureSocketData);
      } catch (socketError: any) {
        logger.log('warn', `Failed to emit failure event in main catch: ${socketError.message}`);
      }

      capture(
        'maxun-oss-run-created-scheduled',
        {
          runId: id,
          created_at: new Date().toISOString(),
          status: 'failed',
          is_llm: (recording?.recording_meta as any)?.isLLM,
        }
      );
    }
    return false;
  }
}
2024-09-12 21:01:46 +05:30
2025-11-29 12:43:19 +05:30
/**
 * Handler for the `ready-for-run` socket event: executes the run, tears the browser
 * down when execution did not succeed, and always cleans up the socket connection.
 *
 * `executeRun` returns `true` on success, `false` on a caught failure, and a
 * `{ success: false, error }` object when a guard clause skipped execution. Only a
 * literal `true` is treated as success; the object form is truthy and must not be.
 *
 * @param browserId - Remote browser (and socket namespace) used for the run.
 * @param id - `runId` of the run to execute.
 * @param userId - User owning the run.
 * @param socket - Client socket to clean up once the run is over.
 */
async function readyForRunHandler(browserId: string, id: string, userId: string, socket: Socket) {
  try {
    const interpretation = await executeRun(id, userId);

    if (interpretation === true) {
      logger.log('info', `Interpretation of ${id} succeeded`);
    } else {
      logger.log('error', `Interpretation of ${id} failed`);
      await destroyRemoteBrowser(browserId, userId);
    }

    resetRecordingState(browserId, id);
  } catch (error: any) {
    logger.error(`Error during readyForRunHandler: ${error.message}`);
    // This function runs as an event listener, so a rejection here would be unhandled.
    try {
      await destroyRemoteBrowser(browserId, userId);
    } catch (cleanupError: any) {
      logger.error(`Failed to destroy browser ${browserId} in readyForRunHandler: ${cleanupError.message}`);
    }
  } finally {
    cleanupSocketConnection(socket, browserId, id);
  }
}
2024-10-10 03:03:14 +05:30
/**
 * Reassigns its own parameters to empty strings.
 *
 * The parameters are strings passed by value, so the assignments are local to this
 * call and nothing outside the function is changed.
 *
 * @param browserId - Unused beyond being overwritten locally.
 * @param id - Unused beyond being overwritten locally.
 */
function resetRecordingState(browserId: string, id: string): void {
  browserId = '';
  id = '';
}
2024-09-12 21:01:46 +05:30
2024-10-10 03:22:04 +05:30
/**
 * Entry point for a scheduled run: creates the run and its remote browser, then
 * connects a socket.io client to the browser's namespace and starts execution when
 * the `ready-for-run` event arrives.
 *
 * Errors are logged and swallowed; the returned promise does not reject.
 *
 * @param id - Robot identifier forwarded to `createWorkflowAndStoreMetadata`.
 * @param userId - User the run is executed for.
 */
export async function handleRunRecording(id: string, userId: string) {
  let socket: Socket | null = null;

  try {
    const result = await createWorkflowAndStoreMetadata(id, userId);
    const { browserId, runId: newRunId, error: schedulingError } = result;

    if (!browserId || !newRunId || !userId) {
      // Surface the underlying scheduling error instead of hiding it behind the generic text.
      const detail = schedulingError ? `: ${schedulingError}` : '';
      throw new Error(`browserId or runId or userId is undefined${detail}`);
    }

    const CONNECTION_TIMEOUT = 30000;
    const backendUrl = process.env.BACKEND_URL ? process.env.BACKEND_URL : 'http://localhost:5000';

    const runSocket = io(`${backendUrl}/${browserId}`, {
      transports: ['websocket'],
      // NOTE(review): TLS certificate verification is disabled for this connection.
      rejectUnauthorized: false,
      timeout: CONNECTION_TIMEOUT,
    });
    socket = runSocket;

    const readyHandler = () => readyForRunHandler(browserId, newRunId, userId, runSocket);

    runSocket.on('ready-for-run', readyHandler);

    runSocket.on('connect_error', (error: Error) => {
      logger.error(`Socket connection error for scheduled run ${newRunId}: ${error.message}`);
      cleanupSocketConnection(runSocket, browserId, newRunId);

      // The cleanup above disconnects the socket, so `ready-for-run` can no longer
      // arrive and the browser created for this run would stay in the pool unused.
      void (async () => {
        try {
          await destroyRemoteBrowser(browserId, userId);
        } catch (cleanupError: any) {
          logger.error(`Failed to destroy browser ${browserId} after connection error: ${cleanupError.message}`);
        }
      })();
    });

    runSocket.on('disconnect', () => {
      cleanupSocketConnection(runSocket, browserId, newRunId);
    });

    logger.log('info', `Running robot: ${id}`);
  } catch (error: any) {
    logger.error('Error running recording:', error);
    if (socket) {
      cleanupSocketConnection(socket, '', '');
    }
  }
}
2025-11-29 12:43:19 +05:30
/**
 * Detaches and disconnects the client socket and, when `browserId` is non-empty,
 * tears down the matching server-side namespace.
 *
 * Any error raised during cleanup is logged and not rethrown.
 *
 * @param socket - Client socket whose listeners are removed before disconnecting.
 * @param browserId - Namespace name on the server; pass `''` to skip namespace teardown.
 * @param id - Run identifier, used for logging only.
 */
function cleanupSocketConnection(socket: Socket, browserId: string, id: string) {
  try {
    socket.removeAllListeners();
    socket.disconnect();

    if (browserId) {
      const namespace = serverIo.of(browserId);
      namespace.removeAllListeners();
      namespace.disconnectSockets(true);

      // `_nsps` is not part of the typed server API, hence the cast. The entry is
      // looked up under the namespace name with a leading slash.
      const nsps = (serverIo as any)._nsps;
      if (nsps && nsps.has(`/${browserId}`)) {
        nsps.delete(`/${browserId}`);
        logger.log('debug', `Deleted namespace /${browserId} from io._nsps Map`);
      }
    }

    logger.log('info', `Cleaned up socket connection for browserId: ${browserId}, runId: ${id}`);
  } catch (error: any) {
    logger.error(`Error cleaning up socket connection: ${error.message}`);
  }
}
2024-10-12 15:44:22 +05:30
export { createWorkflowAndStoreMetadata } ;