From 0d45d1d7f1f4bbc4ee0e2eedd832797a050a86cf Mon Sep 17 00:00:00 2001 From: Rohit Rajan Date: Thu, 20 Nov 2025 13:19:12 +0530 Subject: [PATCH] feat: markdownify manual, scheduled, api runs --- server/src/api/record.ts | 110 ++++++++++++++++- server/src/pgboss-worker.ts | 95 +++++++++++++- .../workflow-management/scheduler/index.ts | 116 +++++++++++++++++- 3 files changed, 316 insertions(+), 5 deletions(-) diff --git a/server/src/api/record.ts b/server/src/api/record.ts index 29d1f261..f55e2b3f 100644 --- a/server/src/api/record.ts +++ b/server/src/api/record.ts @@ -344,7 +344,8 @@ function formatRunResponse(run: any) { runByAPI: run.runByAPI, data: { textData: {}, - listData: {} + listData: {}, + markdown: '' }, screenshots: [] as any[], }; @@ -359,6 +360,10 @@ function formatRunResponse(run: any) { formattedRun.data.listData = output.scrapeList; } + if (output.markdown && Array.isArray(output.markdown)) { + formattedRun.data.markdown = output.markdown[0]?.content || ''; + } + if (run.binaryOutput) { Object.keys(run.binaryOutput).forEach(key => { if (run.binaryOutput[key]) { @@ -651,6 +656,106 @@ async function executeRun(id: string, userId: string) { }; } + if (recording.recording_meta.type === 'markdown') { + logger.log('info', `Executing markdown robot for API run ${id}`); + + await run.update({ + status: 'running', + log: 'Converting page to markdown' + }); + + try { + const { convertPageToMarkdown } = await import('../markdownify/scrape'); + const url = recording.recording_meta.url; + + if (!url) { + throw new Error('No URL specified for markdown robot'); + } + + const markdown = await convertPageToMarkdown(url); + + await run.update({ + status: 'success', + finishedAt: new Date().toLocaleString(), + log: 'Markdown conversion completed successfully', + serializableOutput: { + markdown: [{ content: markdown }] + }, + binaryOutput: {}, + }); + + logger.log('info', `Markdown robot execution completed for API run ${id}`); + + try { + const completionData = { + runId: plainRun.runId, + robotMetaId: plainRun.robotMetaId, + robotName: recording.recording_meta.name, + status: 'success', + finishedAt: new Date().toLocaleString() + }; + + serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', completionData); + } catch (socketError: any) { + logger.log('warn', `Failed to send run-completed notification for markdown robot run ${id}: ${socketError.message}`); + } + + const webhookPayload = { + robot_id: plainRun.robotMetaId, + run_id: plainRun.runId, + robot_name: recording.recording_meta.name, + status: 'success', + started_at: plainRun.startedAt, + finished_at: new Date().toLocaleString(), + markdown: markdown, + metadata: { + browser_id: plainRun.browserId, + user_id: userId, + } + }; + + try { + await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload); + logger.log('info', `Webhooks sent successfully for markdown robot API run ${plainRun.runId}`); + } catch (webhookError: any) { + logger.log('warn', `Failed to send webhooks for markdown robot run ${plainRun.runId}: ${webhookError.message}`); + } + + await destroyRemoteBrowser(plainRun.browserId, userId); + + return { + success: true, + interpretationInfo: run.toJSON() + }; + } catch (error: any) { + logger.log('error', `Markdown conversion failed for API run ${id}: ${error.message}`); + + await run.update({ + status: 'failed', + finishedAt: new Date().toLocaleString(), + log: `Markdown conversion failed: ${error.message}`, + }); + + try { + const failureData = { + runId: plainRun.runId, + robotMetaId: plainRun.robotMetaId, + robotName: recording.recording_meta.name, + status: 'failed', + finishedAt: new Date().toLocaleString() + }; + + serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', failureData); + } catch (socketError: any) { + logger.log('warn', `Failed to send run-failed notification for markdown robot run ${id}: ${socketError.message}`); + } + + await destroyRemoteBrowser(plainRun.browserId, userId); + + throw error; + } + } + plainRun.status = 'running'; browser = browserPool.getRemoteBrowser(plainRun.browserId); @@ -889,12 +994,11 @@ async function waitForRunCompletion(runId: string, interval: number = 2000) { if (!run) throw new Error('Run not found'); if (run.status === 'success') { - return run.toJSON(); + return run; } else if (run.status === 'failed') { throw new Error('Run failed'); } - // Wait for the next polling interval await new Promise(resolve => setTimeout(resolve, interval)); } } diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index b9f41100..0fcd7f65 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -187,7 +187,100 @@ async function processRunExecution(job: Job) { if (!recording) { throw new Error(`Recording for run ${data.runId} not found`); } - + + if (recording.recording_meta.type === 'markdown') { + logger.log('info', `Executing markdown robot for run ${data.runId}`); + + await run.update({ + status: 'running', + log: 'Converting page to markdown' + }); + + try { + const { convertPageToMarkdown } = await import('./markdownify/scrape'); + const url = recording.recording_meta.url; + + if (!url) { + throw new Error('No URL specified for markdown robot'); + } + + const markdown = await convertPageToMarkdown(url); + + await run.update({ + status: 'success', + finishedAt: new Date().toLocaleString(), + log: 'Markdown conversion completed successfully', + serializableOutput: { + markdown: [{ content: markdown }] + }, + binaryOutput: {}, + }); + + logger.log('info', `Markdown robot execution completed for run ${data.runId}`); + + try { + const completionData = { + runId: data.runId, + robotMetaId: plainRun.robotMetaId, + robotName: recording.recording_meta.name, + status: 'success', + finishedAt: new Date().toLocaleString() + }; + + serverIo.of(browserId).emit('run-completed', completionData); + serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', completionData); + } catch (socketError: any) { + logger.log('warn', `Failed to send run-completed notification for markdown robot run ${data.runId}: ${socketError.message}`); + } + + try { + const webhookPayload = { + runId: data.runId, + robotId: plainRun.robotMetaId, + robotName: recording.recording_meta.name, + status: 'success', + finishedAt: new Date().toLocaleString(), + markdown: markdown + }; + await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload); + logger.log('info', `Webhooks sent successfully for markdown robot run ${data.runId}`); + } catch (webhookError: any) { + logger.log('warn', `Failed to send webhooks for markdown robot run ${data.runId}: ${webhookError.message}`); + } + + await destroyRemoteBrowser(browserId, data.userId); + + return { success: true }; + } catch (error: any) { + logger.log('error', `Markdown conversion failed for run ${data.runId}: ${error.message}`); + + await run.update({ + status: 'failed', + finishedAt: new Date().toLocaleString(), + log: `Markdown conversion failed: ${error.message}`, + }); + + try { + const failureData = { + runId: data.runId, + robotMetaId: plainRun.robotMetaId, + robotName: recording.recording_meta.name, + status: 'failed', + finishedAt: new Date().toLocaleString() + }; + + serverIo.of(browserId).emit('run-completed', failureData); + serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', failureData); + } catch (socketError: any) { + logger.log('warn', `Failed to send run-failed notification for markdown robot run ${data.runId}: ${socketError.message}`); + } + + await destroyRemoteBrowser(browserId, data.userId); + + throw error; + } + } + const isRunAborted = async (): Promise => { try { const currentRun = await Run.findOne({ where: { runId: data.runId } }); diff --git a/server/src/workflow-management/scheduler/index.ts b/server/src/workflow-management/scheduler/index.ts index 899cb7f6..ba47b3e0 100644 --- a/server/src/workflow-management/scheduler/index.ts +++ b/server/src/workflow-management/scheduler/index.ts @@ -207,6 +207,120 @@ async function executeRun(id: string, userId: string) { } } + if (recording.recording_meta.type === 'markdown') { + logger.log('info', `Executing markdown robot for scheduled run ${id}`); + + await run.update({ + status: 'running', + log: 'Converting page to markdown' + }); + + try { + const runStartedData = { + runId: plainRun.runId, + robotMetaId: plainRun.robotMetaId, + robotName: recording.recording_meta.name, + status: 'running', + startedAt: plainRun.startedAt + }; + + serverIo.of('/queued-run').to(`user-${userId}`).emit('run-started', runStartedData); + logger.log('info', `Markdown robot run started notification sent for run: ${plainRun.runId} to user-${userId}`); + } catch (socketError: any) { + logger.log('warn', `Failed to send run-started notification for markdown robot run ${plainRun.runId}: ${socketError.message}`); + } + + try { + const { convertPageToMarkdown } = await import('../../markdownify/scrape'); + const url = recording.recording_meta.url; + + if (!url) { + throw new Error('No URL specified for markdown robot'); + } + + const markdown = await convertPageToMarkdown(url); + + await run.update({ + status: 'success', + finishedAt: new Date().toLocaleString(), + log: 'Markdown conversion completed successfully', + serializableOutput: { + markdown: [{ content: markdown }] + }, + binaryOutput: {}, + }); + + logger.log('info', `Markdown robot execution completed for scheduled run ${id}`); + + try { + const completionData = { + runId: plainRun.runId, + robotMetaId: plainRun.robotMetaId, + robotName: recording.recording_meta.name, + status: 'success', + finishedAt: new Date().toLocaleString() + }; + + serverIo.of(plainRun.browserId).emit('run-completed', completionData); + serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', completionData); + } catch (socketError: any) { + logger.log('warn', `Failed to send run-completed notification for markdown robot run ${id}: ${socketError.message}`); + } + + const webhookPayload = { + robot_id: plainRun.robotMetaId, + run_id: plainRun.runId, + robot_name: recording.recording_meta.name, + status: 'success', + started_at: plainRun.startedAt, + finished_at: new Date().toLocaleString(), + markdown: markdown, + metadata: { + browser_id: plainRun.browserId, + user_id: userId, + } + }; + + try { + await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload); + logger.log('info', `Webhooks sent successfully for markdown robot scheduled run ${plainRun.runId}`); + } catch (webhookError: any) { + logger.log('warn', `Failed to send webhooks for markdown robot run ${plainRun.runId}: ${webhookError.message}`); + } + + await destroyRemoteBrowser(plainRun.browserId, userId); + + return true; + } catch (error: any) { + logger.log('error', `Markdown conversion failed for scheduled run ${id}: ${error.message}`); + + await run.update({ + status: 'failed', + finishedAt: new Date().toLocaleString(), + log: `Markdown conversion failed: ${error.message}`, + }); + + try { + const failureData = { + runId: plainRun.runId, + robotMetaId: plainRun.robotMetaId, + robotName: recording.recording_meta.name, + status: 'failed', + finishedAt: new Date().toLocaleString() + }; + + serverIo.of(plainRun.browserId).emit('run-completed', failureData); + serverIo.of('/queued-run').to(`user-${userId}`).emit('run-completed', failureData); + } catch (socketError: any) { + logger.log('warn', `Failed to send run-failed notification for markdown robot run ${id}: ${socketError.message}`); + } + + await destroyRemoteBrowser(plainRun.browserId, userId); + + throw error; + } + } + plainRun.status = 'running'; try { @@ -217,7 +331,7 @@ async function executeRun(id: string, userId: string) { status: 'running', startedAt: plainRun.startedAt }; - + serverIo.of('/queued-run').to(`user-${userId}`).emit('run-started', runStartedData); logger.log('info', `Run started notification sent for run: ${plainRun.runId} to user-${userId}`); } catch (socketError: any) {