feat: rm post-run data processing

2025-09-10 00:17:04 +05:30
parent 1374591695
commit 8d10bab5bf
3 changed files with 150 additions and 279 deletions
--- a/server/src/api/record.ts
+++ b/server/src/api/record.ts
@@ -597,65 +597,53 @@ async function executeRun(id: string, userId: string) {
        }
        const workflow = AddGeneratedFlags(recording.recording);
        browser.interpreter.setRunId(id);
        const interpretationInfo = await browser.interpreter.InterpretRecording(
            workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings
        );
        const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
        const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput);
        const categorizedOutput = {
            scrapeSchema: interpretationInfo.scrapeSchemaOutput || {},
            scrapeList: interpretationInfo.scrapeListOutput || {},
        };
        await destroyRemoteBrowser(plainRun.browserId, userId);
        const updatedRun = await run.update({
            ...run,
            status: 'success',
            finishedAt: new Date().toLocaleString(),
            browserId: plainRun.browserId,
            log: interpretationInfo.log.join('\n'),
            serializableOutput: {
                scrapeSchema: Object.values(categorizedOutput.scrapeSchema),
                scrapeList: Object.values(categorizedOutput.scrapeList),
            },
            binaryOutput: uploadedBinaryOutput,
        });
-      let totalSchemaItemsExtracted = 0;
+        let totalSchemaItemsExtracted = 0;
-      let totalListItemsExtracted = 0;
+        let totalListItemsExtracted = 0;
-      let extractedScreenshotsCount = 0;
+        let extractedScreenshotsCount = 0;
-      
+        
-      if (categorizedOutput.scrapeSchema) {
+        const finalRun = await Run.findOne({ where: { runId: id } });
-        Object.values(categorizedOutput.scrapeSchema).forEach((schemaResult: any) => {
+        if (finalRun) {
-          if (Array.isArray(schemaResult)) {
+            if (finalRun.serializableOutput) {
-            totalSchemaItemsExtracted += schemaResult.length;
+                if (finalRun.serializableOutput.scrapeSchema) {
-          } else if (schemaResult && typeof schemaResult === 'object') {
+                    Object.values(finalRun.serializableOutput.scrapeSchema).forEach((schemaResult: any) => {
-            totalSchemaItemsExtracted += 1;
+                        if (Array.isArray(schemaResult)) {
-          }
+                            totalSchemaItemsExtracted += schemaResult.length;
-        });
+                        } else if (schemaResult && typeof schemaResult === 'object') {
-      }
+                            totalSchemaItemsExtracted += 1;
-      
+                        }
-      if (categorizedOutput.scrapeList) {
+                    });
-        Object.values(categorizedOutput.scrapeList).forEach((listResult: any) => {
+                }
-          if (Array.isArray(listResult)) {
+                
-            totalListItemsExtracted += listResult.length;
+                if (finalRun.serializableOutput.scrapeList) {
-          }
+                    Object.values(finalRun.serializableOutput.scrapeList).forEach((listResult: any) => {
-        });
+                        if (Array.isArray(listResult)) {
-      }
+                            totalListItemsExtracted += listResult.length;
-      
+                        }
-      if (uploadedBinaryOutput) {
+                    });
-        extractedScreenshotsCount = Object.keys(uploadedBinaryOutput).length;
+                }
-      }
+            }
-      
+            
-      const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted;
+            if (finalRun.binaryOutput) {
-      
+                extractedScreenshotsCount = Object.keys(finalRun.binaryOutput).length;
-      console.log(`Extracted Schema Items Count: ${totalSchemaItemsExtracted}`);
+            }
-      console.log(`Extracted List Items Count: ${totalListItemsExtracted}`);
+        }
-      console.log(`Extracted Screenshots Count: ${extractedScreenshotsCount}`);
+        
-      console.log(`Total Rows Extracted: ${totalRowsExtracted}`);
+        const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted;
        capture('maxun-oss-run-created-api',{
                runId: id,
@@ -668,7 +656,6 @@ async function executeRun(id: string, userId: string) {
            }
        )
        // Trigger webhooks for run completion
        const webhookPayload = {
            robot_id: plainRun.robotMetaId,
            run_id: plainRun.runId,
@@ -677,8 +664,8 @@ async function executeRun(id: string, userId: string) {
            started_at: plainRun.startedAt,
            finished_at: new Date().toLocaleString(),
            extracted_data: {
-                captured_texts: Object.values(categorizedOutput.scrapeSchema).flat() || [],
+                captured_texts: finalRun?.serializableOutput?.scrapeSchema ? Object.values(finalRun.serializableOutput.scrapeSchema).flat() : [],
-                captured_lists: categorizedOutput.scrapeList,
+                captured_lists: finalRun?.serializableOutput?.scrapeList || {},
                total_rows: totalRowsExtracted,
                captured_texts_count: totalSchemaItemsExtracted,
                captured_lists_count: totalListItemsExtracted,
--- a/server/src/pgboss-worker.ts
+++ b/server/src/pgboss-worker.ts
@@ -14,11 +14,9 @@ import Run from './models/Run';
 import Robot from './models/Robot';
 import { browserPool } from './server';
 import { Page } from 'playwright';
 import { BinaryOutputService } from './storage/mino';
 import { capture } from './utils/analytics';
 import { googleSheetUpdateTasks, processGoogleSheetUpdates } from './workflow-management/integrations/gsheet';
 import { airtableUpdateTasks, processAirtableUpdates } from './workflow-management/integrations/airtable';
 import { RemoteBrowser } from './browser-management/classes/RemoteBrowser';
 import { io as serverIo } from "./server";
 import { sendWebhook } from './routes/webhook';
@@ -85,107 +83,6 @@ function AddGeneratedFlags(workflow: WorkflowFile) {
  return copy;
 };
 /**
 * Helper function to extract and process scraped data from browser interpreter
 */
 async function extractAndProcessScrapedData(
  browser: RemoteBrowser, 
  run: any
 ): Promise<{
  categorizedOutput: any;
  uploadedBinaryOutput: any;
  totalDataPointsExtracted: number;
  totalSchemaItemsExtracted: number;
  totalListItemsExtracted: number;
  extractedScreenshotsCount: number;
 }> {
  let categorizedOutput: {
    scrapeSchema: Record<string, any>;
    scrapeList: Record<string, any>;
  } = {
    scrapeSchema: {},
    scrapeList: {}
  };
  if ((browser?.interpreter?.serializableDataByType?.scrapeSchema ?? []).length > 0) {
    browser?.interpreter?.serializableDataByType?.scrapeSchema?.forEach((schemaItem: any, index: any) => {
      categorizedOutput.scrapeSchema[`schema-${index}`] = schemaItem;
    });
  }
  if ((browser?.interpreter?.serializableDataByType?.scrapeList ?? []).length > 0) {
    browser?.interpreter?.serializableDataByType?.scrapeList?.forEach((listItem: any, index: any) => {
      categorizedOutput.scrapeList[`list-${index}`] = listItem;
    });
  }
  const binaryOutput = browser?.interpreter?.binaryData?.reduce(
    (reducedObject: Record<string, any>, item: any, index: number): Record<string, any> => {
      return {
        [`item-${index}`]: item,
        ...reducedObject,
      };
    }, 
    {}
  ) || {};
  let totalDataPointsExtracted = 0;
  let totalSchemaItemsExtracted = 0;
  let totalListItemsExtracted = 0;
  let extractedScreenshotsCount = 0;
  if (categorizedOutput.scrapeSchema) {
    Object.values(categorizedOutput.scrapeSchema).forEach((schemaResult: any) => {
      if (Array.isArray(schemaResult)) {
        schemaResult.forEach(obj => {
          if (obj && typeof obj === 'object') {
            totalDataPointsExtracted += Object.keys(obj).length;
          }
        });
        totalSchemaItemsExtracted += schemaResult.length;
      } else if (schemaResult && typeof schemaResult === 'object') {
        totalDataPointsExtracted += Object.keys(schemaResult).length;
        totalSchemaItemsExtracted += 1;
      }
    });
  }
  if (categorizedOutput.scrapeList) {
    Object.values(categorizedOutput.scrapeList).forEach((listResult: any) => {
      if (Array.isArray(listResult)) {
        listResult.forEach(obj => {
          if (obj && typeof obj === 'object') {
            totalDataPointsExtracted += Object.keys(obj).length;
          }
        });
        totalListItemsExtracted += listResult.length;
      }
    });
  }
  if (binaryOutput) {
    extractedScreenshotsCount = Object.keys(binaryOutput).length;
    totalDataPointsExtracted += extractedScreenshotsCount;
  }
  const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
  const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(
    run,
    binaryOutput
  );
  return {
    categorizedOutput: {
      scrapeSchema: categorizedOutput.scrapeSchema || {},
      scrapeList: categorizedOutput.scrapeList || {}
    },
    uploadedBinaryOutput,
    totalDataPointsExtracted,
    totalSchemaItemsExtracted,
    totalListItemsExtracted,
    extractedScreenshotsCount
  };
 }
 // Helper function to handle integration updates
 async function triggerIntegrationUpdates(runId: string, robotMetaId: string): Promise<void> {
@@ -234,6 +131,11 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
      return { success: true }; 
    }
    if (run.status === 'queued') {
      logger.log('info', `Run ${data.runId} has status 'queued', skipping stale execution job - processQueuedRuns will handle it`);
      return { success: true };
    }
    const plainRun = run.toJSON();
    const browserId = data.browserId || plainRun.browserId;
@@ -309,6 +211,9 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
      // Execute the workflow
      const workflow = AddGeneratedFlags(recording.recording);
      browser.interpreter.setRunId(data.runId);
      const interpretationInfo = await browser.interpreter.InterpretRecording(
        workflow, 
        currentPage, 
@@ -326,79 +231,49 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
      logger.log('info', `Workflow execution completed for run ${data.runId}`);
      const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
      const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput);
      const categorizedOutput = {
        scrapeSchema: interpretationInfo.scrapeSchemaOutput || {},
        scrapeList: interpretationInfo.scrapeListOutput || {}
      };
      if (await isRunAborted()) {
        logger.log('info', `Run ${data.runId} was aborted while processing results, not updating status`);
        return { success: true };
      }
      await run.update({
        ...run,
        status: 'success',
        finishedAt: new Date().toLocaleString(),
-        browserId: plainRun.browserId,
+        log: interpretationInfo.log.join('\n')
        log: interpretationInfo.log.join('\n'),
        serializableOutput: {
          scrapeSchema: Object.values(categorizedOutput.scrapeSchema),
          scrapeList: Object.values(categorizedOutput.scrapeList),
        },
        binaryOutput: uploadedBinaryOutput,
      });
      // Track extraction metrics
      let totalDataPointsExtracted = 0;
      let totalSchemaItemsExtracted = 0;
      let totalListItemsExtracted = 0;
      let extractedScreenshotsCount = 0;
-      if (categorizedOutput.scrapeSchema) {
+      const updatedRun = await Run.findOne({ where: { runId: data.runId } });
-        Object.values(categorizedOutput.scrapeSchema).forEach((schemaResult: any) => {
+      if (updatedRun) {
-          if (Array.isArray(schemaResult)) {
+        if (updatedRun.serializableOutput) {
-            schemaResult.forEach(obj => {
+          if (updatedRun.serializableOutput.scrapeSchema) {
-              if (obj && typeof obj === 'object') {
+            Object.values(updatedRun.serializableOutput.scrapeSchema).forEach((schemaResult: any) => {
-                totalDataPointsExtracted += Object.keys(obj).length;
+              if (Array.isArray(schemaResult)) {
                totalSchemaItemsExtracted += schemaResult.length;
              } else if (schemaResult && typeof schemaResult === 'object') {
                totalSchemaItemsExtracted += 1;
              }
            });
            totalSchemaItemsExtracted += schemaResult.length;
          } else if (schemaResult && typeof schemaResult === 'object') {
            totalDataPointsExtracted += Object.keys(schemaResult).length;
            totalSchemaItemsExtracted += 1;
          }
-        });
+          
-      }
+          if (updatedRun.serializableOutput.scrapeList) {
-
+            Object.values(updatedRun.serializableOutput.scrapeList).forEach((listResult: any) => {
-      if (categorizedOutput.scrapeList) {
+              if (Array.isArray(listResult)) {
-        Object.values(categorizedOutput.scrapeList).forEach((listResult: any) => {
+                totalListItemsExtracted += listResult.length;
          if (Array.isArray(listResult)) {
            listResult.forEach(obj => {
              if (obj && typeof obj === 'object') {
                totalDataPointsExtracted += Object.keys(obj).length;
              }
            });
            totalListItemsExtracted += listResult.length;
          }
-        });
+        }
-      }
+        
-
+        if (updatedRun.binaryOutput) {
-      if (uploadedBinaryOutput) {
+          extractedScreenshotsCount = Object.keys(updatedRun.binaryOutput).length;
-        extractedScreenshotsCount = Object.keys(uploadedBinaryOutput).length;
+        }
        totalDataPointsExtracted += extractedScreenshotsCount; 
      }
      const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted;
      console.log(`Extracted Schema Items Count: ${totalSchemaItemsExtracted}`);
      console.log(`Extracted List Items Count: ${totalListItemsExtracted}`);
      console.log(`Extracted Screenshots Count: ${extractedScreenshotsCount}`);
      console.log(`Total Rows Extracted: ${totalRowsExtracted}`);
      console.log(`Total Data Points Extracted: ${totalDataPointsExtracted}`);
      // Capture metrics
      capture(
@@ -415,7 +290,6 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
        }
      );
      // Trigger webhooks for run completion
      const webhookPayload = {
        robot_id: plainRun.robotMetaId,
        run_id: data.runId,
@@ -424,13 +298,12 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
        started_at: plainRun.startedAt,
        finished_at: new Date().toLocaleString(),
        extracted_data: {
-          captured_texts: Object.values(categorizedOutput.scrapeSchema).flat() || [],
+          captured_texts: updatedRun?.serializableOutput?.scrapeSchema ? Object.values(updatedRun.serializableOutput.scrapeSchema).flat() : [],
-          captured_lists: categorizedOutput.scrapeList,
+          captured_lists: updatedRun?.serializableOutput?.scrapeList || {},
          total_rows: totalRowsExtracted,
          captured_texts_count: totalSchemaItemsExtracted,
          captured_lists_count: totalListItemsExtracted,
          screenshots_count: extractedScreenshotsCount,
          total_data_points_extracted: totalDataPointsExtracted,
        },
        metadata: {
          browser_id: plainRun.browserId,
@@ -475,30 +348,18 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
      };
      try {
-        if (browser && browser.interpreter) {
+        const hasData = (run.serializableOutput && 
-          const hasSchemaData = (browser.interpreter.serializableDataByType?.scrapeSchema ?? []).length > 0;
+          ((run.serializableOutput.scrapeSchema && run.serializableOutput.scrapeSchema.length > 0) ||
-          const hasListData = (browser.interpreter.serializableDataByType?.scrapeList ?? []).length > 0;
+           (run.serializableOutput.scrapeList && run.serializableOutput.scrapeList.length > 0))) ||
-          const hasBinaryData = (browser.interpreter.binaryData ?? []).length > 0;
+          (run.binaryOutput && Object.keys(run.binaryOutput).length > 0);
-          if (hasSchemaData || hasListData || hasBinaryData) {
+        if (hasData) {
-            logger.log('info', `Extracting partial data from failed run ${data.runId}`);
+          logger.log('info', `Partial data found in failed run ${data.runId}, triggering integration updates`);
-
+          await triggerIntegrationUpdates(plainRun.runId, plainRun.robotMetaId);
-            partialData = await extractAndProcessScrapedData(browser, run);
+          partialDataExtracted = true;
            partialUpdateData.serializableOutput = {
              scrapeSchema: Object.values(partialData.categorizedOutput.scrapeSchema),
              scrapeList: Object.values(partialData.categorizedOutput.scrapeList),
            };
            partialUpdateData.binaryOutput = partialData.uploadedBinaryOutput;
            partialDataExtracted = true; 
            logger.log('info', `Partial data extracted for failed run ${data.runId}: ${partialData.totalDataPointsExtracted} data points`);
            await triggerIntegrationUpdates(plainRun.runId, plainRun.robotMetaId);
          }
        }
-      } catch (partialDataError: any) {
+      } catch (dataCheckError: any) {
-        logger.log('warn', `Failed to extract partial data for run ${data.runId}: ${partialDataError.message}`);
+        logger.log('warn', `Failed to check for partial data in run ${data.runId}: ${dataCheckError.message}`);
      }
      await run.update(partialUpdateData);
@@ -652,7 +513,9 @@ async function processRunExecution(job: Job<ExecuteRunData>) {
 async function abortRun(runId: string, userId: string): Promise<boolean> {
  try {
-    const run = await Run.findOne({ where: { runId: runId } });
+    const run = await Run.findOne({ 
      where: { runId: runId }
    });
    if (!run) {
      logger.log('warn', `Run ${runId} not found or does not belong to user ${userId}`);
@@ -702,24 +565,18 @@ async function abortRun(runId: string, userId: string): Promise<boolean> {
      return true;
    }
    let currentLog = 'Run aborted by user';
    const extractedData = await extractAndProcessScrapedData(browser, run);
    console.log(`Total Data Points Extracted in aborted run: ${extractedData.totalDataPointsExtracted}`);
    await run.update({
      status: 'aborted',
      finishedAt: new Date().toLocaleString(),
-      browserId: plainRun.browserId,
+      log: 'Run aborted by user'
      log: currentLog,
      serializableOutput: {
        scrapeSchema: Object.values(extractedData.categorizedOutput.scrapeSchema),
        scrapeList: Object.values(extractedData.categorizedOutput.scrapeList),
      },
      binaryOutput: extractedData.uploadedBinaryOutput,
    });
-    if (extractedData.totalDataPointsExtracted > 0) {
+    const hasData = (run.serializableOutput && 
      ((run.serializableOutput.scrapeSchema && run.serializableOutput.scrapeSchema.length > 0) ||
       (run.serializableOutput.scrapeList && run.serializableOutput.scrapeList.length > 0))) ||
      (run.binaryOutput && Object.keys(run.binaryOutput).length > 0);
    if (hasData) {
      await triggerIntegrationUpdates(runId, plainRun.robotMetaId);
    }
--- a/server/src/workflow-management/scheduler/index.ts
+++ b/server/src/workflow-management/scheduler/index.ts
@@ -106,6 +106,39 @@ async function executeRun(id: string, userId: string) {
    const plainRun = run.toJSON();
    if (run.status === 'aborted' || run.status === 'aborting') {
      logger.log('info', `Scheduled Run ${id} has status ${run.status}, skipping execution`);
      return {
        success: false,
        error: `Run has status ${run.status}`
      }
    }
    if (run.status === 'queued') {
      logger.log('info', `Scheduled Run ${id} has status 'queued', skipping stale execution - will be handled by recovery`);
      return {
        success: false,
        error: 'Run is queued and will be handled by recovery'
      }
    }
    const retryCount = plainRun.retryCount || 0;
    if (retryCount >= 3) {
      logger.log('warn', `Scheduled Run ${id} has exceeded max retries (${retryCount}/3), marking as failed`);
      const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId, userId }, raw: true });
      await run.update({
        status: 'failed',
        finishedAt: new Date().toLocaleString(),
        log: plainRun.log ? `${plainRun.log}\nMax retries exceeded (3/3) - Run failed after multiple attempts.` : `Max retries exceeded (3/3) - Run failed after multiple attempts.`
      });
      return {
        success: false,
        error: 'Max retries exceeded'
      }
    }
    const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true });
    if (!recording) {
      return {
@@ -127,58 +160,52 @@ async function executeRun(id: string, userId: string) {
    }
    const workflow = AddGeneratedFlags(recording.recording);
    // Set run ID for real-time data persistence
    browser.interpreter.setRunId(id);
    const interpretationInfo = await browser.interpreter.InterpretRecording(
      workflow, currentPage, (newPage: Page) => currentPage = newPage, plainRun.interpreterSettings
    );
    const binaryOutputService = new BinaryOutputService('maxun-run-screenshots');
    const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput);
    const categorizedOutput = {
      scrapeSchema: interpretationInfo.scrapeSchemaOutput || {},
      scrapeList: interpretationInfo.scrapeListOutput || {},
    };
    await destroyRemoteBrowser(plainRun.browserId, userId);
    await run.update({
      ...run,
      status: 'success',
      finishedAt: new Date().toLocaleString(),
      browserId: plainRun.browserId,
      log: interpretationInfo.log.join('\n'),
      serializableOutput: {
        scrapeSchema: Object.values(categorizedOutput.scrapeSchema),
        scrapeList: Object.values(categorizedOutput.scrapeList),
      },
      binaryOutput: uploadedBinaryOutput,
    });
-    // Track extraction metrics
+    // Get metrics from persisted data for analytics and webhooks
    let totalSchemaItemsExtracted = 0;
    let totalListItemsExtracted = 0;
    let extractedScreenshotsCount = 0;
-    if (categorizedOutput.scrapeSchema) {
+    const updatedRun = await Run.findOne({ where: { runId: id } });
-      Object.values(categorizedOutput.scrapeSchema).forEach((schemaResult: any) => {
+    if (updatedRun) {
-        if (Array.isArray(schemaResult)) {
+      if (updatedRun.serializableOutput) {
-          totalSchemaItemsExtracted += schemaResult.length;
+        if (updatedRun.serializableOutput.scrapeSchema) {
-        } else if (schemaResult && typeof schemaResult === 'object') {
+          Object.values(updatedRun.serializableOutput.scrapeSchema).forEach((schemaResult: any) => {
-          totalSchemaItemsExtracted += 1;
+            if (Array.isArray(schemaResult)) {
              totalSchemaItemsExtracted += schemaResult.length;
            } else if (schemaResult && typeof schemaResult === 'object') {
              totalSchemaItemsExtracted += 1;
            }
          });
        }
-      });
+        
-    }
+        if (updatedRun.serializableOutput.scrapeList) {
-    
+          Object.values(updatedRun.serializableOutput.scrapeList).forEach((listResult: any) => {
-    if (categorizedOutput.scrapeList) {
+            if (Array.isArray(listResult)) {
-      Object.values(categorizedOutput.scrapeList).forEach((listResult: any) => {
+              totalListItemsExtracted += listResult.length;
-        if (Array.isArray(listResult)) {
+            }
-          totalListItemsExtracted += listResult.length;
+          });
        }
-      });
+      }
-    }
+      
-    
+      if (updatedRun.binaryOutput) {
-    if (uploadedBinaryOutput) {
+        extractedScreenshotsCount = Object.keys(updatedRun.binaryOutput).length;
-      extractedScreenshotsCount = Object.keys(uploadedBinaryOutput).length;
+      }
    }
    const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted;
@@ -204,8 +231,8 @@ async function executeRun(id: string, userId: string) {
      started_at: plainRun.startedAt,
      finished_at: new Date().toLocaleString(),
      extracted_data: {
-        captured_texts: Object.values(categorizedOutput.scrapeSchema).flat() || [],
+        captured_texts: updatedRun?.serializableOutput?.scrapeSchema ? Object.values(updatedRun.serializableOutput.scrapeSchema).flat() : [],
-        captured_lists: categorizedOutput.scrapeList,
+        captured_lists: updatedRun?.serializableOutput?.scrapeList || {},
        total_rows: totalRowsExtracted,
        captured_texts_count: totalSchemaItemsExtracted,
        captured_lists_count: totalListItemsExtracted,