diff --git a/server/src/workflow-management/integrations/airtable.ts b/server/src/workflow-management/integrations/airtable.ts index 401bc11d..5f72c836 100644 --- a/server/src/workflow-management/integrations/airtable.ts +++ b/server/src/workflow-management/integrations/airtable.ts @@ -44,65 +44,100 @@ async function refreshAirtableToken(refreshToken: string) { } } + function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput: Record) { - const mergedRecords: Record[] = []; + const allRecords: Record[] = []; - const maxLength = Math.max( - ...[ - ...(serializableOutput.scrapeSchema ?? []).map(arr => arr?.length ?? 0), - ...(serializableOutput.scrapeList ?? []).map(arr => arr?.length ?? 0), - 0 - ] - ); - - for (let i = 0; i < maxLength; i++) { - mergedRecords.push({}); - } + const schemaData: Array<{key: string, value: any}> = []; + const listData: any[] = []; + const screenshotData: Array<{key: string, url: string}> = []; + // Collect schema data if (serializableOutput.scrapeSchema) { for (const schemaArray of serializableOutput.scrapeSchema) { if (!Array.isArray(schemaArray)) continue; - - for (let i = 0; i < schemaArray.length; i++) { - if (i >= mergedRecords.length) break; - mergedRecords[i] = { ...mergedRecords[i], ...schemaArray[i] }; + for (const schemaItem of schemaArray) { + Object.entries(schemaItem).forEach(([key, value]) => { + if (key && key.trim() !== '' && value !== null && value !== undefined && value !== '') { + schemaData.push({key, value}); + } + }); } } } + // Collect list data if (serializableOutput.scrapeList) { for (const listArray of serializableOutput.scrapeList) { if (!Array.isArray(listArray)) continue; - - for (let i = 0; i < listArray.length; i++) { - if (i >= mergedRecords.length) break; - mergedRecords[i] = { ...mergedRecords[i], ...listArray[i] }; - } - } - } - - if (binaryOutput && Object.keys(binaryOutput).length > 0) { - for (let i = 0; i < mergedRecords.length; i++) { - const screenshotKey = `item-${i}`; - if (binaryOutput[screenshotKey]) { - mergedRecords[i].Screenshot = binaryOutput[screenshotKey]; - mergedRecords[i].Key = screenshotKey; - } - } - - for (const [key, url] of Object.entries(binaryOutput)) { - if (mergedRecords.some(record => record.Key === key)) { - continue; - } - - mergedRecords.push({ - "Key": key, - "Screenshot": url + listArray.forEach(listItem => { + const hasContent = Object.values(listItem).some(value => + value !== null && value !== undefined && value !== '' + ); + if (hasContent) { + listData.push(listItem); + } }); } } - return mergedRecords; + // Collect screenshot data + if (binaryOutput && Object.keys(binaryOutput).length > 0) { + Object.entries(binaryOutput).forEach(([key, url]) => { + if (key && key.trim() !== '' && url && url.trim() !== '') { + screenshotData.push({key, url}); + } + }); + } + + // Mix all data types together to create consecutive records + const maxLength = Math.max(schemaData.length, listData.length, screenshotData.length); + + for (let i = 0; i < maxLength; i++) { + const record: Record = {}; + + if (i < schemaData.length) { + record.Label = schemaData[i].key; + record.Value = schemaData[i].value; + } + + if (i < listData.length) { + Object.entries(listData[i]).forEach(([key, value]) => { + if (value !== null && value !== undefined && value !== '') { + record[key] = value; + } + }); + } + + if (i < screenshotData.length) { + record.Key = screenshotData[i].key; + record.Screenshot = screenshotData[i].url; + } + + if (Object.keys(record).length > 0) { + allRecords.push(record); + } + } + + for (let i = maxLength; i < schemaData.length; i++) { + allRecords.push({ + Label: schemaData[i].key, + Value: schemaData[i].value + }); + } + + for (let i = maxLength; i < listData.length; i++) { + allRecords.push(listData[i]); + } + + for (let i = maxLength; i < screenshotData.length; i++) { + allRecords.push({ + Key: screenshotData[i].key, + Screenshot: screenshotData[i].url + }); + } + + return allRecords; } export async function updateAirtable(robotId: string, runId: string) { @@ -210,11 +245,13 @@ export async function writeDataToAirtable( const airtable = new Airtable({ apiKey: accessToken }); const base = airtable.base(baseId); + await deleteEmptyRecords(base, tableName); + const processedData = data.map(item => { const cleanedItem: Record = {}; for (const [key, value] of Object.entries(item)) { - if (value === null || value === undefined) { + if (value === null || value === undefined || value === '') { cleanedItem[key] = ''; } else if (typeof value === 'object' && !Array.isArray(value)) { cleanedItem[key] = JSON.stringify(value); @@ -224,113 +261,55 @@ export async function writeDataToAirtable( } return cleanedItem; + }).filter(record => { + return Object.values(record).some(value => value !== null && value !== undefined && value !== ''); }); - const existingFields = await getExistingFields(base, tableName); - console.log(`Found ${existingFields.length} existing fields in Airtable: ${existingFields.join(', ')}`); + if (processedData.length === 0) { + console.log('No valid data to write after filtering. Skipping.'); + return; + } - const dataFields = [...new Set(processedData.flatMap(row => Object.keys(row)))]; + const dataFields = [...new Set(processedData.flatMap(row => Object.keys(row)))]; console.log(`Found ${dataFields.length} fields in data: ${dataFields.join(', ')}`); + const existingFields = await getExistingFields(base, tableName); const missingFields = dataFields.filter(field => !existingFields.includes(field)); - const hasNewColumns = missingFields.length > 0; - console.log(`Found ${missingFields.length} new fields: ${missingFields.join(', ')}`); - - for (const field of missingFields) { - const sampleRow = processedData.find(row => field in row); - if (sampleRow) { - const sampleValue = sampleRow[field]; - try { - await createAirtableField(baseId, tableName, field, sampleValue, accessToken, tableId); - console.log(`Successfully created field: ${field}`); - - await new Promise(resolve => setTimeout(resolve, 200)); - } catch (fieldError: any) { - console.warn(`Warning: Could not create field "${field}": ${fieldError.message}`); - } - } - } - let existingRecords: Array<{ id: string, fields: Record }> = []; - - if (hasNewColumns) { - existingRecords = await fetchAllRecords(base, tableName); - console.log(`Found ${existingRecords.length} existing records in Airtable`); - } - - if (hasNewColumns && existingRecords.length > 0) { - const recordsToUpdate = []; - const recordsToCreate = []; + if (missingFields.length > 0) { + console.log(`Creating ${missingFields.length} new fields: ${missingFields.join(', ')}`); - const newColumnData = processedData.map(record => { - const newColumnsOnly: Record = {}; - missingFields.forEach(field => { - if (field in record) { - newColumnsOnly[field] = record[field]; - } - }); - return newColumnsOnly; - }); - - for (let i = 0; i < Math.min(existingRecords.length, newColumnData.length); i++) { - if (Object.keys(newColumnData[i]).length > 0) { - recordsToUpdate.push({ - id: existingRecords[i].id, - fields: newColumnData[i] - }); - } - } - - const existingColumnsBeingUpdated = dataFields.filter(field => - existingFields.includes(field) && !missingFields.includes(field) - ); - - if (existingColumnsBeingUpdated.length > 0) { - recordsToCreate.push(...processedData.map(record => ({ fields: record }))); - console.log(`Will append ${recordsToCreate.length} new records with all data`); - } else { - if (processedData.length > existingRecords.length) { - const additionalRecords = processedData.slice(existingRecords.length); - recordsToCreate.push(...additionalRecords.map(record => ({ fields: record }))); - console.log(`Will append ${recordsToCreate.length} additional records`); - } - } - - if (recordsToUpdate.length > 0) { - console.log(`Updating ${recordsToUpdate.length} existing records with new columns`); - const BATCH_SIZE = 10; - for (let i = 0; i < recordsToUpdate.length; i += BATCH_SIZE) { - const batch = recordsToUpdate.slice(i, i + BATCH_SIZE); - console.log(`Updating batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(recordsToUpdate.length/BATCH_SIZE)}`); - + for (const field of missingFields) { + const sampleRow = processedData.find(row => field in row && row[field] !== ''); + if (sampleRow) { + const sampleValue = sampleRow[field]; try { - await retryableAirtableUpdate(base, tableName, batch); - } catch (batchError: any) { - console.error(`Error updating batch: ${batchError.message}`); - throw batchError; + await createAirtableField(baseId, tableName, field, sampleValue, accessToken, tableId); + console.log(`Successfully created field: ${field}`); + await new Promise(resolve => setTimeout(resolve, 200)); + } catch (fieldError: any) { + console.warn(`Warning: Could not create field "${field}": ${fieldError.message}`); } - - await new Promise(resolve => setTimeout(resolve, 500)); } } - } else { - console.log(`Appending all ${processedData.length} records to Airtable`); - const recordsToCreate = processedData.map(record => ({ fields: record })); + } + + console.log(`Appending all ${processedData.length} records to Airtable`); + const recordsToCreate = processedData.map(record => ({ fields: record })); + + const BATCH_SIZE = 10; + for (let i = 0; i < recordsToCreate.length; i += BATCH_SIZE) { + const batch = recordsToCreate.slice(i, i + BATCH_SIZE); + console.log(`Creating batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(recordsToCreate.length/BATCH_SIZE)}`); - const BATCH_SIZE = 10; - for (let i = 0; i < recordsToCreate.length; i += BATCH_SIZE) { - const batch = recordsToCreate.slice(i, i + BATCH_SIZE); - console.log(`Creating batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(recordsToCreate.length/BATCH_SIZE)}`); - - try { - await retryableAirtableCreate(base, tableName, batch); - } catch (batchError: any) { - console.error(`Error creating batch: ${batchError.message}`); - throw batchError; - } - - await new Promise(resolve => setTimeout(resolve, 500)); + try { + await retryableAirtableCreate(base, tableName, batch); + } catch (batchError: any) { + console.error(`Error creating batch: ${batchError.message}`); + throw batchError; } + + await new Promise(resolve => setTimeout(resolve, 500)); } await deleteEmptyRecords(base, tableName); @@ -343,20 +322,6 @@ export async function writeDataToAirtable( } } -async function fetchAllRecords(base: Airtable.Base, tableName: string): Promise }>> { - try { - console.log(`Fetching all records from ${tableName}...`); - const records = await base(tableName).select().all(); - return records.map(record => ({ - id: record.id, - fields: record.fields - })); - } catch (error: any) { - console.warn(`Warning: Could not fetch all records: ${error.message}`); - return []; - } -} - async function deleteEmptyRecords(base: Airtable.Base, tableName: string): Promise { console.log('Checking for empty records to clear...'); @@ -407,23 +372,6 @@ async function retryableAirtableCreate( } } -async function retryableAirtableUpdate( - base: Airtable.Base, - tableName: string, - batch: any[], - retries = MAX_RETRIES -): Promise { - try { - await base(tableName).update(batch); - } catch (error) { - if (retries > 0) { - await new Promise(resolve => setTimeout(resolve, BASE_API_DELAY)); - return retryableAirtableUpdate(base, tableName, batch, retries - 1); - } - throw error; - } -} - // Helper functions async function getExistingFields(base: Airtable.Base, tableName: string): Promise { try { diff --git a/server/src/workflow-management/integrations/gsheet.ts b/server/src/workflow-management/integrations/gsheet.ts index 402ace5d..2a29bdcc 100644 --- a/server/src/workflow-management/integrations/gsheet.ts +++ b/server/src/workflow-management/integrations/gsheet.ts @@ -114,7 +114,16 @@ async function processOutputType( await ensureSheetExists(spreadsheetId, sheetName, robotConfig); - await writeDataToSheet(robotId, spreadsheetId, data, sheetName, robotConfig); + let formattedData = data; + if (outputType === 'Text' && data.length > 0) { + const schemaItem = data[0]; + formattedData = Object.entries(schemaItem).map(([key, value]) => ({ + Label: key, + Value: value + })); + } + + await writeDataToSheet(robotId, spreadsheetId, formattedData, sheetName, robotConfig); console.log(`Data written to ${sheetName} sheet for ${outputType} data`); } } diff --git a/src/components/run/InterpretationLog.tsx b/src/components/run/InterpretationLog.tsx index 0f1f1185..faf965e9 100644 --- a/src/components/run/InterpretationLog.tsx +++ b/src/components/run/InterpretationLog.tsx @@ -379,49 +379,63 @@ export const InterpretationLog: React.FC = ({ isOpen, se )} {(activeTab === availableTabs.findIndex(tab => tab.id === 'captureText') || singleContentType === 'captureText') && captureTextData.length > 0 && ( - - - - - {getCaptureTextColumns.map((column) => ( + + +
+ + - {column} + Label - ))} - - - - {captureTextData.map((row, idx) => ( - - {getCaptureTextColumns.map((column) => ( + + Value + + + + + {getCaptureTextColumns.map((column, index) => ( + + + {column} + - {row[column]} + {captureTextData[0][column]} - ))} - - ))} - -
-
+ + ))} + + + + )} diff --git a/src/components/run/RunContent.tsx b/src/components/run/RunContent.tsx index 25eb5063..38491854 100644 --- a/src/components/run/RunContent.tsx +++ b/src/components/run/RunContent.tsx @@ -27,6 +27,7 @@ import TableHead from '@mui/material/TableHead'; import TableRow from '@mui/material/TableRow'; import 'highlight.js/styles/github.css'; import { useTranslation } from "react-i18next"; +import { useThemeMode } from "../../context/theme-provider"; interface RunContentProps { row: Data, @@ -54,6 +55,8 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe const [legacyColumns, setLegacyColumns] = useState([]); const [isLegacyData, setIsLegacyData] = useState(false); + const { darkMode } = useThemeMode(); + useEffect(() => { setTab(tab); }, [interpretationInProgress]); @@ -191,17 +194,27 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe }; // Function to convert table data to CSV format - const convertToCSV = (data: any[], columns: string[]): string => { - const header = columns.join(','); - const rows = data.map(row => - columns.map(col => JSON.stringify(row[col] || "", null, 2)).join(',') - ); - return [header, ...rows].join('\n'); + const convertToCSV = (data: any[], columns: string[], isSchemaData: boolean = false): string => { + if (isSchemaData) { + // For schema data, export as Label-Value pairs + const header = 'Label,Value'; + const rows = columns.map(column => + `"${column}","${data[0][column] || ""}"` + ); + return [header, ...rows].join('\n'); + } else { + // For regular table data, export as normal table + const header = columns.join(','); + const rows = data.map(row => + columns.map(col => JSON.stringify(row[col] || "", null, 2)).join(',') + ); + return [header, ...rows].join('\n'); + } }; // Function to download a specific dataset as CSV - const downloadCSV = (data: any[], columns: string[], filename: string) => { - const csvContent = convertToCSV(data, columns); + const downloadCSV = (data: any[], columns: string[], filename: string, isSchemaData: boolean = false) => { + const csvContent = convertToCSV(data, columns, isSchemaData); const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' }); const url = URL.createObjectURL(blob); @@ -262,8 +275,7 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe if (!currentData || currentData.length === 0) return null; - const downloadData = isPaginatedList ? currentData : data; - const downloadColumns = isPaginatedList ? currentColumns : columns; + const isSchemaData = title.toLowerCase().includes('text') || title.toLowerCase().includes('schema'); return ( @@ -283,7 +295,7 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe @@ -338,6 +350,7 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe onClick={() => navigateListTable('next')} disabled={currentListIndex === listData.length - 1} sx={{ + borderColor: '#FF00C3', color: currentListIndex === listData.length - 1 ? 'gray' : '#FF00C3', '&.Mui-disabled': { borderColor: 'rgba(0, 0, 0, 0.12)' @@ -353,21 +366,66 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe - {(isPaginatedList ? currentColumns : columns).map((column) => ( - {column} - ))} + {isSchemaData ? ( + <> + + Label + + + Value + + + ) : ( + (isPaginatedList ? currentColumns : columns).map((column) => ( + + {column} + + )) + )} - {(isPaginatedList ? currentData : data).map((row, index) => ( - - {(isPaginatedList ? currentColumns : columns).map((column) => ( - - {row[column] === undefined || row[column] === "" ? "-" : row[column]} + {isSchemaData ? ( + currentColumns.map((column) => ( + + + {column} - ))} - - ))} + + {currentData[0][column] === undefined || currentData[0][column] === "" ? "-" : currentData[0][column]} + + + )) + ) : ( + currentData.map((row, index) => ( + + {(isPaginatedList ? currentColumns : columns).map((column) => ( + + {row[column] === undefined || row[column] === "" ? "-" : row[column]} + + ))} + + )) + )}