Merge pull request #680 from getmaxun/captext-ui

feat: display capture text data in vertical (column-wise) format
This commit is contained in:
Karishma Shukla
2025-07-15 22:13:35 +05:30
committed by GitHub
4 changed files with 251 additions and 222 deletions

View File

@@ -44,65 +44,100 @@ async function refreshAirtableToken(refreshToken: string) {
} }
} }
/**
 * Flattens a run's scraped output into one list of records.
 *
 * Three streams are collected independently and then zipped by index:
 *  - every non-empty key/value pair found in `scrapeSchema` items becomes a
 *    `Label` / `Value` pair,
 *  - every `scrapeList` item with at least one non-empty field contributes its
 *    non-empty fields as-is,
 *  - every `binaryOutput` entry with a non-blank key and URL becomes a
 *    `Key` / `Screenshot` pair.
 *
 * Record `i` of the result combines the i-th element of each stream, so the
 * result has as many records as the longest stream. Values that are `null`,
 * `undefined` or `''` are treated as empty and never written.
 *
 * @param serializableOutput Scraped data; `scrapeSchema` and `scrapeList` are
 *   each read as an array of arrays of plain objects.
 * @param binaryOutput Map from screenshot key to screenshot URL.
 * @returns The merged records, in stream order.
 */
function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput: Record<string, string>): Record<string, any>[] {
  const hasValue = (value: unknown): boolean =>
    value !== null && value !== undefined && value !== '';
  // Object.entries/Object.values throw on null/undefined, so only real objects are inspected.
  const isObject = (item: unknown): item is Record<string, any> =>
    item !== null && typeof item === 'object';

  const schemaData: Array<{ key: string, value: any }> = [];
  const listData: Array<Record<string, any>> = [];
  const screenshotData: Array<{ key: string, url: string }> = [];

  // Collect schema data: one entry per non-empty field of every schema item.
  for (const schemaArray of serializableOutput?.scrapeSchema ?? []) {
    if (!Array.isArray(schemaArray)) continue;
    for (const schemaItem of schemaArray) {
      if (!isObject(schemaItem)) continue;
      for (const [key, value] of Object.entries(schemaItem)) {
        if (key.trim() !== '' && hasValue(value)) {
          schemaData.push({ key, value });
        }
      }
    }
  }

  // Collect list data: keep items that carry at least one non-empty field.
  for (const listArray of serializableOutput?.scrapeList ?? []) {
    if (!Array.isArray(listArray)) continue;
    for (const listItem of listArray) {
      if (isObject(listItem) && Object.values(listItem).some(hasValue)) {
        listData.push(listItem);
      }
    }
  }

  // Collect screenshot data: skip entries with a blank key or blank/non-string URL.
  for (const [key, url] of Object.entries(binaryOutput ?? {})) {
    if (key.trim() !== '' && typeof url === 'string' && url.trim() !== '') {
      screenshotData.push({ key, url });
    }
  }

  // Zip the three streams by index. maxLength already covers the longest
  // stream, so no leftover pass is needed afterwards.
  const allRecords: Record<string, any>[] = [];
  const maxLength = Math.max(schemaData.length, listData.length, screenshotData.length);

  for (let i = 0; i < maxLength; i++) {
    const record: Record<string, any> = {};

    if (i < schemaData.length) {
      record.Label = schemaData[i].key;
      record.Value = schemaData[i].value;
    }

    if (i < listData.length) {
      for (const [key, value] of Object.entries(listData[i])) {
        if (hasValue(value)) {
          record[key] = value;
        }
      }
    }

    if (i < screenshotData.length) {
      record.Key = screenshotData[i].key;
      record.Screenshot = screenshotData[i].url;
    }

    if (Object.keys(record).length > 0) {
      allRecords.push(record);
    }
  }

  return allRecords;
}
export async function updateAirtable(robotId: string, runId: string) { export async function updateAirtable(robotId: string, runId: string) {
@@ -210,11 +245,13 @@ export async function writeDataToAirtable(
const airtable = new Airtable({ apiKey: accessToken }); const airtable = new Airtable({ apiKey: accessToken });
const base = airtable.base(baseId); const base = airtable.base(baseId);
await deleteEmptyRecords(base, tableName);
const processedData = data.map(item => { const processedData = data.map(item => {
const cleanedItem: Record<string, any> = {}; const cleanedItem: Record<string, any> = {};
for (const [key, value] of Object.entries(item)) { for (const [key, value] of Object.entries(item)) {
if (value === null || value === undefined) { if (value === null || value === undefined || value === '') {
cleanedItem[key] = ''; cleanedItem[key] = '';
} else if (typeof value === 'object' && !Array.isArray(value)) { } else if (typeof value === 'object' && !Array.isArray(value)) {
cleanedItem[key] = JSON.stringify(value); cleanedItem[key] = JSON.stringify(value);
@@ -224,113 +261,55 @@ export async function writeDataToAirtable(
} }
return cleanedItem; return cleanedItem;
}).filter(record => {
return Object.values(record).some(value => value !== null && value !== undefined && value !== '');
}); });
const existingFields = await getExistingFields(base, tableName); if (processedData.length === 0) {
console.log(`Found ${existingFields.length} existing fields in Airtable: ${existingFields.join(', ')}`); console.log('No valid data to write after filtering. Skipping.');
return;
}
const dataFields = [...new Set(processedData.flatMap(row => Object.keys(row)))]; const dataFields = [...new Set(processedData.flatMap(row => Object.keys(row)))];
console.log(`Found ${dataFields.length} fields in data: ${dataFields.join(', ')}`); console.log(`Found ${dataFields.length} fields in data: ${dataFields.join(', ')}`);
const existingFields = await getExistingFields(base, tableName);
const missingFields = dataFields.filter(field => !existingFields.includes(field)); const missingFields = dataFields.filter(field => !existingFields.includes(field));
const hasNewColumns = missingFields.length > 0;
console.log(`Found ${missingFields.length} new fields: ${missingFields.join(', ')}`);
for (const field of missingFields) { if (missingFields.length > 0) {
const sampleRow = processedData.find(row => field in row); console.log(`Creating ${missingFields.length} new fields: ${missingFields.join(', ')}`);
if (sampleRow) {
const sampleValue = sampleRow[field];
try {
await createAirtableField(baseId, tableName, field, sampleValue, accessToken, tableId);
console.log(`Successfully created field: ${field}`);
await new Promise(resolve => setTimeout(resolve, 200));
} catch (fieldError: any) {
console.warn(`Warning: Could not create field "${field}": ${fieldError.message}`);
}
}
}
let existingRecords: Array<{ id: string, fields: Record<string, any> }> = [];
if (hasNewColumns) {
existingRecords = await fetchAllRecords(base, tableName);
console.log(`Found ${existingRecords.length} existing records in Airtable`);
}
if (hasNewColumns && existingRecords.length > 0) {
const recordsToUpdate = [];
const recordsToCreate = [];
const newColumnData = processedData.map(record => {
const newColumnsOnly: Record<string, any> = {};
missingFields.forEach(field => {
if (field in record) {
newColumnsOnly[field] = record[field];
}
});
return newColumnsOnly;
});
for (let i = 0; i < Math.min(existingRecords.length, newColumnData.length); i++) {
if (Object.keys(newColumnData[i]).length > 0) {
recordsToUpdate.push({
id: existingRecords[i].id,
fields: newColumnData[i]
});
}
}
const existingColumnsBeingUpdated = dataFields.filter(field =>
existingFields.includes(field) && !missingFields.includes(field)
);
if (existingColumnsBeingUpdated.length > 0) {
recordsToCreate.push(...processedData.map(record => ({ fields: record })));
console.log(`Will append ${recordsToCreate.length} new records with all data`);
} else {
if (processedData.length > existingRecords.length) {
const additionalRecords = processedData.slice(existingRecords.length);
recordsToCreate.push(...additionalRecords.map(record => ({ fields: record })));
console.log(`Will append ${recordsToCreate.length} additional records`);
}
}
if (recordsToUpdate.length > 0) {
console.log(`Updating ${recordsToUpdate.length} existing records with new columns`);
const BATCH_SIZE = 10;
for (let i = 0; i < recordsToUpdate.length; i += BATCH_SIZE) {
const batch = recordsToUpdate.slice(i, i + BATCH_SIZE);
console.log(`Updating batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(recordsToUpdate.length/BATCH_SIZE)}`);
for (const field of missingFields) {
const sampleRow = processedData.find(row => field in row && row[field] !== '');
if (sampleRow) {
const sampleValue = sampleRow[field];
try { try {
await retryableAirtableUpdate(base, tableName, batch); await createAirtableField(baseId, tableName, field, sampleValue, accessToken, tableId);
} catch (batchError: any) { console.log(`Successfully created field: ${field}`);
console.error(`Error updating batch: ${batchError.message}`); await new Promise(resolve => setTimeout(resolve, 200));
throw batchError; } catch (fieldError: any) {
console.warn(`Warning: Could not create field "${field}": ${fieldError.message}`);
} }
await new Promise(resolve => setTimeout(resolve, 500));
} }
} }
} else { }
console.log(`Appending all ${processedData.length} records to Airtable`);
const recordsToCreate = processedData.map(record => ({ fields: record }));
const BATCH_SIZE = 10; console.log(`Appending all ${processedData.length} records to Airtable`);
for (let i = 0; i < recordsToCreate.length; i += BATCH_SIZE) { const recordsToCreate = processedData.map(record => ({ fields: record }));
const batch = recordsToCreate.slice(i, i + BATCH_SIZE);
console.log(`Creating batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(recordsToCreate.length/BATCH_SIZE)}`);
try { const BATCH_SIZE = 10;
await retryableAirtableCreate(base, tableName, batch); for (let i = 0; i < recordsToCreate.length; i += BATCH_SIZE) {
} catch (batchError: any) { const batch = recordsToCreate.slice(i, i + BATCH_SIZE);
console.error(`Error creating batch: ${batchError.message}`); console.log(`Creating batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(recordsToCreate.length/BATCH_SIZE)}`);
throw batchError;
}
await new Promise(resolve => setTimeout(resolve, 500)); try {
await retryableAirtableCreate(base, tableName, batch);
} catch (batchError: any) {
console.error(`Error creating batch: ${batchError.message}`);
throw batchError;
} }
await new Promise(resolve => setTimeout(resolve, 500));
} }
await deleteEmptyRecords(base, tableName); await deleteEmptyRecords(base, tableName);
@@ -343,20 +322,6 @@ export async function writeDataToAirtable(
} }
} }
/**
 * Reads every record of an Airtable table and reduces each one to its `id`
 * and `fields`.
 *
 * This is best-effort: any failure while selecting or mapping is logged as a
 * warning and an empty array is returned instead of throwing.
 *
 * @param base Airtable base; called with the table name to get the table handle.
 * @param tableName Name of the table to read.
 * @returns One `{ id, fields }` entry per record, or `[]` on failure.
 */
async function fetchAllRecords(base: Airtable.Base, tableName: string): Promise<Array<{ id: string, fields: Record<string, any> }>> {
  try {
    console.log(`Fetching all records from ${tableName}...`);
    const table = base(tableName);
    const fetched = await table.select().all();

    const simplified: Array<{ id: string, fields: Record<string, any> }> = [];
    for (const entry of fetched) {
      simplified.push({ id: entry.id, fields: entry.fields });
    }
    return simplified;
  } catch (error: any) {
    // Deliberately swallowed: callers get an empty list rather than an exception.
    console.warn(`Warning: Could not fetch all records: ${error.message}`);
    return [];
  }
}
async function deleteEmptyRecords(base: Airtable.Base, tableName: string): Promise<void> { async function deleteEmptyRecords(base: Airtable.Base, tableName: string): Promise<void> {
console.log('Checking for empty records to clear...'); console.log('Checking for empty records to clear...');
@@ -407,23 +372,6 @@ async function retryableAirtableCreate(
} }
} }
/**
 * Sends one batch of record updates to an Airtable table, retrying on failure.
 *
 * After a failed attempt the function waits `BASE_API_DELAY` milliseconds and
 * tries again, up to `retries` additional times. Once the retries are used up
 * the error of the last attempt is rethrown.
 *
 * @param base Airtable base; called with the table name to get the table handle.
 * @param tableName Name of the table to update.
 * @param batch Update payloads passed straight to the table's `update`.
 * @param retries Number of retries allowed after the first attempt.
 * @throws The error of the final failed attempt.
 */
async function retryableAirtableUpdate(
  base: Airtable.Base,
  tableName: string,
  batch: any[],
  retries = MAX_RETRIES
): Promise<void> {
  let remaining = retries;

  for (;;) {
    try {
      await base(tableName).update(batch);
      return;
    } catch (error) {
      // Written as a negated `>` so the comparison matches the recursive form
      // for every numeric input.
      if (!(remaining > 0)) {
        throw error;
      }
      await new Promise(resolve => setTimeout(resolve, BASE_API_DELAY));
      remaining -= 1;
    }
  }
}
// Helper functions // Helper functions
async function getExistingFields(base: Airtable.Base, tableName: string): Promise<string[]> { async function getExistingFields(base: Airtable.Base, tableName: string): Promise<string[]> {
try { try {

View File

@@ -114,7 +114,16 @@ async function processOutputType(
await ensureSheetExists(spreadsheetId, sheetName, robotConfig); await ensureSheetExists(spreadsheetId, sheetName, robotConfig);
await writeDataToSheet(robotId, spreadsheetId, data, sheetName, robotConfig); let formattedData = data;
if (outputType === 'Text' && data.length > 0) {
const schemaItem = data[0];
formattedData = Object.entries(schemaItem).map(([key, value]) => ({
Label: key,
Value: value
}));
}
await writeDataToSheet(robotId, spreadsheetId, formattedData, sheetName, robotConfig);
console.log(`Data written to ${sheetName} sheet for ${outputType} data`); console.log(`Data written to ${sheetName} sheet for ${outputType} data`);
} }
} }

View File

@@ -379,49 +379,63 @@ export const InterpretationLog: React.FC<InterpretationLogProps> = ({ isOpen, se
)} )}
{(activeTab === availableTabs.findIndex(tab => tab.id === 'captureText') || singleContentType === 'captureText') && captureTextData.length > 0 && ( {(activeTab === availableTabs.findIndex(tab => tab.id === 'captureText') || singleContentType === 'captureText') && captureTextData.length > 0 && (
<TableContainer component={Paper} sx={{ boxShadow: 'none', borderRadius: 0 }}> <Box sx={{ p: 2 }}>
<Table> <TableContainer component={Paper} sx={{ boxShadow: 'none', borderRadius: 0 }}>
<TableHead> <Table>
<TableRow> <TableHead>
{getCaptureTextColumns.map((column) => ( <TableRow>
<TableCell <TableCell
key={column}
sx={{ sx={{
borderBottom: '1px solid', borderBottom: '1px solid',
borderColor: darkMode ? '#3a4453' : '#dee2e6', borderColor: darkMode ? '#3a4453' : '#dee2e6',
backgroundColor: darkMode ? '#2a3441' : '#f8f9fa' backgroundColor: darkMode ? '#2a3441' : '#f8f9fa',
}} }}
> >
{column} Label
</TableCell> </TableCell>
))} <TableCell
</TableRow> sx={{
</TableHead> borderBottom: '1px solid',
<TableBody> borderColor: darkMode ? '#3a4453' : '#dee2e6',
{captureTextData.map((row, idx) => ( backgroundColor: darkMode ? '#2a3441' : '#f8f9fa',
<TableRow }}
key={idx} >
sx={{ Value
borderBottom: '1px solid', </TableCell>
borderColor: darkMode ? '#3a4453' : '#dee2e6' </TableRow>
}} </TableHead>
> <TableBody>
{getCaptureTextColumns.map((column) => ( {getCaptureTextColumns.map((column, index) => (
<TableRow
key={column}
sx={{
borderBottom: index < getCaptureTextColumns.length - 1 ? '1px solid' : 'none',
borderColor: darkMode ? '#3a4453' : '#dee2e6'
}}
>
<TableCell
sx={{
borderBottom: 'none',
py: 2,
fontWeight: 500
}}
>
{column}
</TableCell>
<TableCell <TableCell
key={column}
sx={{ sx={{
borderBottom: 'none', borderBottom: 'none',
py: 2 py: 2
}} }}
> >
{row[column]} {captureTextData[0][column]}
</TableCell> </TableCell>
))} </TableRow>
</TableRow> ))}
))} </TableBody>
</TableBody> </Table>
</Table> </TableContainer>
</TableContainer> </Box>
)} )}
</Box> </Box>
</> </>

View File

@@ -27,6 +27,7 @@ import TableHead from '@mui/material/TableHead';
import TableRow from '@mui/material/TableRow'; import TableRow from '@mui/material/TableRow';
import 'highlight.js/styles/github.css'; import 'highlight.js/styles/github.css';
import { useTranslation } from "react-i18next"; import { useTranslation } from "react-i18next";
import { useThemeMode } from "../../context/theme-provider";
interface RunContentProps { interface RunContentProps {
row: Data, row: Data,
@@ -54,6 +55,8 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
const [legacyColumns, setLegacyColumns] = useState<string[]>([]); const [legacyColumns, setLegacyColumns] = useState<string[]>([]);
const [isLegacyData, setIsLegacyData] = useState<boolean>(false); const [isLegacyData, setIsLegacyData] = useState<boolean>(false);
const { darkMode } = useThemeMode();
useEffect(() => { useEffect(() => {
setTab(tab); setTab(tab);
}, [interpretationInProgress]); }, [interpretationInProgress]);
@@ -191,17 +194,27 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
}; };
// Function to convert table data to CSV format // Function to convert table data to CSV format
/**
 * Serialises table data to CSV text.
 *
 * - Schema mode (`isSchemaData = true`): emits a `Label,Value` header followed
 *   by one row per column, taking the values from the first element of `data`.
 *   Both cells are quoted and embedded double quotes are doubled, so labels or
 *   values containing quotes, commas or newlines stay in a single cell.
 *   Objects are written as JSON, and a missing first row produces empty values
 *   instead of throwing.
 * - Table mode (default): emits the column names as the header and one line
 *   per row, each cell rendered with `JSON.stringify`.
 *
 * In both modes only `null` / `undefined` are treated as empty, so `0` and
 * `false` are exported rather than silently dropped.
 *
 * @param data Rows to export.
 * @param columns Column names, in output order.
 * @param isSchemaData When true, export as vertical Label/Value pairs.
 * @returns The CSV document, lines joined with `\n`.
 */
const convertToCSV = (data: any[], columns: string[], isSchemaData: boolean = false): string => {
  // Text form of a cell value for schema mode.
  const toText = (value: unknown): string => {
    if (value === null || value === undefined) return '';
    return typeof value === 'object' ? JSON.stringify(value) : String(value);
  };
  // Quote a CSV field, doubling any embedded double quotes.
  const quoteField = (text: string): string => `"${text.replace(/"/g, '""')}"`;

  if (isSchemaData) {
    // For schema data, export as Label-Value pairs
    const header = 'Label,Value';
    const firstRow: Record<string, unknown> = data[0] ?? {};
    const rows = columns.map(column =>
      `${quoteField(column)},${quoteField(toText(firstRow[column]))}`
    );
    return [header, ...rows].join('\n');
  }

  // For regular table data, export as normal table
  const header = columns.join(',');
  const rows = data.map(row =>
    columns.map(col => JSON.stringify(row[col] ?? "", null, 2)).join(',')
  );
  return [header, ...rows].join('\n');
};
// Function to download a specific dataset as CSV // Function to download a specific dataset as CSV
const downloadCSV = (data: any[], columns: string[], filename: string) => { const downloadCSV = (data: any[], columns: string[], filename: string, isSchemaData: boolean = false) => {
const csvContent = convertToCSV(data, columns); const csvContent = convertToCSV(data, columns, isSchemaData);
const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' }); const blob = new Blob([csvContent], { type: 'text/csv;charset=utf-8;' });
const url = URL.createObjectURL(blob); const url = URL.createObjectURL(blob);
@@ -262,8 +275,7 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
if (!currentData || currentData.length === 0) return null; if (!currentData || currentData.length === 0) return null;
const downloadData = isPaginatedList ? currentData : data; const isSchemaData = title.toLowerCase().includes('text') || title.toLowerCase().includes('schema');
const downloadColumns = isPaginatedList ? currentColumns : columns;
return ( return (
<Accordion defaultExpanded sx={{ mb: 2 }}> <Accordion defaultExpanded sx={{ mb: 2 }}>
@@ -283,7 +295,7 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
<Box> <Box>
<Button <Button
component="a" component="a"
onClick={() => downloadJSON(downloadData, jsonFilename)} onClick={() => downloadJSON(currentData, jsonFilename)}
sx={{ sx={{
color: '#FF00C3', color: '#FF00C3',
textTransform: 'none', textTransform: 'none',
@@ -297,12 +309,12 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
} }
}} }}
> >
Download as JSON {t('run_content.captured_data.download_json', 'Download as JSON')}
</Button> </Button>
<Button <Button
component="a" component="a"
onClick={() => downloadCSV(downloadData, downloadColumns, csvFilename)} onClick={() => downloadCSV(currentData, currentColumns, csvFilename, isSchemaData)}
sx={{ sx={{
color: '#FF00C3', color: '#FF00C3',
textTransform: 'none', textTransform: 'none',
@@ -315,7 +327,7 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
} }
}} }}
> >
Download as CSV {t('run_content.captured_data.download_csv', 'Download as CSV')}
</Button> </Button>
</Box> </Box>
@@ -338,6 +350,7 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
onClick={() => navigateListTable('next')} onClick={() => navigateListTable('next')}
disabled={currentListIndex === listData.length - 1} disabled={currentListIndex === listData.length - 1}
sx={{ sx={{
borderColor: '#FF00C3',
color: currentListIndex === listData.length - 1 ? 'gray' : '#FF00C3', color: currentListIndex === listData.length - 1 ? 'gray' : '#FF00C3',
'&.Mui-disabled': { '&.Mui-disabled': {
borderColor: 'rgba(0, 0, 0, 0.12)' borderColor: 'rgba(0, 0, 0, 0.12)'
@@ -353,21 +366,66 @@ export const RunContent = ({ row, currentLog, interpretationInProgress, logEndRe
<Table stickyHeader aria-label="sticky table"> <Table stickyHeader aria-label="sticky table">
<TableHead> <TableHead>
<TableRow> <TableRow>
{(isPaginatedList ? currentColumns : columns).map((column) => ( {isSchemaData ? (
<TableCell key={column}>{column}</TableCell> <>
))} <TableCell
sx={{
borderBottom: '1px solid',
borderColor: darkMode ? '#3a4453' : '#dee2e6',
backgroundColor: darkMode ? '#2a3441' : '#f8f9fa'
}}
>
Label
</TableCell>
<TableCell
sx={{
borderBottom: '1px solid',
borderColor: darkMode ? '#3a4453' : '#dee2e6',
backgroundColor: darkMode ? '#2a3441' : '#f8f9fa'
}}
>
Value
</TableCell>
</>
) : (
(isPaginatedList ? currentColumns : columns).map((column) => (
<TableCell
key={column}
sx={{
borderBottom: '1px solid',
borderColor: darkMode ? '#3a4453' : '#dee2e6',
backgroundColor: darkMode ? '#2a3441' : '#f8f9fa'
}}
>
{column}
</TableCell>
))
)}
</TableRow> </TableRow>
</TableHead> </TableHead>
<TableBody> <TableBody>
{(isPaginatedList ? currentData : data).map((row, index) => ( {isSchemaData ? (
<TableRow key={index}> currentColumns.map((column) => (
{(isPaginatedList ? currentColumns : columns).map((column) => ( <TableRow key={column}>
<TableCell key={column}> <TableCell sx={{ fontWeight: 500 }}>
{row[column] === undefined || row[column] === "" ? "-" : row[column]} {column}
</TableCell> </TableCell>
))} <TableCell>
</TableRow> {currentData[0][column] === undefined || currentData[0][column] === "" ? "-" : currentData[0][column]}
))} </TableCell>
</TableRow>
))
) : (
currentData.map((row, index) => (
<TableRow key={index}>
{(isPaginatedList ? currentColumns : columns).map((column) => (
<TableCell key={column}>
{row[column] === undefined || row[column] === "" ? "-" : row[column]}
</TableCell>
))}
</TableRow>
))
)}
</TableBody> </TableBody>
</Table> </Table>
</TableContainer> </TableContainer>