feat: add max queue integration tasks

This commit is contained in:
Rohit Rajan
2025-11-29 11:11:02 +05:30
parent 66d31d2c65
commit 0e6bc22dce
2 changed files with 93 additions and 38 deletions

View File

@@ -18,8 +18,25 @@ interface SerializableOutput {
const MAX_RETRIES = 3; const MAX_RETRIES = 3;
const BASE_API_DELAY = 2000; const BASE_API_DELAY = 2000;
const MAX_QUEUE_SIZE = 1000;
export let airtableUpdateTasks: { [runId: string]: AirtableUpdateTask } = {}; export let airtableUpdateTasks: { [runId: string]: AirtableUpdateTask } = {};
let isProcessingAirtable = false;
export function addAirtableUpdateTask(runId: string, task: AirtableUpdateTask): boolean {
const currentSize = Object.keys(airtableUpdateTasks).length;
if (currentSize >= MAX_QUEUE_SIZE) {
logger.log('warn', `Airtable task queue full (${currentSize}/${MAX_QUEUE_SIZE}), dropping oldest task`);
const oldestKey = Object.keys(airtableUpdateTasks)[0];
if (oldestKey) {
delete airtableUpdateTasks[oldestKey];
}
}
airtableUpdateTasks[runId] = task;
return true;
}
async function refreshAirtableToken(refreshToken: string) { async function refreshAirtableToken(refreshToken: string) {
try { try {
@@ -44,15 +61,13 @@ async function refreshAirtableToken(refreshToken: string) {
} }
} }
function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput: Record<string, string>) { function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput: Record<string, string>) {
const allRecords: Record<string, any>[] = []; const allRecords: Record<string, any>[] = [];
const schemaData: Array<{ Group: string; Field: string; Value: any }> = []; const schemaData: Array<{ Group: string; Field: string; Value: any }> = [];
const listData: any[] = []; const listData: any[] = [];
const screenshotData: Array<{key: string, url: string}> = []; const screenshotData: Array<{ key: string; url: string }> = [];
// Collect schema data
if (serializableOutput.scrapeSchema) { if (serializableOutput.scrapeSchema) {
if (Array.isArray(serializableOutput.scrapeSchema)) { if (Array.isArray(serializableOutput.scrapeSchema)) {
for (const schemaArray of serializableOutput.scrapeSchema) { for (const schemaArray of serializableOutput.scrapeSchema) {
@@ -82,8 +97,7 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
} }
} }
} }
// Collect list data
if (serializableOutput.scrapeList) { if (serializableOutput.scrapeList) {
if (Array.isArray(serializableOutput.scrapeList)) { if (Array.isArray(serializableOutput.scrapeList)) {
for (const listArray of serializableOutput.scrapeList) { for (const listArray of serializableOutput.scrapeList) {
@@ -107,8 +121,8 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
} }
} }
} }
// Collect screenshot data // Collect screenshot data (handles both string and object forms safely)
// if (binaryOutput && Object.keys(binaryOutput).length > 0) { // if (binaryOutput && Object.keys(binaryOutput).length > 0) {
// Object.entries(binaryOutput).forEach(([key, rawValue]: [string, any]) => { // Object.entries(binaryOutput).forEach(([key, rawValue]: [string, any]) => {
// if (!key || key.trim() === "") return; // if (!key || key.trim() === "") return;
@@ -136,37 +150,38 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
// } // }
// }); // });
// } // }
// Mix all data types together to create consecutive records // --- Merge all types into Airtable rows ---
const maxLength = Math.max(schemaData.length, listData.length, screenshotData.length); const maxLength = Math.max(schemaData.length, listData.length, screenshotData.length);
for (let i = 0; i < maxLength; i++) { for (let i = 0; i < maxLength; i++) {
const record: Record<string, any> = {}; const record: Record<string, any> = {};
if (i < schemaData.length) { if (i < schemaData.length) {
record.Group = schemaData[i].Group; record.Group = schemaData[i].Group;
record.Label = schemaData[i].Field; record.Label = schemaData[i].Field;
record.Value = schemaData[i].Value; record.Value = schemaData[i].Value;
} }
if (i < listData.length) { if (i < listData.length) {
Object.entries(listData[i]).forEach(([key, value]) => { Object.entries(listData[i] || {}).forEach(([key, value]) => {
if (value !== null && value !== undefined && value !== '') { if (value !== null && value !== undefined && value !== "") {
record[key] = value; record[key] = value;
} }
}); });
} }
if (i < screenshotData.length) { if (i < screenshotData.length) {
record.Key = screenshotData[i].key; record.Key = screenshotData[i].key;
record.Screenshot = screenshotData[i].url; record.Screenshot = screenshotData[i].url;
} }
if (Object.keys(record).length > 0) { if (Object.keys(record).length > 0) {
allRecords.push(record); allRecords.push(record);
} }
} }
// Push leftovers
for (let i = maxLength; i < schemaData.length; i++) { for (let i = maxLength; i < schemaData.length; i++) {
allRecords.push({ Label: schemaData[i].Field, Value: schemaData[i].Value }); allRecords.push({ Label: schemaData[i].Field, Value: schemaData[i].Value });
} }
@@ -179,7 +194,7 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
Screenshot: screenshotData[i].url, Screenshot: screenshotData[i].url,
}); });
} }
return allRecords; return allRecords;
} }
@@ -497,10 +512,18 @@ function isValidUrl(str: string): boolean {
} }
export const processAirtableUpdates = async () => { export const processAirtableUpdates = async () => {
const maxProcessingTime = 60000; if (isProcessingAirtable) {
const startTime = Date.now(); logger.log('info', 'Airtable processing already in progress, skipping');
return;
}
while (Date.now() - startTime < maxProcessingTime) { isProcessingAirtable = true;
try {
const maxProcessingTime = 60000;
const startTime = Date.now();
while (Date.now() - startTime < maxProcessingTime) {
let hasPendingTasks = false; let hasPendingTasks = false;
for (const runId in airtableUpdateTasks) { for (const runId in airtableUpdateTasks) {
@@ -535,9 +558,12 @@ export const processAirtableUpdates = async () => {
break; break;
} }
console.log('Waiting for 5 seconds before checking again...'); console.log('Waiting for 5 seconds before checking again...');
await new Promise(resolve => setTimeout(resolve, 5000)); await new Promise(resolve => setTimeout(resolve, 5000));
} }
console.log('Airtable processing completed or timed out'); console.log('Airtable processing completed or timed out');
} finally {
isProcessingAirtable = false;
}
}; };

View File

@@ -11,13 +11,31 @@ interface GoogleSheetUpdateTask {
} }
interface SerializableOutput { interface SerializableOutput {
scrapeSchema?: any[]; scrapeSchema?: Record<string, any[]>;
scrapeList?: any[]; scrapeList?: Record<string, any[]>;
} }
const MAX_RETRIES = 5; const MAX_RETRIES = 5;
const MAX_QUEUE_SIZE = 1000;
export let googleSheetUpdateTasks: { [runId: string]: GoogleSheetUpdateTask } = {}; export let googleSheetUpdateTasks: { [runId: string]: GoogleSheetUpdateTask } = {};
let isProcessingGoogleSheets = false;
export function addGoogleSheetUpdateTask(runId: string, task: GoogleSheetUpdateTask): boolean {
const currentSize = Object.keys(googleSheetUpdateTasks).length;
if (currentSize >= MAX_QUEUE_SIZE) {
logger.log('warn', `Google Sheets task queue full (${currentSize}/${MAX_QUEUE_SIZE}), dropping oldest task`);
const oldestKey = Object.keys(googleSheetUpdateTasks)[0];
if (oldestKey) {
delete googleSheetUpdateTasks[oldestKey];
}
}
googleSheetUpdateTasks[runId] = task;
return true;
}
export async function updateGoogleSheet(robotId: string, runId: string) { export async function updateGoogleSheet(robotId: string, runId: string) {
try { try {
@@ -144,7 +162,7 @@ async function ensureSheetExists(spreadsheetId: string, sheetName: string, robot
fields: 'sheets.properties.title' fields: 'sheets.properties.title'
}); });
const existingSheets = response.data.sheets?.map(sheet => sheet.properties?.title) || []; const existingSheets = response.data.sheets?.map((sheet: any) => sheet.properties?.title) || [];
if (!existingSheets.includes(sheetName)) { if (!existingSheets.includes(sheetName)) {
await sheets.spreadsheets.batchUpdate({ await sheets.spreadsheets.batchUpdate({
@@ -219,7 +237,7 @@ export async function writeDataToSheet(
refresh_token: robot.google_refresh_token, refresh_token: robot.google_refresh_token,
}); });
oauth2Client.on('tokens', async (tokens) => { oauth2Client.once('tokens', async (tokens: any) => {
if (tokens.refresh_token || tokens.access_token) { if (tokens.refresh_token || tokens.access_token) {
const robotModel = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); const robotModel = await Robot.findOne({ where: { 'recording_meta.id': robotId } });
if (robotModel) { if (robotModel) {
@@ -292,10 +310,18 @@ export async function writeDataToSheet(
} }
export const processGoogleSheetUpdates = async () => { export const processGoogleSheetUpdates = async () => {
const maxProcessingTime = 60000; if (isProcessingGoogleSheets) {
const startTime = Date.now(); logger.log('info', 'Google Sheets processing already in progress, skipping');
return;
}
while (Date.now() - startTime < maxProcessingTime) { isProcessingGoogleSheets = true;
try {
const maxProcessingTime = 60000;
const startTime = Date.now();
while (Date.now() - startTime < maxProcessingTime) {
let hasPendingTasks = false; let hasPendingTasks = false;
for (const runId in googleSheetUpdateTasks) { for (const runId in googleSheetUpdateTasks) {
@@ -328,9 +354,12 @@ export const processGoogleSheetUpdates = async () => {
break; break;
} }
console.log('Waiting for 5 seconds before checking again...'); console.log('Waiting for 5 seconds before checking again...');
await new Promise(resolve => setTimeout(resolve, 5000)); await new Promise(resolve => setTimeout(resolve, 5000));
} }
console.log('Google Sheets processing completed or timed out'); console.log('Google Sheets processing completed or timed out');
} finally {
isProcessingGoogleSheets = false;
}
}; };