feat: add max queue integration tasks
This commit is contained in:
@@ -18,8 +18,25 @@ interface SerializableOutput {
|
|||||||
|
|
||||||
const MAX_RETRIES = 3;
|
const MAX_RETRIES = 3;
|
||||||
const BASE_API_DELAY = 2000;
|
const BASE_API_DELAY = 2000;
|
||||||
|
const MAX_QUEUE_SIZE = 1000;
|
||||||
|
|
||||||
export let airtableUpdateTasks: { [runId: string]: AirtableUpdateTask } = {};
|
export let airtableUpdateTasks: { [runId: string]: AirtableUpdateTask } = {};
|
||||||
|
let isProcessingAirtable = false;
|
||||||
|
|
||||||
|
export function addAirtableUpdateTask(runId: string, task: AirtableUpdateTask): boolean {
|
||||||
|
const currentSize = Object.keys(airtableUpdateTasks).length;
|
||||||
|
|
||||||
|
if (currentSize >= MAX_QUEUE_SIZE) {
|
||||||
|
logger.log('warn', `Airtable task queue full (${currentSize}/${MAX_QUEUE_SIZE}), dropping oldest task`);
|
||||||
|
const oldestKey = Object.keys(airtableUpdateTasks)[0];
|
||||||
|
if (oldestKey) {
|
||||||
|
delete airtableUpdateTasks[oldestKey];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
airtableUpdateTasks[runId] = task;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
async function refreshAirtableToken(refreshToken: string) {
|
async function refreshAirtableToken(refreshToken: string) {
|
||||||
try {
|
try {
|
||||||
@@ -44,15 +61,13 @@ async function refreshAirtableToken(refreshToken: string) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput: Record<string, string>) {
|
function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput: Record<string, string>) {
|
||||||
const allRecords: Record<string, any>[] = [];
|
const allRecords: Record<string, any>[] = [];
|
||||||
|
|
||||||
const schemaData: Array<{ Group: string; Field: string; Value: any }> = [];
|
const schemaData: Array<{ Group: string; Field: string; Value: any }> = [];
|
||||||
const listData: any[] = [];
|
const listData: any[] = [];
|
||||||
const screenshotData: Array<{key: string, url: string}> = [];
|
const screenshotData: Array<{ key: string; url: string }> = [];
|
||||||
|
|
||||||
// Collect schema data
|
|
||||||
if (serializableOutput.scrapeSchema) {
|
if (serializableOutput.scrapeSchema) {
|
||||||
if (Array.isArray(serializableOutput.scrapeSchema)) {
|
if (Array.isArray(serializableOutput.scrapeSchema)) {
|
||||||
for (const schemaArray of serializableOutput.scrapeSchema) {
|
for (const schemaArray of serializableOutput.scrapeSchema) {
|
||||||
@@ -82,8 +97,7 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collect list data
|
|
||||||
if (serializableOutput.scrapeList) {
|
if (serializableOutput.scrapeList) {
|
||||||
if (Array.isArray(serializableOutput.scrapeList)) {
|
if (Array.isArray(serializableOutput.scrapeList)) {
|
||||||
for (const listArray of serializableOutput.scrapeList) {
|
for (const listArray of serializableOutput.scrapeList) {
|
||||||
@@ -107,8 +121,8 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Collect screenshot data
|
// Collect screenshot data (handles both string and object forms safely)
|
||||||
// if (binaryOutput && Object.keys(binaryOutput).length > 0) {
|
// if (binaryOutput && Object.keys(binaryOutput).length > 0) {
|
||||||
// Object.entries(binaryOutput).forEach(([key, rawValue]: [string, any]) => {
|
// Object.entries(binaryOutput).forEach(([key, rawValue]: [string, any]) => {
|
||||||
// if (!key || key.trim() === "") return;
|
// if (!key || key.trim() === "") return;
|
||||||
@@ -136,37 +150,38 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
|
|||||||
// }
|
// }
|
||||||
// });
|
// });
|
||||||
// }
|
// }
|
||||||
|
|
||||||
// Mix all data types together to create consecutive records
|
// --- Merge all types into Airtable rows ---
|
||||||
const maxLength = Math.max(schemaData.length, listData.length, screenshotData.length);
|
const maxLength = Math.max(schemaData.length, listData.length, screenshotData.length);
|
||||||
|
|
||||||
for (let i = 0; i < maxLength; i++) {
|
for (let i = 0; i < maxLength; i++) {
|
||||||
const record: Record<string, any> = {};
|
const record: Record<string, any> = {};
|
||||||
|
|
||||||
if (i < schemaData.length) {
|
if (i < schemaData.length) {
|
||||||
record.Group = schemaData[i].Group;
|
record.Group = schemaData[i].Group;
|
||||||
record.Label = schemaData[i].Field;
|
record.Label = schemaData[i].Field;
|
||||||
record.Value = schemaData[i].Value;
|
record.Value = schemaData[i].Value;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i < listData.length) {
|
if (i < listData.length) {
|
||||||
Object.entries(listData[i]).forEach(([key, value]) => {
|
Object.entries(listData[i] || {}).forEach(([key, value]) => {
|
||||||
if (value !== null && value !== undefined && value !== '') {
|
if (value !== null && value !== undefined && value !== "") {
|
||||||
record[key] = value;
|
record[key] = value;
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
if (i < screenshotData.length) {
|
if (i < screenshotData.length) {
|
||||||
record.Key = screenshotData[i].key;
|
record.Key = screenshotData[i].key;
|
||||||
record.Screenshot = screenshotData[i].url;
|
record.Screenshot = screenshotData[i].url;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (Object.keys(record).length > 0) {
|
if (Object.keys(record).length > 0) {
|
||||||
allRecords.push(record);
|
allRecords.push(record);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Push leftovers
|
||||||
for (let i = maxLength; i < schemaData.length; i++) {
|
for (let i = maxLength; i < schemaData.length; i++) {
|
||||||
allRecords.push({ Label: schemaData[i].Field, Value: schemaData[i].Value });
|
allRecords.push({ Label: schemaData[i].Field, Value: schemaData[i].Value });
|
||||||
}
|
}
|
||||||
@@ -179,7 +194,7 @@ function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput:
|
|||||||
Screenshot: screenshotData[i].url,
|
Screenshot: screenshotData[i].url,
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
return allRecords;
|
return allRecords;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -497,10 +512,18 @@ function isValidUrl(str: string): boolean {
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const processAirtableUpdates = async () => {
|
export const processAirtableUpdates = async () => {
|
||||||
const maxProcessingTime = 60000;
|
if (isProcessingAirtable) {
|
||||||
const startTime = Date.now();
|
logger.log('info', 'Airtable processing already in progress, skipping');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
while (Date.now() - startTime < maxProcessingTime) {
|
isProcessingAirtable = true;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const maxProcessingTime = 60000;
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
while (Date.now() - startTime < maxProcessingTime) {
|
||||||
let hasPendingTasks = false;
|
let hasPendingTasks = false;
|
||||||
|
|
||||||
for (const runId in airtableUpdateTasks) {
|
for (const runId in airtableUpdateTasks) {
|
||||||
@@ -535,9 +558,12 @@ export const processAirtableUpdates = async () => {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('Waiting for 5 seconds before checking again...');
|
console.log('Waiting for 5 seconds before checking again...');
|
||||||
await new Promise(resolve => setTimeout(resolve, 5000));
|
await new Promise(resolve => setTimeout(resolve, 5000));
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('Airtable processing completed or timed out');
|
console.log('Airtable processing completed or timed out');
|
||||||
|
} finally {
|
||||||
|
isProcessingAirtable = false;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
@@ -11,13 +11,31 @@ interface GoogleSheetUpdateTask {
|
|||||||
}
|
}
|
||||||
|
|
||||||
interface SerializableOutput {
|
interface SerializableOutput {
|
||||||
scrapeSchema?: any[];
|
scrapeSchema?: Record<string, any[]>;
|
||||||
scrapeList?: any[];
|
scrapeList?: Record<string, any[]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const MAX_RETRIES = 5;
|
const MAX_RETRIES = 5;
|
||||||
|
const MAX_QUEUE_SIZE = 1000;
|
||||||
|
|
||||||
export let googleSheetUpdateTasks: { [runId: string]: GoogleSheetUpdateTask } = {};
|
export let googleSheetUpdateTasks: { [runId: string]: GoogleSheetUpdateTask } = {};
|
||||||
|
let isProcessingGoogleSheets = false;
|
||||||
|
|
||||||
|
export function addGoogleSheetUpdateTask(runId: string, task: GoogleSheetUpdateTask): boolean {
|
||||||
|
const currentSize = Object.keys(googleSheetUpdateTasks).length;
|
||||||
|
|
||||||
|
if (currentSize >= MAX_QUEUE_SIZE) {
|
||||||
|
logger.log('warn', `Google Sheets task queue full (${currentSize}/${MAX_QUEUE_SIZE}), dropping oldest task`);
|
||||||
|
const oldestKey = Object.keys(googleSheetUpdateTasks)[0];
|
||||||
|
if (oldestKey) {
|
||||||
|
delete googleSheetUpdateTasks[oldestKey];
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
googleSheetUpdateTasks[runId] = task;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
export async function updateGoogleSheet(robotId: string, runId: string) {
|
export async function updateGoogleSheet(robotId: string, runId: string) {
|
||||||
try {
|
try {
|
||||||
@@ -144,7 +162,7 @@ async function ensureSheetExists(spreadsheetId: string, sheetName: string, robot
|
|||||||
fields: 'sheets.properties.title'
|
fields: 'sheets.properties.title'
|
||||||
});
|
});
|
||||||
|
|
||||||
const existingSheets = response.data.sheets?.map(sheet => sheet.properties?.title) || [];
|
const existingSheets = response.data.sheets?.map((sheet: any) => sheet.properties?.title) || [];
|
||||||
|
|
||||||
if (!existingSheets.includes(sheetName)) {
|
if (!existingSheets.includes(sheetName)) {
|
||||||
await sheets.spreadsheets.batchUpdate({
|
await sheets.spreadsheets.batchUpdate({
|
||||||
@@ -219,7 +237,7 @@ export async function writeDataToSheet(
|
|||||||
refresh_token: robot.google_refresh_token,
|
refresh_token: robot.google_refresh_token,
|
||||||
});
|
});
|
||||||
|
|
||||||
oauth2Client.on('tokens', async (tokens) => {
|
oauth2Client.once('tokens', async (tokens: any) => {
|
||||||
if (tokens.refresh_token || tokens.access_token) {
|
if (tokens.refresh_token || tokens.access_token) {
|
||||||
const robotModel = await Robot.findOne({ where: { 'recording_meta.id': robotId } });
|
const robotModel = await Robot.findOne({ where: { 'recording_meta.id': robotId } });
|
||||||
if (robotModel) {
|
if (robotModel) {
|
||||||
@@ -292,10 +310,18 @@ export async function writeDataToSheet(
|
|||||||
}
|
}
|
||||||
|
|
||||||
export const processGoogleSheetUpdates = async () => {
|
export const processGoogleSheetUpdates = async () => {
|
||||||
const maxProcessingTime = 60000;
|
if (isProcessingGoogleSheets) {
|
||||||
const startTime = Date.now();
|
logger.log('info', 'Google Sheets processing already in progress, skipping');
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
while (Date.now() - startTime < maxProcessingTime) {
|
isProcessingGoogleSheets = true;
|
||||||
|
|
||||||
|
try {
|
||||||
|
const maxProcessingTime = 60000;
|
||||||
|
const startTime = Date.now();
|
||||||
|
|
||||||
|
while (Date.now() - startTime < maxProcessingTime) {
|
||||||
let hasPendingTasks = false;
|
let hasPendingTasks = false;
|
||||||
|
|
||||||
for (const runId in googleSheetUpdateTasks) {
|
for (const runId in googleSheetUpdateTasks) {
|
||||||
@@ -328,9 +354,12 @@ export const processGoogleSheetUpdates = async () => {
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('Waiting for 5 seconds before checking again...');
|
console.log('Waiting for 5 seconds before checking again...');
|
||||||
await new Promise(resolve => setTimeout(resolve, 5000));
|
await new Promise(resolve => setTimeout(resolve, 5000));
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log('Google Sheets processing completed or timed out');
|
console.log('Google Sheets processing completed or timed out');
|
||||||
|
} finally {
|
||||||
|
isProcessingGoogleSheets = false;
|
||||||
|
}
|
||||||
};
|
};
|
||||||
Reference in New Issue
Block a user