From 3977b6feb4b8e056b15234df231fdbbb6f7a11c5 Mon Sep 17 00:00:00 2001 From: Rohit Date: Fri, 28 Mar 2025 17:24:58 +0530 Subject: [PATCH 001/106] feat: schedule routes using pgboss queue --- server/src/routes/storage.ts | 32 ++++++++++++++------------------ 1 file changed, 14 insertions(+), 18 deletions(-) diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index af7850f0..603cde14 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -22,7 +22,7 @@ import { encrypt, decrypt } from '../utils/auth'; import { WorkflowFile } from 'maxun-core'; import { Page } from 'playwright'; import { airtableUpdateTasks, processAirtableUpdates } from '../workflow-management/integrations/airtable'; -import { pgBoss } from '../pgboss-worker'; +import { cancelScheduledWorkflow, pgBoss, scheduleWorkflow } from '../pgboss-worker'; chromium.use(stealthPlugin()); export const router = Router(); @@ -792,17 +792,13 @@ router.put('/schedule/:id/', requireSignIn, async (req: AuthenticatedRequest, re return res.status(401).json({ error: 'Unauthorized' }); } - // Create the job in the queue with the cron expression - const job = await workflowQueue.add( - 'run workflow', - { id, runId: uuid(), userId: req.user.id }, - { - repeat: { - pattern: cronExpression, - tz: timezone, - }, - } - ); + try { + await cancelScheduledWorkflow(id); + } catch (cancelError) { + logger.log('warn', `Failed to cancel existing schedule for robot ${id}: ${cancelError}`); + } + + const jobId = await scheduleWorkflow(id, req.user.id, cronExpression, timezone); const nextRunAt = computeNextRun(cronExpression, timezone); @@ -877,12 +873,12 @@ router.delete('/schedule/:id', requireSignIn, async (req: AuthenticatedRequest, return res.status(404).json({ error: 'Robot not found' }); } - // Remove existing job from queue if it exists - const existingJobs = await workflowQueue.getJobs(['delayed', 'waiting']); - for (const job of existingJobs) { - if (job.data.id === id) { - await 
job.remove(); - } + // Cancel the scheduled job in PgBoss + try { + await cancelScheduledWorkflow(id); + } catch (error) { + logger.log('error', `Error cancelling scheduled job for robot ${id}: ${error}`); + // Continue with robot update even if cancellation fails } // Delete the schedule from the robot From 5a0cff6d6a6a44f65e0653ac5711d220b24c5242 Mon Sep 17 00:00:00 2001 From: Karishma Date: Fri, 28 Mar 2025 21:03:01 +0530 Subject: [PATCH 002/106] feat: only install chromium binary --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c3d3b455..acf54028 100644 --- a/README.md +++ b/README.md @@ -65,7 +65,7 @@ npm install cd .. # make sure playwright is properly initialized -npx playwright install +npx playwright install chromium npx playwright install-deps # get back to the root directory From 7c15823e38ddc75dc556b14e63f84abef4027b86 Mon Sep 17 00:00:00 2001 From: Karishma Date: Fri, 28 Mar 2025 21:04:20 +0530 Subject: [PATCH 003/106] feat: install system deps only for chromium --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index acf54028..c7ee47db 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ cd .. # make sure playwright is properly initialized npx playwright install chromium -npx playwright install-deps +npx playwright install-deps chromium # get back to the root directory cd .. From f690c7fed042b187752c099d347bfa9ef6aefa1e Mon Sep 17 00:00:00 2001 From: Karishma Date: Fri, 28 Mar 2025 21:07:33 +0530 Subject: [PATCH 004/106] chore: better command --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c7ee47db..97a8e9c9 100644 --- a/README.md +++ b/README.md @@ -64,7 +64,7 @@ npm install # get back to the root directory cd .. 
-# make sure playwright is properly initialized +# install chromium and its dependencies npx playwright install chromium npx playwright install-deps chromium From 9b6006cafc16ddffe0a368dfb9a37e9fedd519db Mon Sep 17 00:00:00 2001 From: Karishma Date: Fri, 28 Mar 2025 21:08:09 +0530 Subject: [PATCH 005/106] feat: use single command --- README.md | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/README.md b/README.md index 97a8e9c9..ac1a09ca 100644 --- a/README.md +++ b/README.md @@ -65,8 +65,7 @@ npm install cd .. # install chromium and its dependencies -npx playwright install chromium -npx playwright install-deps chromium +npx playwright install --with-deps chromium # get back to the root directory cd .. From 28b3f650d6812c8eb906d31fdf19e917a2855791 Mon Sep 17 00:00:00 2001 From: Rohit Date: Sat, 29 Mar 2025 18:30:11 +0530 Subject: [PATCH 006/106] feat: add scheduler pgboss worker functions --- server/src/pgboss-worker.ts | 181 ++++++++++++++++++++++++++++++++++++ 1 file changed, 181 insertions(+) diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index b775c99c..542684b9 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -21,9 +21,13 @@ import { googleSheetUpdateTasks, processGoogleSheetUpdates } from './workflow-ma import { airtableUpdateTasks, processAirtableUpdates } from './workflow-management/integrations/airtable'; import { RemoteBrowser } from './browser-management/classes/RemoteBrowser'; import { io as serverIo } from "./server"; +import { computeNextRun } from './utils/schedule'; +import { handleRunRecording } from './workflow-management/scheduler'; const pgBossConnectionString = `postgres://${process.env.DB_USER}:${process.env.DB_PASSWORD}@${process.env.DB_HOST}:${process.env.DB_PORT}/${process.env.DB_NAME}`; +const registeredQueues = new Set(); + interface InitializeBrowserData { userId: string; } @@ -41,6 +45,12 @@ interface DestroyBrowserData { userId: string; } +interface 
ScheduledWorkflowData { + id: string; + runId: string; + userId: string; +} + interface ExecuteRunData { userId: string; runId: string; @@ -161,6 +171,69 @@ async function checkAndProcessQueuedRun(userId: string, browserId: string): Prom } } +/** + * Utility function to schedule a cron job using PgBoss + * @param id The robot ID + * @param userId The user ID + * @param cronExpression The cron expression for scheduling + * @param timezone The timezone for the cron expression + */ +export async function scheduleWorkflow(id: string, userId: string, cronExpression: string, timezone: string): Promise { + try { + const runId = require('uuidv4').uuid(); + + const queueName = `scheduled-workflow-${id}`; + + logger.log('info', `Scheduling workflow ${id} with cron expression ${cronExpression} in timezone ${timezone}`); + + await pgBoss.createQueue(queueName); + + await pgBoss.schedule(queueName, cronExpression, + { id, runId, userId }, + { tz: timezone } + ); + + logger.log('info', `Scheduled workflow job for robot ${id}`); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.log('error', `Failed to schedule workflow: ${errorMessage}`); + throw error; + } +} + +/** + * Utility function to cancel a scheduled job + * @param robotId The robot ID + * @returns true if successful + */ +export async function cancelScheduledWorkflow(robotId: string) { + try { + const jobs = await pgBoss.getSchedules(); + + console.log("Scheduled JOBS", jobs); + + const matchingJobs = jobs.filter((job: any) => { + try { + const data = JSON.parse(job.data); + return data && data.id === robotId; + } catch { + return false; + } + }); + + for (const job of matchingJobs) { + logger.log('info', `Cancelling scheduled job ${job.name} for robot ${robotId}`); + await pgBoss.unschedule(job.name); + } + + return true; + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.log('error', `Failed to cancel scheduled workflow: ${errorMessage}`); + throw error; + } +} + /** * Modified processRunExecution function - only add browser reset */ @@ -359,6 +432,107 @@ async function processRunExecution(job: Job) { } } +/** + * Process a scheduled workflow job + */ +async function processScheduledWorkflow(job: Job) { + const { id, runId, userId } = job.data; + logger.log('info', `Processing scheduled workflow job for robotId: ${id}, runId: ${runId}, userId: ${userId}`); + + try { + // Execute the workflow using the existing handleRunRecording function + const result = await handleRunRecording(id, userId); + + // Update the robot's schedule with last run and next run times + const robot = await Robot.findOne({ where: { 'recording_meta.id': id } }); + if (robot && robot.schedule && robot.schedule.cronExpression && robot.schedule.timezone) { + // Update lastRunAt to the current time + const lastRunAt = new Date(); + + // Compute the next run date + const nextRunAt = computeNextRun(robot.schedule.cronExpression, robot.schedule.timezone) || undefined; + + await robot.update({ + schedule: { + ...robot.schedule, + lastRunAt, + nextRunAt, + }, + }); + + logger.log('info', `Updated robot ${id} schedule - next run at: ${nextRunAt}`); + } else { + logger.log('error', `Robot ${id} schedule, cronExpression, or timezone is missing.`); + } + + return { success: true }; + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.log('error', `Scheduled workflow job failed: ${errorMessage}`); + return { success: false }; + } +} + +/** + * Register a worker to handle scheduled workflow jobs + */ +async function registerScheduledWorkflowWorker() { + try { + // First, get a list of all existing robots + const robots = await Robot.findAll({ + attributes: ['recording_meta.id'], + raw: true + }); + + // Register a worker for each potential robot queue + for (const robot of robots) { + if (robot.recording_meta && robot.recording_meta.id) { + const queueName = `scheduled-workflow-${robot.recording_meta.id}`; + await registerWorkerForQueue(queueName); + } + } + + // Also register workers for any existing PgBoss queues that follow our naming pattern + const queues = await pgBoss.getQueues(); + for (const queue of queues) { + if (queue.name.startsWith('scheduled-workflow-') && + !queue.name.endsWith('_error') && + !queue.name.endsWith('_completed')) { + await registerWorkerForQueue(queue.name); + } + } + + logger.log('info', 'Scheduled workflow workers registered successfully'); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.log('error', `Failed to register scheduled workflow workers: ${errorMessage}`); + } +} + +async function registerWorkerForQueue(queueName: string) { + try { + if (registeredQueues.has(queueName)) { + return; + } + + await pgBoss.work(queueName, async (job: Job | Job[]) => { + try { + const singleJob = Array.isArray(job) ? job[0] : job; + return await processScheduledWorkflow(singleJob); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.log('error', `Scheduled workflow job failed in queue ${queueName}: ${errorMessage}`); + throw error; + } + }); + + registeredQueues.add(queueName); + logger.log('info', `Registered worker for queue: ${queueName}`); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.log('error', `Failed to register worker for queue ${queueName}: ${errorMessage}`); + } +} async function registerRunExecutionWorker() { try { @@ -495,6 +669,9 @@ async function startWorkers() { // Register the run execution worker await registerRunExecutionWorker(); + // Register the scheduled workflow worker + await registerScheduledWorkflowWorker(); + logger.log('info', 'All recording workers registered successfully'); } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); @@ -506,6 +683,10 @@ async function startWorkers() { // Start all workers startWorkers(); +pgBoss.on('error', (error) => { + logger.log('error', `PgBoss error: ${error.message}`); +}); + // Handle graceful shutdown process.on('SIGTERM', async () => { logger.log('info', 'SIGTERM received, shutting down PgBoss...'); From 54c0a8768154dfc053108c6bd1e02a6d33a3311c Mon Sep 17 00:00:00 2001 From: Rohit Date: Sat, 29 Mar 2025 18:58:44 +0530 Subject: [PATCH 007/106] feat: rm job data parsing --- server/src/pgboss-worker.ts | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index 542684b9..8373b680 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -209,12 +209,10 @@ export async function scheduleWorkflow(id: string, userId: string, cronExpressio export async function cancelScheduledWorkflow(robotId: string) { try { const jobs = await pgBoss.getSchedules(); - - console.log("Scheduled JOBS", jobs); const matchingJobs = jobs.filter((job: any) => { try { - const data = JSON.parse(job.data); + 
const data = job.data; return data && data.id === robotId; } catch { return false; From a4281e1876e2b39cba2ee72fc29915afb4e4003b Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 29 Mar 2025 20:04:03 +0530 Subject: [PATCH 008/106] hotfix: memory management --- server/src/browser-management/classes/RemoteBrowser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index c7bc3ef2..72098a1f 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -414,7 +414,7 @@ export class RemoteBrowser { } } - this.initializeMemoryManagement(); + // this.initializeMemoryManagement(); }; public updateViewportInfo = async (): Promise => { From 7a75b9d2673599c60cf900ac9d344bfdedf9e5dc Mon Sep 17 00:00:00 2001 From: Rohit Date: Sun, 30 Mar 2025 00:20:12 +0530 Subject: [PATCH 009/106] feat: get user id from namespace auth --- server/src/socket-connection/connection.ts | 105 ++++++++++++++------- 1 file changed, 71 insertions(+), 34 deletions(-) diff --git a/server/src/socket-connection/connection.ts b/server/src/socket-connection/connection.ts index 9fad861b..66294ecf 100644 --- a/server/src/socket-connection/connection.ts +++ b/server/src/socket-connection/connection.ts @@ -1,6 +1,6 @@ import { Namespace, Socket } from 'socket.io'; import { IncomingMessage } from 'http'; -import { verify, JwtPayload } from 'jsonwebtoken'; +import { verify, JwtPayload, sign } from 'jsonwebtoken'; import logger from "../logger"; import registerInputHandlers from '../browser-management/inputHandlers'; @@ -12,48 +12,85 @@ interface AuthenticatedSocket extends Socket { request: AuthenticatedIncomingMessage; } +declare global { + var userContextMap: Map; +} + +if (!global.userContextMap) { + global.userContextMap = new Map(); +} + +/** + * Register browser-user association in the global context map + */ 
+export function registerBrowserUserContext(browserId: string, userId: string) { + if (!global.userContextMap) { + global.userContextMap = new Map(); + } + global.userContextMap.set(browserId, userId); + logger.log('debug', `Registered browser-user association: ${browserId} -> ${userId}`); +} + /** * Socket.io middleware for authentication * This is a socket.io specific auth handler that doesn't rely on Express middleware */ const socketAuthMiddleware = (socket: Socket, next: (err?: Error) => void) => { - const cookies = socket.handshake.headers.cookie; - if (!cookies) { - return next(new Error('Authentication required')); + // Extract browserId from namespace + const namespace = socket.nsp.name; + const browserId = namespace.slice(1); + + // Check if this browser is in our context map + if (global.userContextMap && global.userContextMap.has(browserId)) { + const userId = global.userContextMap.get(browserId); + logger.log('debug', `Found browser in context map: ${browserId} -> ${userId}`); + + const authSocket = socket as AuthenticatedSocket; + authSocket.request.user = { id: userId }; + return next(); + } + + const cookies = socket.handshake.headers.cookie; + if (!cookies) { + logger.log('debug', `No cookies found in socket handshake for ${browserId}`); + return next(new Error('Authentication required')); + } + + const tokenMatch = cookies.split(';').find(c => c.trim().startsWith('token=')); + if (!tokenMatch) { + logger.log('debug', `No token cookie found in socket handshake for ${browserId}`); + return next(new Error('Authentication required')); + } + + const token = tokenMatch.split('=')[1]; + if (!token) { + logger.log('debug', `Empty token value in cookie for ${browserId}`); + return next(new Error('Authentication required')); + } + + const secret = process.env.JWT_SECRET; + if (!secret) { + logger.error('JWT_SECRET environment variable is not defined'); + return next(new Error('Server configuration error')); + } + + verify(token, secret, (err: any, user: 
any) => { + if (err) { + logger.log('warn', `JWT verification error: ${err.message}`); + return next(new Error('Authentication failed')); } - const tokenMatch = cookies.split(';').find(c => c.trim().startsWith('token=')); - if (!tokenMatch) { - return next(new Error('Authentication required')); + // Normalize payload key + if (user.userId && !user.id) { + user.id = user.userId; + delete user.userId; } - const token = tokenMatch.split('=')[1]; - if (!token) { - return next(new Error('Authentication required')); - } - - const secret = process.env.JWT_SECRET; - if (!secret) { - return next(new Error('Server configuration error')); - } - - verify(token, secret, (err: any, user: any) => { - if (err) { - logger.log('warn', 'JWT verification error:', err); - return next(new Error('Authentication failed')); - } - - // Normalize payload key - if (user.userId && !user.id) { - user.id = user.userId; - delete user.userId; // temporary: del the old key for clarity - } - - // Attach user to socket request - const authSocket = socket as AuthenticatedSocket; - authSocket.request.user = user; - next(); - }); + // Attach user to socket request + const authSocket = socket as AuthenticatedSocket; + authSocket.request.user = user; + next(); + }); }; /** From d13e9d56cef50449810b647d7d277648d6f21078 Mon Sep 17 00:00:00 2001 From: Rohit Date: Sun, 30 Mar 2025 00:21:24 +0530 Subject: [PATCH 010/106] feat: register browser user context --- server/src/browser-management/controller.ts | 24 ++++++++++++++------- 1 file changed, 16 insertions(+), 8 deletions(-) diff --git a/server/src/browser-management/controller.ts b/server/src/browser-management/controller.ts index 2cd9498d..ef1e0011 100644 --- a/server/src/browser-management/controller.ts +++ b/server/src/browser-management/controller.ts @@ -5,7 +5,7 @@ import { Socket } from "socket.io"; import { uuid } from 'uuidv4'; -import { createSocketConnection, createSocketConnectionForRun } from "../socket-connection/connection"; +import { 
createSocketConnection, createSocketConnectionForRun, registerBrowserUserContext } from "../socket-connection/connection"; import { io, browserPool } from "../server"; import { RemoteBrowser } from "./classes/RemoteBrowser"; import { RemoteBrowserOptions } from "../types"; @@ -48,19 +48,27 @@ export const initializeRemoteBrowserForRecording = (userId: string): string => { * Starts and initializes a {@link RemoteBrowser} instance for interpretation. * Creates a new {@link Socket} connection over a dedicated namespace. * Returns the new remote browser's generated id. - * @param options {@link RemoteBrowserOptions} to be used when launching the browser - * @returns string + * @param userId User ID for browser ownership + * @returns string Browser ID * @category BrowserManagement-Controller */ export const createRemoteBrowserForRun = (userId: string): string => { const id = uuid(); + + registerBrowserUserContext(id, userId); + logger.log('debug', `Created new browser for run: ${id} for user: ${userId}`); + createSocketConnectionForRun( - io.of(id), + io.of(`/${id}`), async (socket: Socket) => { - const browserSession = new RemoteBrowser(socket, userId); - await browserSession.initialize(userId); - browserPool.addRemoteBrowser(id, browserSession, userId, false, "run"); - socket.emit('ready-for-run'); + try { + const browserSession = new RemoteBrowser(socket, userId); + await browserSession.initialize(userId); + browserPool.addRemoteBrowser(id, browserSession, userId, false, "run"); + socket.emit('ready-for-run'); + } catch (error: any) { + logger.error(`Error initializing browser: ${error.message}`); + } }); return id; }; From b2cd7c7bc23ce388e2bb2d8db6bd6dd6038a5951 Mon Sep 17 00:00:00 2001 From: Rohit Date: Sun, 30 Mar 2025 00:22:40 +0530 Subject: [PATCH 011/106] feat: register worker for queue --- server/src/pgboss-worker.ts | 28 ++++++---------------------- 1 file changed, 6 insertions(+), 22 deletions(-) diff --git a/server/src/pgboss-worker.ts 
b/server/src/pgboss-worker.ts index 8373b680..190afdad 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -192,6 +192,8 @@ export async function scheduleWorkflow(id: string, userId: string, cronExpressio { id, runId, userId }, { tz: timezone } ); + + await registerWorkerForQueue(queueName); logger.log('info', `Scheduled workflow job for robot ${id}`); } catch (error: unknown) { @@ -476,28 +478,10 @@ async function processScheduledWorkflow(job: Job) { */ async function registerScheduledWorkflowWorker() { try { - // First, get a list of all existing robots - const robots = await Robot.findAll({ - attributes: ['recording_meta.id'], - raw: true - }); - - // Register a worker for each potential robot queue - for (const robot of robots) { - if (robot.recording_meta && robot.recording_meta.id) { - const queueName = `scheduled-workflow-${robot.recording_meta.id}`; - await registerWorkerForQueue(queueName); - } - } - - // Also register workers for any existing PgBoss queues that follow our naming pattern - const queues = await pgBoss.getQueues(); - for (const queue of queues) { - if (queue.name.startsWith('scheduled-workflow-') && - !queue.name.endsWith('_error') && - !queue.name.endsWith('_completed')) { - await registerWorkerForQueue(queue.name); - } + const jobs = await pgBoss.getSchedules(); + for (const job of jobs) { + await pgBoss.createQueue(job.name); + await registerWorkerForQueue(job.name); } logger.log('info', 'Scheduled workflow workers registered successfully'); From 10970b563c1495655302de7c30dd67b244b5ce13 Mon Sep 17 00:00:00 2001 From: Rohit Date: Sun, 30 Mar 2025 00:23:16 +0530 Subject: [PATCH 012/106] feat: get browser from id --- server/src/workflow-management/scheduler/index.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/workflow-management/scheduler/index.ts b/server/src/workflow-management/scheduler/index.ts index b67e1ca0..8267fbb8 100644 --- 
a/server/src/workflow-management/scheduler/index.ts +++ b/server/src/workflow-management/scheduler/index.ts @@ -114,7 +114,7 @@ async function executeRun(id: string, userId: string) { plainRun.status = 'running'; - const browser = browserPool.getRemoteBrowser(userId); + const browser = browserPool.getRemoteBrowser(plainRun.browserId); if (!browser) { throw new Error('Could not access browser'); } From 47f9fd7eb0c6365074fee59fc7cb3a08c4d1e7fa Mon Sep 17 00:00:00 2001 From: Rohit Date: Sun, 30 Mar 2025 00:39:10 +0530 Subject: [PATCH 013/106] feat: refactor cron expressions for pgboss --- server/src/routes/storage.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index 603cde14..e4ed5444 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -761,7 +761,7 @@ router.put('/schedule/:id/', requireSignIn, async (req: AuthenticatedRequest, re switch (runEveryUnit) { case 'MINUTES': - cronExpression = `${startMinutes} */${runEvery} * * *`; + cronExpression = `*/${runEvery} * * * *`; break; case 'HOURS': cronExpression = `${startMinutes} */${runEvery} * * *`; @@ -774,7 +774,7 @@ router.put('/schedule/:id/', requireSignIn, async (req: AuthenticatedRequest, re break; case 'MONTHS': // todo: handle leap year - cronExpression = `0 ${atTimeStart} ${dayOfMonth} * *`; + cronExpression = `${startMinutes} ${startHours} ${dayOfMonth} */${runEvery} *`; if (startFrom !== 'SUNDAY') { cronExpression += ` ${dayIndex}`; } From 660da53fba6220521057c91ab991b7d687cc5478 Mon Sep 17 00:00:00 2001 From: Rohit Date: Sun, 30 Mar 2025 03:30:34 +0530 Subject: [PATCH 014/106] feat: rm scheduling logic --- server/src/pgboss-worker.ts | 160 ------------------------------------ 1 file changed, 160 deletions(-) diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index 190afdad..269f6773 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -21,13 +21,9 @@ 
import { googleSheetUpdateTasks, processGoogleSheetUpdates } from './workflow-ma import { airtableUpdateTasks, processAirtableUpdates } from './workflow-management/integrations/airtable'; import { RemoteBrowser } from './browser-management/classes/RemoteBrowser'; import { io as serverIo } from "./server"; -import { computeNextRun } from './utils/schedule'; -import { handleRunRecording } from './workflow-management/scheduler'; const pgBossConnectionString = `postgres://${process.env.DB_USER}:${process.env.DB_PASSWORD}@${process.env.DB_HOST}:${process.env.DB_PORT}/${process.env.DB_NAME}`; -const registeredQueues = new Set(); - interface InitializeBrowserData { userId: string; } @@ -45,12 +41,6 @@ interface DestroyBrowserData { userId: string; } -interface ScheduledWorkflowData { - id: string; - runId: string; - userId: string; -} - interface ExecuteRunData { userId: string; runId: string; @@ -171,69 +161,6 @@ async function checkAndProcessQueuedRun(userId: string, browserId: string): Prom } } -/** - * Utility function to schedule a cron job using PgBoss - * @param id The robot ID - * @param userId The user ID - * @param cronExpression The cron expression for scheduling - * @param timezone The timezone for the cron expression - */ -export async function scheduleWorkflow(id: string, userId: string, cronExpression: string, timezone: string): Promise { - try { - const runId = require('uuidv4').uuid(); - - const queueName = `scheduled-workflow-${id}`; - - logger.log('info', `Scheduling workflow ${id} with cron expression ${cronExpression} in timezone ${timezone}`); - - await pgBoss.createQueue(queueName); - - await pgBoss.schedule(queueName, cronExpression, - { id, runId, userId }, - { tz: timezone } - ); - - await registerWorkerForQueue(queueName); - - logger.log('info', `Scheduled workflow job for robot ${id}`); - } catch (error: unknown) { - const errorMessage = error instanceof Error ? 
error.message : String(error); - logger.log('error', `Failed to schedule workflow: ${errorMessage}`); - throw error; - } -} - -/** - * Utility function to cancel a scheduled job - * @param robotId The robot ID - * @returns true if successful - */ -export async function cancelScheduledWorkflow(robotId: string) { - try { - const jobs = await pgBoss.getSchedules(); - - const matchingJobs = jobs.filter((job: any) => { - try { - const data = job.data; - return data && data.id === robotId; - } catch { - return false; - } - }); - - for (const job of matchingJobs) { - logger.log('info', `Cancelling scheduled job ${job.name} for robot ${robotId}`); - await pgBoss.unschedule(job.name); - } - - return true; - } catch (error: unknown) { - const errorMessage = error instanceof Error ? error.message : String(error); - logger.log('error', `Failed to cancel scheduled workflow: ${errorMessage}`); - throw error; - } -} - /** * Modified processRunExecution function - only add browser reset */ @@ -432,90 +359,6 @@ async function processRunExecution(job: Job) { } } -/** - * Process a scheduled workflow job - */ -async function processScheduledWorkflow(job: Job) { - const { id, runId, userId } = job.data; - logger.log('info', `Processing scheduled workflow job for robotId: ${id}, runId: ${runId}, userId: ${userId}`); - - try { - // Execute the workflow using the existing handleRunRecording function - const result = await handleRunRecording(id, userId); - - // Update the robot's schedule with last run and next run times - const robot = await Robot.findOne({ where: { 'recording_meta.id': id } }); - if (robot && robot.schedule && robot.schedule.cronExpression && robot.schedule.timezone) { - // Update lastRunAt to the current time - const lastRunAt = new Date(); - - // Compute the next run date - const nextRunAt = computeNextRun(robot.schedule.cronExpression, robot.schedule.timezone) || undefined; - - await robot.update({ - schedule: { - ...robot.schedule, - lastRunAt, - nextRunAt, - }, - 
}); - - logger.log('info', `Updated robot ${id} schedule - next run at: ${nextRunAt}`); - } else { - logger.log('error', `Robot ${id} schedule, cronExpression, or timezone is missing.`); - } - - return { success: true }; - } catch (error: unknown) { - const errorMessage = error instanceof Error ? error.message : String(error); - logger.log('error', `Scheduled workflow job failed: ${errorMessage}`); - return { success: false }; - } -} - -/** - * Register a worker to handle scheduled workflow jobs - */ -async function registerScheduledWorkflowWorker() { - try { - const jobs = await pgBoss.getSchedules(); - for (const job of jobs) { - await pgBoss.createQueue(job.name); - await registerWorkerForQueue(job.name); - } - - logger.log('info', 'Scheduled workflow workers registered successfully'); - } catch (error: unknown) { - const errorMessage = error instanceof Error ? error.message : String(error); - logger.log('error', `Failed to register scheduled workflow workers: ${errorMessage}`); - } -} - -async function registerWorkerForQueue(queueName: string) { - try { - if (registeredQueues.has(queueName)) { - return; - } - - await pgBoss.work(queueName, async (job: Job | Job[]) => { - try { - const singleJob = Array.isArray(job) ? job[0] : job; - return await processScheduledWorkflow(singleJob); - } catch (error: unknown) { - const errorMessage = error instanceof Error ? error.message : String(error); - logger.log('error', `Scheduled workflow job failed in queue ${queueName}: ${errorMessage}`); - throw error; - } - }); - - registeredQueues.add(queueName); - logger.log('info', `Registered worker for queue: ${queueName}`); - } catch (error: unknown) { - const errorMessage = error instanceof Error ? 
error.message : String(error); - logger.log('error', `Failed to register worker for queue ${queueName}: ${errorMessage}`); - } -} - async function registerRunExecutionWorker() { try { const registeredUserQueues = new Map(); @@ -651,9 +494,6 @@ async function startWorkers() { // Register the run execution worker await registerRunExecutionWorker(); - // Register the scheduled workflow worker - await registerScheduledWorkflowWorker(); - logger.log('info', 'All recording workers registered successfully'); } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); From 3c380252d9c264e48c041e6a7f10056b2eae93b3 Mon Sep 17 00:00:00 2001 From: Rohit Date: Sun, 30 Mar 2025 03:31:18 +0530 Subject: [PATCH 015/106] feat: add schedule worker --- server/src/schedule-worker.ts | 209 ++++++++++++++++++++++++++++++++++ 1 file changed, 209 insertions(+) create mode 100644 server/src/schedule-worker.ts diff --git a/server/src/schedule-worker.ts b/server/src/schedule-worker.ts new file mode 100644 index 00000000..bb376c18 --- /dev/null +++ b/server/src/schedule-worker.ts @@ -0,0 +1,209 @@ +/** + * Worker process focused solely on scheduling logic + */ +import PgBoss, { Job } from 'pg-boss'; +import logger from './logger'; +import Robot from './models/Robot'; +import { handleRunRecording } from './workflow-management/scheduler'; +import { computeNextRun } from './utils/schedule'; +import { capture } from './utils/analytics'; + +const pgBossConnectionString = `postgres://${process.env.DB_USER}:${process.env.DB_PASSWORD}@${process.env.DB_HOST}:${process.env.DB_PORT}/${process.env.DB_NAME}`; + +const pgBoss = new PgBoss({connectionString: pgBossConnectionString }); + +const registeredQueues = new Set(); + +interface ScheduledWorkflowData { + id: string; + runId: string; + userId: string; +} + +/** + * Utility function to schedule a cron job using PgBoss + * @param id The robot ID + * @param userId The user ID + * @param cronExpression The cron 
expression for scheduling + * @param timezone The timezone for the cron expression + */ +export async function scheduleWorkflow(id: string, userId: string, cronExpression: string, timezone: string): Promise { + try { + const runId = require('uuidv4').uuid(); + + const queueName = `scheduled-workflow-${id}`; + + logger.log('info', `Scheduling workflow ${id} with cron expression ${cronExpression} in timezone ${timezone}`); + + await pgBoss.createQueue(queueName); + + await pgBoss.schedule(queueName, cronExpression, + { id, runId, userId }, + { tz: timezone } + ); + + await registerWorkerForQueue(queueName); + + logger.log('info', `Scheduled workflow job for robot ${id}`); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.log('error', `Failed to schedule workflow: ${errorMessage}`); + throw error; + } +} + +/** + * Utility function to cancel a scheduled job + * @param robotId The robot ID + * @returns true if successful + */ +export async function cancelScheduledWorkflow(robotId: string) { + try { + const jobs = await pgBoss.getSchedules(); + + const matchingJobs = jobs.filter((job: any) => { + try { + const data = job.data; + return data && data.id === robotId; + } catch { + return false; + } + }); + + for (const job of matchingJobs) { + logger.log('info', `Cancelling scheduled job ${job.name} for robot ${robotId}`); + await pgBoss.unschedule(job.name); + } + + return true; + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.log('error', `Failed to cancel scheduled workflow: ${errorMessage}`); + throw error; + } +} + +/** + * Process a scheduled workflow job + */ +async function processScheduledWorkflow(job: Job) { + const { id, runId, userId } = job.data; + logger.log('info', `Processing scheduled workflow job for robotId: ${id}, runId: ${runId}, userId: ${userId}`); + + try { + // Execute the workflow using the existing handleRunRecording function + const result = await handleRunRecording(id, userId); + + // Update the robot's schedule with last run and next run times + const robot = await Robot.findOne({ where: { 'recording_meta.id': id } }); + if (robot && robot.schedule && robot.schedule.cronExpression && robot.schedule.timezone) { + // Update lastRunAt to the current time + const lastRunAt = new Date(); + + // Compute the next run date + const nextRunAt = computeNextRun(robot.schedule.cronExpression, robot.schedule.timezone) || undefined; + + await robot.update({ + schedule: { + ...robot.schedule, + lastRunAt, + nextRunAt, + }, + }); + + logger.log('info', `Updated robot ${id} schedule - next run at: ${nextRunAt}`); + } else { + logger.log('error', `Robot ${id} schedule, cronExpression, or timezone is missing.`); + } + + return { success: true }; + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.log('error', `Scheduled workflow job failed: ${errorMessage}`); + return { success: false }; + } +} + +/** + * Register a worker to handle scheduled workflow jobs + */ +async function registerScheduledWorkflowWorker() { + try { + const jobs = await pgBoss.getSchedules(); + for (const job of jobs) { + await pgBoss.createQueue(job.name); + await registerWorkerForQueue(job.name); + } + + logger.log('info', 'Scheduled workflow workers registered successfully'); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.log('error', `Failed to register scheduled workflow workers: ${errorMessage}`); + } +} + +/** + * Register a worker for a specific queue + */ +async function registerWorkerForQueue(queueName: string) { + try { + if (registeredQueues.has(queueName)) { + return; + } + + await pgBoss.work(queueName, async (job: Job | Job[]) => { + try { + const singleJob = Array.isArray(job) ? job[0] : job; + return await processScheduledWorkflow(singleJob); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.log('error', `Scheduled workflow job failed in queue ${queueName}: ${errorMessage}`); + throw error; + } + }); + + registeredQueues.add(queueName); + logger.log('info', `Registered worker for queue: ${queueName}`); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.log('error', `Failed to register worker for queue ${queueName}: ${errorMessage}`); + } +} + +/** + * Initialize PgBoss and register scheduling workers + */ +async function startScheduleWorker() { + try { + logger.log('info', 'Starting PgBoss scheduling worker...'); + await pgBoss.start(); + logger.log('info', 'PgBoss scheduling worker started successfully'); + + // Register the scheduled workflow worker + await registerScheduledWorkflowWorker(); + + logger.log('info', 'Scheduling worker registered successfully'); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.log('error', `Failed to start PgBoss scheduling worker: ${errorMessage}`); + process.exit(1); + } +} + +startScheduleWorker(); + +pgBoss.on('error', (error) => { + logger.log('error', `PgBoss scheduler error: ${error.message}`); +}); + +process.on('SIGTERM', async () => { + logger.log('info', 'SIGTERM received, shutting down PgBoss scheduler...'); + await pgBoss.stop(); + process.exit(0); +}); + +process.on('SIGINT', async () => { + logger.log('info', 'SIGINT received, shutting down PgBoss scheduler...'); + await pgBoss.stop(); + process.exit(0); +}); From b4e5f2c149e5949e2495922e0f549fc131efa7ea Mon Sep 17 00:00:00 2001 From: Rohit Date: Sun, 30 Mar 2025 03:31:49 +0530 Subject: [PATCH 016/106] feat: resolve imports --- server/src/routes/storage.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index e4ed5444..7643a59c 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -22,7 +22,8 @@ import { encrypt, decrypt } from '../utils/auth'; import { WorkflowFile } from 'maxun-core'; import { Page } from 'playwright'; import { airtableUpdateTasks, processAirtableUpdates } from '../workflow-management/integrations/airtable'; -import { cancelScheduledWorkflow, pgBoss, scheduleWorkflow } from '../pgboss-worker'; +import { cancelScheduledWorkflow, scheduleWorkflow } from '../schedule-worker'; +import { pgBoss } from '../pgboss-worker'; chromium.use(stealthPlugin()); export const router = Router(); From e6ef51392ad81ba21f3d5a043152a8a2c6f83f0c Mon Sep 17 00:00:00 2001 From: Rohit Date: Sun, 30 Mar 2025 03:32:14 +0530 Subject: [PATCH 017/106] feat: add schedule worker path --- server/src/server.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/server.ts b/server/src/server.ts index 10e33525..cc3dc199 100644 --- a/server/src/server.ts +++ b/server/src/server.ts @@ -97,7 +97,7 @@ 
readdirSync(path.join(__dirname, 'api')).forEach((r) => { }); const isProduction = process.env.NODE_ENV === 'production'; -const workerPath = path.resolve(__dirname, isProduction ? './worker.js' : './worker.ts'); +const workerPath = path.resolve(__dirname, isProduction ? './schedule-worker.js' : './schedule-worker.ts'); const recordingWorkerPath = path.resolve(__dirname, isProduction ? './pgboss-worker.js' : './pgboss-worker.ts'); let workerProcess: any; From 0dc2c5545839b247c619d835fd61a29519c3602f Mon Sep 17 00:00:00 2001 From: Karishma Date: Sun, 30 Mar 2025 17:58:44 +0530 Subject: [PATCH 018/106] feat: remove REDIS_PASSWORD --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index ac1a09ca..3af11a0f 100644 --- a/README.md +++ b/README.md @@ -106,7 +106,7 @@ You can access the frontend at http://localhost:5173/ and backend at http://loca | `AIRTABLE_REDIRECT_URI` | No | Redirect URI for handling Airtable OAuth responses. | Airtable login will not work. | | `REDIS_HOST` | Yes | Host address of the Redis server, used by BullMQ for scheduling robots. | Redis connection will fail. | | `REDIS_PORT` | Yes | Port number for the Redis server. | Redis connection will fail. | -| `REDIS_PASSWORD` | No | Password for Redis Authentication. Needed to authenticate with a password-protected Redis instance; | Redis will attempt to connect without authentication. | + | `MAXUN_TELEMETRY` | No | Disables telemetry to stop sending anonymous usage data. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes. Please keep it enabled. | Telemetry data will not be collected. 
| From 5948c57050278e4bd46d332490246a0dfafef2f9 Mon Sep 17 00:00:00 2001 From: Karishma Date: Sun, 30 Mar 2025 17:59:09 +0530 Subject: [PATCH 019/106] feat: remove REDIS_PORT --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index 3af11a0f..fe33c75e 100644 --- a/README.md +++ b/README.md @@ -105,7 +105,6 @@ You can access the frontend at http://localhost:5173/ and backend at http://loca | `AIRTABLE_CLIENT_ID` | No | Client ID for Airtable, used for Airtable integration authentication. | Airtable login will not work. | | `AIRTABLE_REDIRECT_URI` | No | Redirect URI for handling Airtable OAuth responses. | Airtable login will not work. | | `REDIS_HOST` | Yes | Host address of the Redis server, used by BullMQ for scheduling robots. | Redis connection will fail. | -| `REDIS_PORT` | Yes | Port number for the Redis server. | Redis connection will fail. | | `MAXUN_TELEMETRY` | No | Disables telemetry to stop sending anonymous usage data. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes. Please keep it enabled. | Telemetry data will not be collected. | From f733f7331dc030035cd56e5e3b85626980342793 Mon Sep 17 00:00:00 2001 From: Karishma Date: Sun, 30 Mar 2025 17:59:40 +0530 Subject: [PATCH 020/106] feat: remove REDIS_HOST --- README.md | 1 - 1 file changed, 1 deletion(-) diff --git a/README.md b/README.md index fe33c75e..756d400d 100644 --- a/README.md +++ b/README.md @@ -104,7 +104,6 @@ You can access the frontend at http://localhost:5173/ and backend at http://loca | `GOOGLE_REDIRECT_URI` | No | Redirect URI for handling Google OAuth responses. | Google login will not work. | | `AIRTABLE_CLIENT_ID` | No | Client ID for Airtable, used for Airtable integration authentication. | Airtable login will not work. | | `AIRTABLE_REDIRECT_URI` | No | Redirect URI for handling Airtable OAuth responses. | Airtable login will not work. 
| -| `REDIS_HOST` | Yes | Host address of the Redis server, used by BullMQ for scheduling robots. | Redis connection will fail. | | `MAXUN_TELEMETRY` | No | Disables telemetry to stop sending anonymous usage data. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes. Please keep it enabled. | Telemetry data will not be collected. | From 144fc1ac410e98aa5ef6ffb77aebf2032c7ecd78 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:23:52 +0530 Subject: [PATCH 021/106] chore: remove lodash import --- server/src/browser-management/classes/RemoteBrowser.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 72098a1f..89ea00e2 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -9,7 +9,6 @@ import { chromium } from 'playwright-extra'; import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import { PlaywrightBlocker } from '@cliqz/adblocker-playwright'; import fetch from 'cross-fetch'; -import { throttle } from 'lodash'; import sharp from 'sharp'; import logger from '../../logger'; From 51c9df5490341739907356d12354eba8c7d51cdc Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:24:23 +0530 Subject: [PATCH 022/106] chore: remove whitespace --- server/src/browser-management/classes/RemoteBrowser.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index 89ea00e2..a4f4d876 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -10,7 +10,6 @@ import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import { PlaywrightBlocker } from '@cliqz/adblocker-playwright'; import fetch from 'cross-fetch'; import sharp from 
'sharp'; - import logger from '../../logger'; import { InterpreterSettings, RemoteBrowserOptions } from "../../types"; import { WorkflowGenerator } from "../../workflow-management/classes/Generator"; From 0c81c166958825295d71b2f31fa104318e59e869 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:25:47 +0530 Subject: [PATCH 023/106] chore: remove unused import --- server/src/browser-management/classes/RemoteBrowser.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index a4f4d876..483abb8c 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -11,7 +11,7 @@ import { PlaywrightBlocker } from '@cliqz/adblocker-playwright'; import fetch from 'cross-fetch'; import sharp from 'sharp'; import logger from '../../logger'; -import { InterpreterSettings, RemoteBrowserOptions } from "../../types"; +import { InterpreterSettings } from "../../types"; import { WorkflowGenerator } from "../../workflow-management/classes/Generator"; import { WorkflowInterpreter } from "../../workflow-management/classes/Interpreter"; import { getDecryptedProxyConfig } from '../../routes/proxy'; From 626f7ee0bf3d027a5cec7c801173c99932b0373b Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:28:06 +0530 Subject: [PATCH 024/106] chore: remove unused import --- server/src/middlewares/api.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/middlewares/api.ts b/server/src/middlewares/api.ts index 81ae028e..5374af9b 100644 --- a/server/src/middlewares/api.ts +++ b/server/src/middlewares/api.ts @@ -1,4 +1,4 @@ -import { Request, Response } from "express"; +import { Response } from "express"; import User from "../models/User"; import { AuthenticatedRequest } from "../routes/record" From ee2443bd9baef4d34f949a43ad251ccbd24e8e34 Mon Sep 17 00:00:00 
2001 From: amhsirak Date: Tue, 1 Apr 2025 23:28:27 +0530 Subject: [PATCH 025/106] chore: remove unused import --- server/src/models/Robot.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/models/Robot.ts b/server/src/models/Robot.ts index 1d03999e..8bc50a76 100644 --- a/server/src/models/Robot.ts +++ b/server/src/models/Robot.ts @@ -1,6 +1,6 @@ import { Model, DataTypes, Optional } from 'sequelize'; import sequelize from '../storage/db'; -import { WorkflowFile, Where, What, WhereWhatPair } from 'maxun-core'; +import { WhereWhatPair } from 'maxun-core'; interface RobotMeta { name: string; From ba61637e7bf4ff01c3e6b481ddeab22b44d85c0e Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:28:45 +0530 Subject: [PATCH 026/106] chore: remove unused code --- server/src/models/Robot.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/server/src/models/Robot.ts b/server/src/models/Robot.ts index 8bc50a76..1681eaac 100644 --- a/server/src/models/Robot.ts +++ b/server/src/models/Robot.ts @@ -143,9 +143,4 @@ Robot.init( } ); -// Robot.hasMany(Run, { -// foreignKey: 'robotId', -// as: 'runs', // Alias for the relation -// }); - export default Robot; \ No newline at end of file From 79e4a6d8bdf3d02effec092348b7e8b4074dcce2 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:29:02 +0530 Subject: [PATCH 027/106] chore: remove unused import --- server/src/models/User.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/models/User.ts b/server/src/models/User.ts index c8d6884e..defa4eef 100644 --- a/server/src/models/User.ts +++ b/server/src/models/User.ts @@ -1,6 +1,5 @@ import { DataTypes, Model, Optional } from 'sequelize'; import sequelize from '../storage/db'; -import Robot from './Robot'; interface UserAttributes { id: number; From f7c261ecf9702f685759b153c4e6e7873c575502 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:29:26 +0530 Subject: [PATCH 028/106] chore: remove unused code --- 
server/src/models/User.ts | 5 ----- 1 file changed, 5 deletions(-) diff --git a/server/src/models/User.ts b/server/src/models/User.ts index defa4eef..06de1d26 100644 --- a/server/src/models/User.ts +++ b/server/src/models/User.ts @@ -79,9 +79,4 @@ User.init( } ); -// User.hasMany(Robot, { -// foreignKey: 'userId', -// as: 'robots', // Alias for the relation -// }); - export default User; From 7e0c44865bbf7ef18f1d164f92ca04b27bfb5708 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:29:57 +0530 Subject: [PATCH 029/106] chore: remove unused validation --- server/src/models/User.ts | 7 ------- 1 file changed, 7 deletions(-) diff --git a/server/src/models/User.ts b/server/src/models/User.ts index 06de1d26..6664f381 100644 --- a/server/src/models/User.ts +++ b/server/src/models/User.ts @@ -60,13 +60,6 @@ User.init( proxy_username: { type: DataTypes.STRING, allowNull: true, - // validate: { - // isProxyPasswordRequired(value: string | null) { - // if (value && !this.proxy_password) { - // throw new Error('Proxy password is required when proxy username is provided'); - // } - // }, - // }, }, proxy_password: { type: DataTypes.STRING, From 273fa415031570dafc2c1746f4d764e567160684 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:30:20 +0530 Subject: [PATCH 030/106] chore: remove whitespace --- server/src/routes/auth.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/routes/auth.ts b/server/src/routes/auth.ts index 4a435cd6..1ce415b1 100644 --- a/server/src/routes/auth.ts +++ b/server/src/routes/auth.ts @@ -1,5 +1,4 @@ import { Router, Request, Response } from "express"; - import User from "../models/User"; import Robot from "../models/Robot"; import jwt from "jsonwebtoken"; @@ -10,7 +9,6 @@ import { google } from "googleapis"; import { capture } from "../utils/analytics"; import crypto from 'crypto'; - declare module "express-session" { interface SessionData { code_verifier: string; From 54f67c6f513224db1a73d111e12c1ec0fc94eb6b 
Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:30:50 +0530 Subject: [PATCH 031/106] chore: remove whitespace --- server/src/routes/record.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/server/src/routes/record.ts b/server/src/routes/record.ts index fee5039a..7bd24708 100644 --- a/server/src/routes/record.ts +++ b/server/src/routes/record.ts @@ -5,8 +5,6 @@ import { Router, Request, Response } from 'express'; import { initializeRemoteBrowserForRecording, - destroyRemoteBrowser, - getActiveBrowserId, interpretWholeWorkflow, stopRunningInterpretation, getRemoteBrowserCurrentUrl, @@ -16,7 +14,6 @@ import { import { chromium } from 'playwright-extra'; import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import logger from "../logger"; -import { getDecryptedProxyConfig } from './proxy'; import { requireSignIn } from '../middlewares/auth'; import { pgBoss } from '../pgboss-worker'; From bb9ca0721fe93990565c6f858ccca51994ebd6bb Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:32:05 +0530 Subject: [PATCH 032/106] chore: remove unused import --- server/src/routes/storage.ts | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index 7643a59c..b8c9a1ab 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -1,27 +1,21 @@ import { Router } from 'express'; import logger from "../logger"; -import { createRemoteBrowserForRun, destroyRemoteBrowser, getActiveBrowserIdByState } from "../browser-management/controller"; +import { createRemoteBrowserForRun, getActiveBrowserIdByState } from "../browser-management/controller"; import { chromium } from 'playwright-extra'; import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import { browserPool } from "../server"; import { uuid } from "uuidv4"; import moment from 'moment-timezone'; import cron from 'node-cron'; -import { googleSheetUpdateTasks, processGoogleSheetUpdates } from 
'../workflow-management/integrations/gsheet'; import { getDecryptedProxyConfig } from './proxy'; import { requireSignIn } from '../middlewares/auth'; import Robot from '../models/Robot'; import Run from '../models/Run'; -import { BinaryOutputService } from '../storage/mino'; -import { workflowQueue } from '../worker'; import { AuthenticatedRequest } from './record'; import { computeNextRun } from '../utils/schedule'; import { capture } from "../utils/analytics"; -import { tryCatch } from 'bullmq'; import { encrypt, decrypt } from '../utils/auth'; import { WorkflowFile } from 'maxun-core'; -import { Page } from 'playwright'; -import { airtableUpdateTasks, processAirtableUpdates } from '../workflow-management/integrations/airtable'; import { cancelScheduledWorkflow, scheduleWorkflow } from '../schedule-worker'; import { pgBoss } from '../pgboss-worker'; chromium.use(stealthPlugin()); From b7030cd341c8a9b22ba352426e8d5bb493f2d80b Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:32:29 +0530 Subject: [PATCH 033/106] chore: remove unused import --- server/src/socket-connection/connection.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/socket-connection/connection.ts b/server/src/socket-connection/connection.ts index 66294ecf..a7f7565d 100644 --- a/server/src/socket-connection/connection.ts +++ b/server/src/socket-connection/connection.ts @@ -1,6 +1,6 @@ import { Namespace, Socket } from 'socket.io'; import { IncomingMessage } from 'http'; -import { verify, JwtPayload, sign } from 'jsonwebtoken'; +import { verify, JwtPayload } from 'jsonwebtoken'; import logger from "../logger"; import registerInputHandlers from '../browser-management/inputHandlers'; From a505bf43ef3454897d28c23fb668c82e6c179d89 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:32:48 +0530 Subject: [PATCH 034/106] chore: remove unused import --- server/src/storage/db.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/storage/db.ts 
b/server/src/storage/db.ts index cdd84655..25c1d104 100644 --- a/server/src/storage/db.ts +++ b/server/src/storage/db.ts @@ -1,6 +1,5 @@ import { Sequelize } from 'sequelize'; import dotenv from 'dotenv'; -import setupAssociations from '../models/associations'; dotenv.config(); From 7f3af7e60c25888a4c23e6dee485ef53d26ba3e6 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:34:29 +0530 Subject: [PATCH 035/106] chore: remove unused import --- server/src/workflow-management/classes/Generator.ts | 4 ---- 1 file changed, 4 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index aea37126..004126bd 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -13,11 +13,7 @@ import { selectorAlreadyInWorkflow } from "../selector"; import { CustomActions } from "../../../../src/shared/types"; -import { workflow } from "../../routes"; import Robot from "../../models/Robot"; -import Run from "../../models/Run"; -import { saveFile } from "../storage"; -import fs from "fs"; import { getBestSelectorForAction } from "../utils"; import { browserPool } from "../../server"; import { uuid } from "uuidv4"; From 9918b31c3a00de279800ef15fbfbab5096cdf948 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:35:29 +0530 Subject: [PATCH 036/106] chore: remove unused import --- server/src/pgboss-worker.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index 269f6773..0771cc27 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -8,7 +8,6 @@ import { destroyRemoteBrowser, interpretWholeWorkflow, stopRunningInterpretation, - createRemoteBrowserForRun } from './browser-management/controller'; import { WorkflowFile } from 'maxun-core'; import Run from './models/Run'; From 5ccdfed26e8e8daaa642a582bdf703bd5e2611c6 Mon Sep 17 00:00:00 2001 From: 
amhsirak Date: Tue, 1 Apr 2025 23:36:11 +0530 Subject: [PATCH 037/106] chore: remove unused import --- server/src/schedule-worker.ts | 1 - 1 file changed, 1 deletion(-) diff --git a/server/src/schedule-worker.ts b/server/src/schedule-worker.ts index bb376c18..73afd872 100644 --- a/server/src/schedule-worker.ts +++ b/server/src/schedule-worker.ts @@ -6,7 +6,6 @@ import logger from './logger'; import Robot from './models/Robot'; import { handleRunRecording } from './workflow-management/scheduler'; import { computeNextRun } from './utils/schedule'; -import { capture } from './utils/analytics'; const pgBossConnectionString = `postgres://${process.env.DB_USER}:${process.env.DB_PASSWORD}@${process.env.DB_HOST}:${process.env.DB_PORT}/${process.env.DB_NAME}`; From 3b53628655590a3e97da11b6f036412d89b07bac Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:36:43 +0530 Subject: [PATCH 038/106] chore: remove unused import --- server/src/server.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/server.ts b/server/src/server.ts index cc3dc199..8bc3a184 100644 --- a/server/src/server.ts +++ b/server/src/server.ts @@ -8,9 +8,7 @@ import { record, workflow, storage, auth, integration, proxy } from './routes'; import { BrowserPool } from "./browser-management/classes/BrowserPool"; import logger from './logger'; import { connectDB, syncDB } from './storage/db' -import bodyParser from 'body-parser'; import cookieParser from 'cookie-parser'; -import csrf from 'csurf'; import { SERVER_PORT } from "./constants/config"; import { Server } from "socket.io"; import { readdirSync } from "fs" From 502ba6ab929b9a52261c4fe16eb8c37037ceac84 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:37:00 +0530 Subject: [PATCH 039/106] chore: remove unused import --- server/src/server.ts | 2 -- 1 file changed, 2 deletions(-) diff --git a/server/src/server.ts b/server/src/server.ts index 8bc3a184..91174cca 100644 --- a/server/src/server.ts +++ 
b/server/src/server.ts @@ -18,9 +18,7 @@ import swaggerUi from 'swagger-ui-express'; import swaggerSpec from './swagger/config'; import connectPgSimple from 'connect-pg-simple'; import pg from 'pg'; - import session from 'express-session'; - import Run from './models/Run'; const app = express(); From 0fbd8c189a83b25ea6def3467e147a5a611b48cf Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:39:11 +0530 Subject: [PATCH 040/106] chore: remove unused import --- src/components/browser/BrowserNavBar.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/browser/BrowserNavBar.tsx b/src/components/browser/BrowserNavBar.tsx index 64759d62..a9a1a8d0 100644 --- a/src/components/browser/BrowserNavBar.tsx +++ b/src/components/browser/BrowserNavBar.tsx @@ -5,7 +5,7 @@ import ArrowBackIcon from '@mui/icons-material/ArrowBack'; import ArrowForwardIcon from '@mui/icons-material/ArrowForward'; import { NavBarButton } from '../ui/buttons/buttons'; import { UrlForm } from './UrlForm'; -import { useCallback, useEffect, useState } from "react"; +import { useCallback, useEffect } from "react"; import { useSocketStore } from "../../context/socket"; import { getCurrentUrl } from "../../api/recording"; import { useGlobalInfoStore } from '../../context/globalInfo'; From 6988e6aee27c238cf599b83e4b6fb8fcfb42eb10 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:39:44 +0530 Subject: [PATCH 041/106] chore: remove unused import --- src/components/dashboard/MainMenu.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/dashboard/MainMenu.tsx b/src/components/dashboard/MainMenu.tsx index df7b41ae..8fcdaedf 100644 --- a/src/components/dashboard/MainMenu.tsx +++ b/src/components/dashboard/MainMenu.tsx @@ -4,7 +4,7 @@ import Tab from '@mui/material/Tab'; import Box from '@mui/material/Box'; import { useNavigate } from 'react-router-dom'; import { Paper, Button, useTheme } from "@mui/material"; -import { 
AutoAwesome, FormatListBulleted, VpnKey, Usb, Article, CloudQueue, Code, } from "@mui/icons-material"; +import { AutoAwesome, FormatListBulleted, VpnKey, Usb, CloudQueue, Code, } from "@mui/icons-material"; import { apiUrl } from "../../apiConfig"; import { useTranslation } from 'react-i18next'; import i18n from '../../i18n'; From 33a1cd6c39b7c64269644ae3787772dc308b90c6 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:41:47 +0530 Subject: [PATCH 042/106] chore: remove unused import --- src/components/run/InterpretationLog.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/run/InterpretationLog.tsx b/src/components/run/InterpretationLog.tsx index 09025c99..fa749efa 100644 --- a/src/components/run/InterpretationLog.tsx +++ b/src/components/run/InterpretationLog.tsx @@ -1,7 +1,7 @@ import * as React from 'react'; import SwipeableDrawer from '@mui/material/SwipeableDrawer'; import Typography from '@mui/material/Typography'; -import { Button, TextField, Grid } from '@mui/material'; +import { Button, Grid } from '@mui/material'; import { useCallback, useEffect, useRef, useState } from "react"; import { useSocketStore } from "../../context/socket"; import { Buffer } from 'buffer'; From 1af0109d97f9c976840b156182646717dfff45a8 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:42:04 +0530 Subject: [PATCH 043/106] chore: remove unused import --- src/components/run/Runs.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/run/Runs.tsx b/src/components/run/Runs.tsx index cedbf348..736378f6 100644 --- a/src/components/run/Runs.tsx +++ b/src/components/run/Runs.tsx @@ -1,4 +1,4 @@ -import React, { useEffect } from 'react'; +import React from 'react'; import { Grid } from "@mui/material"; import { RunsTable } from "./RunsTable"; From e88e2445919d91d59d1098b4b89d36af47393367 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:42:20 +0530 Subject: [PATCH 044/106] chore: 
remove unused import --- src/components/run/RunsTable.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/run/RunsTable.tsx b/src/components/run/RunsTable.tsx index 53d29433..1e52f1c7 100644 --- a/src/components/run/RunsTable.tsx +++ b/src/components/run/RunsTable.tsx @@ -9,7 +9,7 @@ import TableContainer from '@mui/material/TableContainer'; import TableHead from '@mui/material/TableHead'; import TablePagination from '@mui/material/TablePagination'; import TableRow from '@mui/material/TableRow'; -import { Accordion, AccordionSummary, AccordionDetails, Typography, Box, TextField, CircularProgress, Tooltip } from '@mui/material'; +import { Accordion, AccordionSummary, AccordionDetails, Typography, Box, TextField, Tooltip } from '@mui/material'; import ExpandMoreIcon from '@mui/icons-material/ExpandMore'; import SearchIcon from '@mui/icons-material/Search'; import { useLocation, useNavigate } from 'react-router-dom'; From 973d01755fe0758fd884a07e69fd590095e3c79a Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:42:42 +0530 Subject: [PATCH 045/106] chore: remove unused import --- src/components/ui/ConfirmationBox.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/ui/ConfirmationBox.tsx b/src/components/ui/ConfirmationBox.tsx index b3eb10c2..1e667b03 100644 --- a/src/components/ui/ConfirmationBox.tsx +++ b/src/components/ui/ConfirmationBox.tsx @@ -1,5 +1,5 @@ import React from 'react'; -import { Box, Button, IconButton, Stack, Typography } from "@mui/material"; +import { Box, Button, Typography } from "@mui/material"; interface ConfirmationBoxProps { selector: string; From d6be3f42c49d35ce7d6e168be130aa2ed77a8d65 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:43:23 +0530 Subject: [PATCH 046/106] chore: remove unused import --- src/pages/MainPage.tsx | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/pages/MainPage.tsx b/src/pages/MainPage.tsx index 60990291..aa896d50 
100644 --- a/src/pages/MainPage.tsx +++ b/src/pages/MainPage.tsx @@ -12,8 +12,6 @@ import { io, Socket } from "socket.io-client"; import { stopRecording } from "../api/recording"; import { RunSettings } from "../components/run/RunSettings"; import { ScheduleSettings } from "../components/robot/ScheduleSettings"; -import { IntegrationSettings } from "../components/integration/IntegrationSettings"; -import { RobotSettings } from "../components/robot/RobotSettings"; import { apiUrl } from "../apiConfig"; import { useNavigate } from 'react-router-dom'; From eee385f2c5b75451f8ea4ad91df70ea707d5c5eb Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:43:35 +0530 Subject: [PATCH 047/106] chore: remove unused import --- src/pages/PageWrapper.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pages/PageWrapper.tsx b/src/pages/PageWrapper.tsx index 16f2e50d..e7361b2f 100644 --- a/src/pages/PageWrapper.tsx +++ b/src/pages/PageWrapper.tsx @@ -6,7 +6,6 @@ import { AuthProvider } from '../context/auth'; import { RecordingPage } from "./RecordingPage"; import { MainPage } from "./MainPage"; import { useGlobalInfoStore } from "../context/globalInfo"; -import { getActiveBrowserId } from "../api/recording"; import { AlertSnackbar } from "../components/ui/AlertSnackbar"; import Login from './Login'; import Register from './Register'; From e0af2a6d54439485185a60bdc6cbf9f5b3eb2c3e Mon Sep 17 00:00:00 2001 From: amhsirak Date: Tue, 1 Apr 2025 23:43:47 +0530 Subject: [PATCH 048/106] chore: remove unused import --- src/pages/RecordingPage.tsx | 1 - 1 file changed, 1 deletion(-) diff --git a/src/pages/RecordingPage.tsx b/src/pages/RecordingPage.tsx index 034b2ea8..34c2f90d 100644 --- a/src/pages/RecordingPage.tsx +++ b/src/pages/RecordingPage.tsx @@ -3,7 +3,6 @@ import { Grid } from '@mui/material'; import { BrowserContent } from "../components/browser/BrowserContent"; import { InterpretationLog } from "../components/run/InterpretationLog"; import { startRecording, 
getActiveBrowserId } from "../api/recording"; -import { LeftSidePanel } from "../components/recorder/LeftSidePanel"; import { RightSidePanel } from "../components/recorder/RightSidePanel"; import { Loader } from "../components/ui/Loader"; import { useSocketStore } from "../context/socket"; From 2d0e945abe71291d446e64753ae4e8dde156460b Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 3 Apr 2025 20:08:27 +0530 Subject: [PATCH 049/106] feat: rm page timeout for pagination --- maxun-core/src/interpret.ts | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 94ec4f1d..53b999fa 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -663,10 +663,7 @@ export default class Interpreter extends EventEmitter { let availableSelectors = config.pagination.selector.split(','); try { - while (true) { - // Reduced timeout for faster performance - await page.waitForLoadState('networkidle', { timeout: 10000 }).catch(() => {}); - + while (true) { switch (config.pagination.type) { case 'scrollDown': { await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); From b859ee6d9b6ba8a8bf1e7ab44266793999c6e1f8 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 5 Apr 2025 18:18:18 +0530 Subject: [PATCH 050/106] feat: remove redis --- docker-compose.yml | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index b571cc6f..92ba55a5 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -17,16 +17,6 @@ services: timeout: 5s retries: 5 - redis: - image: redis:6 - environment: - REDIS_HOST: ${REDIS_HOST} - REDIS_PORT: ${REDIS_PORT} - ports: - - "${REDIS_PORT:-6379}:${REDIS_PORT:-6379}" - volumes: - - redis_data:/data - minio: image: minio/minio environment: From 5d2b26d9c0a8c41d7741fa08c39ee1e160876829 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 5 Apr 2025 18:18:52 +0530 Subject: [PATCH 051/106] feat: remove redis from 
depends_on --- docker-compose.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/docker-compose.yml b/docker-compose.yml index 92ba55a5..05cba63a 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -51,7 +51,6 @@ services: mem_limit: 2g # Set a 2GB memory limit depends_on: - postgres - - redis - minio volumes: - /var/run/dbus:/var/run/dbus From be97682d62733894e7f97939315ce873f8ee5681 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 5 Apr 2025 18:19:16 +0530 Subject: [PATCH 052/106] feat: remove redis_data from volumes --- docker-compose.yml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 05cba63a..e57792ae 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -71,5 +71,4 @@ services: volumes: postgres_data: - minio_data: - redis_data: \ No newline at end of file + minio_data: \ No newline at end of file From 61242928990c3339fc4e9355f9d269236eddec8e Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 5 Apr 2025 18:32:12 +0530 Subject: [PATCH 053/106] chore: uninstall bullmq --- package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/package.json b/package.json index efa61130..2279c772 100644 --- a/package.json +++ b/package.json @@ -27,7 +27,6 @@ "bcrypt": "^5.1.1", "body-parser": "^1.20.3", "buffer": "^6.0.3", - "bullmq": "^5.12.15", "connect-pg-simple": "^10.0.0", "connect-redis": "^8.0.1", "cookie-parser": "^1.4.6", From e18626da293f61312618548fe73ec20699d28180 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 5 Apr 2025 18:32:35 +0530 Subject: [PATCH 054/106] chore: uninstall redis --- package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/package.json b/package.json index 2279c772..fd4ce29a 100644 --- a/package.json +++ b/package.json @@ -71,7 +71,6 @@ "react-router-dom": "^6.26.1", "react-simple-code-editor": "^0.11.2", "react-transition-group": "^4.4.2", - "redis": "^4.7.0", "sequelize": "^6.37.3", "sequelize-typescript": "^2.1.6", "sharp": "^0.33.5", From 
48ff8c83ed0c4f16c30e5f95307e25ba6670e15a Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 5 Apr 2025 18:33:00 +0530 Subject: [PATCH 055/106] chore: uninstall connect-redis --- package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/package.json b/package.json index fd4ce29a..a4d11f7f 100644 --- a/package.json +++ b/package.json @@ -28,7 +28,6 @@ "body-parser": "^1.20.3", "buffer": "^6.0.3", "connect-pg-simple": "^10.0.0", - "connect-redis": "^8.0.1", "cookie-parser": "^1.4.6", "cors": "^2.8.5", "cron-parser": "^4.9.0", From 0aac7f1b2d8a3e5aec6dc01f39a5b061dfe32299 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 5 Apr 2025 18:33:38 +0530 Subject: [PATCH 056/106] chore: uninstall @types/redis --- package.json | 1 - 1 file changed, 1 deletion(-) diff --git a/package.json b/package.json index a4d11f7f..876fa45b 100644 --- a/package.json +++ b/package.json @@ -118,7 +118,6 @@ "@types/prismjs": "^1.26.0", "@types/react-highlight": "^0.12.5", "@types/react-transition-group": "^4.4.4", - "@types/redis": "^4.0.11", "@types/styled-components": "^5.1.23", "@types/swagger-jsdoc": "^6.0.4", "@types/swagger-ui-express": "^4.1.6", From 6aa6d5cadb5b70ae705a9f72472f527052ba0094 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 5 Apr 2025 18:43:02 +0530 Subject: [PATCH 057/106] chore: !copy maxun-core --- Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 9a165b33..9cb25d6f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,7 +4,6 @@ WORKDIR /app # Copy package files COPY package*.json ./ -COPY maxun-core ./maxun-core # Install dependencies RUN npm install --legacy-peer-deps From 9247e9664c090d686f092b0d610bc8ffea63769c Mon Sep 17 00:00:00 2001 From: amhsirak Date: Sat, 5 Apr 2025 18:43:31 +0530 Subject: [PATCH 058/106] chore: !copy maxun-core --- server/Dockerfile | 1 - 1 file changed, 1 deletion(-) diff --git a/server/Dockerfile b/server/Dockerfile index 65e472eb..af0860fc 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -5,7 
+5,6 @@ WORKDIR /app # Install node dependencies COPY package*.json ./ -COPY maxun-core ./maxun-core COPY src ./src COPY public ./public COPY server ./server From 58548ec2f8db6ea928809b6a4dbf6fb704436663 Mon Sep 17 00:00:00 2001 From: Rohit Date: Mon, 7 Apr 2025 22:15:45 +0530 Subject: [PATCH 059/106] feat: change checkbox ui --- src/components/action/ActionDescriptionBox.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/action/ActionDescriptionBox.tsx b/src/components/action/ActionDescriptionBox.tsx index 97c979ec..d36db407 100644 --- a/src/components/action/ActionDescriptionBox.tsx +++ b/src/components/action/ActionDescriptionBox.tsx @@ -102,7 +102,7 @@ const ActionDescriptionBox = ({ isDarkMode }: { isDarkMode: boolean }) => { sx={{ color: isDarkMode ? 'white' : 'default', '&.Mui-checked': { - color: isDarkMode ? '#90caf9' : '#1976d2', + color: '#ff33cc', }, }} /> From 7761b2af35761c7f00c7c622e1b09bd5d911e5f3 Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 8 Apr 2025 15:09:38 +0530 Subject: [PATCH 060/106] feat: track results and return scroll pagination --- maxun-core/src/interpret.ts | 44 +++++++++++++++++++++++++++++++++---- 1 file changed, 40 insertions(+), 4 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 53b999fa..e662683c 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -572,6 +572,7 @@ export default class Interpreter extends EventEmitter { let visitedUrls: Set = new Set(); const MAX_RETRIES = 3; const RETRY_DELAY = 1000; // 1 second delay between retries + const MAX_UNCHANGED_RESULTS = 5; const debugLog = (message: string, ...args: any[]) => { console.log(`[Page ${visitedUrls.size}] [URL: ${page.url()}] ${message}`, ...args); @@ -661,18 +662,36 @@ export default class Interpreter extends EventEmitter { }; let availableSelectors = config.pagination.selector.split(','); + let unchangedResultCounter = 0; try { while (true) { switch 
(config.pagination.type) { case 'scrollDown': { + let previousResultCount = allResults.length; + + await scrapeCurrentPage(); + + if (checkLimit()) { + return allResults; + } + await page.evaluate(() => window.scrollTo(0, document.body.scrollHeight)); await page.waitForTimeout(2000); const currentHeight = await page.evaluate(() => document.body.scrollHeight); + const currentResultCount = allResults.length; + + if (currentResultCount === previousResultCount) { + unchangedResultCounter++; + if (unchangedResultCounter >= MAX_UNCHANGED_RESULTS) { + return allResults; + } + } else { + unchangedResultCounter = 0; + } + if (currentHeight === previousHeight) { - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); return allResults; } @@ -681,13 +700,30 @@ export default class Interpreter extends EventEmitter { } case 'scrollUp': { + let previousResultCount = allResults.length; + + await scrapeCurrentPage(); + + if (checkLimit()) { + return allResults; + } + await page.evaluate(() => window.scrollTo(0, 0)); await page.waitForTimeout(2000); const currentTopHeight = await page.evaluate(() => document.documentElement.scrollTop); + const currentResultCount = allResults.length; + + if (currentResultCount === previousResultCount) { + unchangedResultCounter++; + if (unchangedResultCounter >= MAX_UNCHANGED_RESULTS) { + return allResults; + } + } else { + unchangedResultCounter = 0; + } + if (currentTopHeight === 0) { - const finalResults = await page.evaluate((cfg) => window.scrapeList(cfg), config); - allResults = allResults.concat(finalResults); return allResults; } From 4d2c294ec37fb14e286962f3106903ec0b06615b Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 8 Apr 2025 17:35:12 +0530 Subject: [PATCH 061/106] feat: add error handling to wheel input event --- server/src/browser-management/inputHandlers.ts | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git 
a/server/src/browser-management/inputHandlers.ts b/server/src/browser-management/inputHandlers.ts index 8f4161b5..598c528e 100644 --- a/server/src/browser-management/inputHandlers.ts +++ b/server/src/browser-management/inputHandlers.ts @@ -185,8 +185,19 @@ const onWheel = async (socket: AuthenticatedSocket, scrollDeltas: ScrollDeltas) * @category BrowserManagement */ const handleWheel = async (generator: WorkflowGenerator, page: Page, { deltaX, deltaY }: ScrollDeltas) => { - await page.mouse.wheel(deltaX, deltaY); - logger.log('debug', `Scrolled horizontally ${deltaX} pixels and vertically ${deltaY} pixels`); + try { + if (page.isClosed()) { + return; + } + + await page.mouse.wheel(deltaX, deltaY).catch(error => { + logger.log('warn', `Wheel event failed: ${error.message}`); + }); + logger.log('debug', `Scrolled horizontally ${deltaX} pixels and vertically ${deltaY} pixels`); + } catch (e) { + const { message } = e as Error; + logger.log('warn', `Error handling wheel event: ${message}`); + } }; /** From 3fd5526685aa2296d2bf96675c3b31233781661e Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 8 Apr 2025 18:27:27 +0530 Subject: [PATCH 062/106] feat: add abort run job to pgboss queue --- server/src/routes/storage.ts | 42 ++++++++++++++---------------------- 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index b8c9a1ab..e7e3939c 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -904,42 +904,32 @@ router.delete('/schedule/:id', requireSignIn, async (req: AuthenticatedRequest, router.post('/runs/abort/:id', requireSignIn, async (req: AuthenticatedRequest, res) => { try { if (!req.user) { return res.status(401).send({ error: 'Unauthorized' }); } - const run = await Run.findOne({ where: { + + const run = await Run.findOne({ where: { runId: req.params.id, runByUserId: req.user.id, } }); + if (!run) { return res.status(404).send(false); } - const plainRun = run.toJSON(); 
- - const browser = browserPool.getRemoteBrowser(plainRun.browserId); - const currentLog = browser?.interpreter.debugMessages.join('/n'); - const serializableOutput = browser?.interpreter.serializableData.reduce((reducedObject, item, index) => { - return { - [`item-${index}`]: item, - ...reducedObject, - } - }, {}); - const binaryOutput = browser?.interpreter.binaryData.reduce((reducedObject, item, index) => { - return { - [`item-${index}`]: item, - ...reducedObject, - } - }, {}); - await run.update({ - ...run, - status: 'aborted', - finishedAt: new Date().toLocaleString(), - browserId: plainRun.browserId, - log: currentLog, - serializableOutput, - binaryOutput, + + const userQueueName = `abort-run-user-${req.user.id}`; + await pgBoss.createQueue(userQueueName); + + await pgBoss.send(userQueueName, { + userId: req.user.id, + runId: req.params.id }); + + await run.update({ + status: 'aborting' + }); + return res.send(true); } catch (e) { const { message } = e as Error; - logger.log('info', `Error while running a robot with name: ${req.params.fileName}_${req.params.runId}.json`); + logger.log('info', `Error while aborting run with id: ${req.params.id} - ${message}`); return res.send(false); } }); From d1c7b5065e9afa5417a4b9cd1f0f125ff0e3228c Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 8 Apr 2025 18:28:12 +0530 Subject: [PATCH 063/106] feat: register abort run queue worker --- server/src/pgboss-worker.ts | 126 ++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index 0771cc27..f197489d 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -46,6 +46,11 @@ interface ExecuteRunData { browserId: string; } +interface AbortRunData { + userId: string; + runId: string; +} + const pgBoss = new PgBoss({connectionString: pgBossConnectionString }); /** @@ -358,6 +363,78 @@ async function processRunExecution(job: Job) { } } +async function abortRun(runId: string, 
userId: string): Promise { + try { + const run = await Run.findOne({ + where: { + runId: runId, + runByUserId: userId + } + }); + + if (!run) { + logger.log('warn', `Run ${runId} not found or does not belong to user ${userId}`); + return false; + } + + const plainRun = run.toJSON(); + + const browser = browserPool.getRemoteBrowser(plainRun.browserId); + + if (!browser) { + await run.update({ + status: 'aborted', + finishedAt: new Date().toLocaleString(), + log: 'Aborted: Browser not found or already closed' + }); + + logger.log('warn', `Browser not found for run ${runId}`); + return true; + } + + const currentLog = browser.interpreter.debugMessages.join('\n'); + const serializableOutput = browser.interpreter.serializableData.reduce((reducedObject, item, index) => { + return { + [`item-${index}`]: item, + ...reducedObject, + } + }, {}); + + const binaryOutput = browser.interpreter.binaryData.reduce((reducedObject, item, index) => { + return { + [`item-${index}`]: item, + ...reducedObject, + } + }, {}); + + await run.update({ + status: 'aborted', + finishedAt: new Date().toLocaleString(), + browserId: plainRun.browserId, + log: currentLog || 'Run aborted by user', + serializableOutput, + binaryOutput, + }); + + const queuedRunProcessed = await checkAndProcessQueuedRun(userId, plainRun.browserId); + + if (!queuedRunProcessed) { + try { + await destroyRemoteBrowser(plainRun.browserId, userId); + logger.log('info', `No queued runs found for browser ${plainRun.browserId}, browser destroyed`); + } catch (cleanupError) { + logger.log('warn', `Failed to clean up browser for aborted run ${runId}: ${cleanupError}`); + } + } + + return true; + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.log('error', `Failed to abort run ${runId}: ${errorMessage}`); + return false; + } +} + async function registerRunExecutionWorker() { try { const registeredUserQueues = new Map(); @@ -412,6 +489,52 @@ async function registerRunExecutionWorker() { } } +async function registerAbortRunWorker() { + try { + const registeredAbortQueues = new Map(); + + const checkForNewAbortQueues = async () => { + try { + const activeQueues = await pgBoss.getQueues(); + + const abortQueues = activeQueues.filter(q => q.name.startsWith('abort-run-user-')); + + for (const queue of abortQueues) { + if (!registeredAbortQueues.has(queue.name)) { + await pgBoss.work(queue.name, async (job: Job | Job[]) => { + try { + const data = extractJobData(job); + const { userId, runId } = data; + + logger.log('info', `Processing abort request for run ${runId} by user ${userId}`); + const success = await abortRun(runId, userId); + return { success }; + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.log('error', `Abort run job failed in ${queue.name}: ${errorMessage}`); + throw error; + } + }); + + registeredAbortQueues.set(queue.name, true); + logger.log('info', `Registered abort worker for queue: ${queue.name}`); + } + } + } catch (error: unknown) { + const errorMessage = error instanceof Error ? error.message : String(error); + logger.log('error', `Failed to check for new abort queues: ${errorMessage}`); + } + }; + + await checkForNewAbortQueues(); + + logger.log('info', 'Abort run worker registration system initialized'); + } catch (error: unknown) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.log('error', `Failed to initialize abort run worker system: ${errorMessage}`); + } +} + /** * Initialize PgBoss and register all workers @@ -493,6 +616,9 @@ async function startWorkers() { // Register the run execution worker await registerRunExecutionWorker(); + // Register the abort run worker + await registerAbortRunWorker(); + logger.log('info', 'All recording workers registered successfully'); } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); From 2d75662c30fb29bca722c2a4abfc687f1619c520 Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 8 Apr 2025 18:38:21 +0530 Subject: [PATCH 064/106] feat: group accordion by updated run name --- src/components/run/RunsTable.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/run/RunsTable.tsx b/src/components/run/RunsTable.tsx index 1e52f1c7..0864c493 100644 --- a/src/components/run/RunsTable.tsx +++ b/src/components/run/RunsTable.tsx @@ -390,7 +390,7 @@ export const RunsTable: React.FC = ({ TransitionProps={{ unmountOnExit: true }} // Optimize accordion rendering > }> - {data[data.length - 1].name} + {data[0].name} From b3a7169a7d1ebb775abeb5749138e9ce2937f269 Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 8 Apr 2025 19:00:32 +0530 Subject: [PATCH 065/106] feat: add translation for run chip abort message --- public/locales/de.json | 3 ++- public/locales/en.json | 3 ++- public/locales/es.json | 3 ++- public/locales/ja.json | 3 ++- public/locales/zh.json | 3 ++- 5 files changed, 10 insertions(+), 5 deletions(-) diff --git a/public/locales/de.json b/public/locales/de.json index 363aebbf..5c5c1acc 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -513,7 +513,8 @@ "running": "Läuft", "scheduled": "Geplant", "queued": "In Warteschlange", - "failed": "Fehlgeschlagen" + "failed": "Fehlgeschlagen", + "aborted": "Abgebrochen" }, "run_settings_modal": { "title": "Ausführungseinstellungen", 
diff --git a/public/locales/en.json b/public/locales/en.json index 53ac4184..03ee61cb 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -521,7 +521,8 @@ "running": "Running", "scheduled": "Scheduled", "queued": "Queued", - "failed": "Failed" + "failed": "Failed", + "aborted": "Aborted" }, "run_settings_modal": { "title": "Run Settings", diff --git a/public/locales/es.json b/public/locales/es.json index 14db7b5d..0abb10a0 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -514,7 +514,8 @@ "running": "Ejecutando", "scheduled": "Programado", "queued": "En cola", - "failed": "Fallido" + "failed": "Fallido", + "aborted": "Abortado" }, "run_settings_modal": { "title": "Configuración de Ejecución", diff --git a/public/locales/ja.json b/public/locales/ja.json index 54f7e788..58219ceb 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -514,7 +514,8 @@ "running": "実行中", "scheduled": "スケジュール済み", "queued": "キューに入れました", - "failed": "失敗" + "failed": "失敗", + "aborted": "中止されました" }, "run_settings_modal": { "title": "実行設定", diff --git a/public/locales/zh.json b/public/locales/zh.json index cc23556e..71f98c07 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -514,7 +514,8 @@ "running": "运行中", "scheduled": "已计划", "queued": "排队", - "failed": "失败" + "failed": "失败", + "aborted": "已中止" }, "run_settings_modal": { "title": "运行设置", From edbf16529aa4f50a1ab4702010c08c7d838b0f98 Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 8 Apr 2025 19:01:21 +0530 Subject: [PATCH 066/106] feat: add abort run chip for runs ui --- src/components/run/ColapsibleRow.tsx | 1 + 1 file changed, 1 insertion(+) diff --git a/src/components/run/ColapsibleRow.tsx b/src/components/run/ColapsibleRow.tsx index 4d5fe822..eadf0823 100644 --- a/src/components/run/ColapsibleRow.tsx +++ b/src/components/run/ColapsibleRow.tsx @@ -125,6 +125,7 @@ export const CollapsibleRow = ({ row, handleDelete, isOpen, currentLog, abortRun {row.status === 'scheduled' && } 
{row.status === 'queued' && } {row.status === 'failed' && } + {row.status === 'aborted' && } ) case 'delete': From 389a1cbdc8b1124a757c590972d9c09fe6104376 Mon Sep 17 00:00:00 2001 From: Rohit Date: Wed, 9 Apr 2025 19:40:48 +0530 Subject: [PATCH 067/106] feat: add robot retraining logic --- src/components/robot/RecordingsTable.tsx | 85 +++++++++++++++++++++++- 1 file changed, 82 insertions(+), 3 deletions(-) diff --git a/src/components/robot/RecordingsTable.tsx b/src/components/robot/RecordingsTable.tsx index 2fc4f26e..878c998f 100644 --- a/src/components/robot/RecordingsTable.tsx +++ b/src/components/robot/RecordingsTable.tsx @@ -35,7 +35,8 @@ import { Settings, Power, ContentCopy, - MoreHoriz + MoreHoriz, + Refresh } from "@mui/icons-material"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { checkRunsForRecording, deleteRecordingFromStorage, getStoredRecordings } from "../../api/storage"; @@ -117,6 +118,7 @@ const TableRowMemoized = memo(({ row, columns, handlers }: any) => { return ( handlers.handleRetrainRobot(row.id, row.name)} handleEdit={() => handlers.handleEditRobot(row.id, row.name, row.params || [])} handleDuplicate={() => handlers.handleDuplicateRobot(row.id, row.name, row.params || [])} handleDelete={() => handlers.handleDelete(row.id)} @@ -198,6 +200,17 @@ export const RecordingsTable = ({ } } } + + if (event.data && event.data.type === 'session-data-clear') { + window.sessionStorage.removeItem('browserId'); + window.sessionStorage.removeItem('robotToRetrain'); + window.sessionStorage.removeItem('robotName'); + window.sessionStorage.removeItem('recordingUrl'); + window.sessionStorage.removeItem('recordingSessionId'); + window.sessionStorage.removeItem('pendingSessionData'); + window.sessionStorage.removeItem('nextTabIsRecording'); + window.sessionStorage.removeItem('initialUrl'); + } }; window.addEventListener('message', handleMessage); @@ -303,6 +316,63 @@ export const RecordingsTable = ({ setModalOpen(true); }; + const 
handleRetrainRobot = useCallback(async (id: string, name: string) => { + const activeBrowserId = await getActiveBrowserId(); + const robot = rows.find(row => row.id === id); + let targetUrl; + + if (robot?.content?.workflow && robot.content.workflow.length > 0) { + // Get the last workflow item + const lastPair = robot.content.workflow[robot.content.workflow.length - 1]; + + if (lastPair?.what) { + if (Array.isArray(lastPair.what)) { + const gotoAction = lastPair.what.find(action => + action && typeof action === 'object' && 'action' in action && action.action === "goto" + ) as any; + + if (gotoAction?.args?.[0]) { + targetUrl = gotoAction.args[0]; + } + } + } + } + + // Set the URL in state and session storage + if (targetUrl) { + setInitialUrl(targetUrl); + setRecordingUrl(targetUrl); + window.sessionStorage.setItem('initialUrl', targetUrl); + } + + if (activeBrowserId) { + setActiveBrowserId(activeBrowserId); + setWarningModalOpen(true); + } else { + // Pass the URL directly to avoid timing issues with state updates + startRetrainRecording(id, name, targetUrl); + } + }, [rows, setInitialUrl, setRecordingUrl]); + + const startRetrainRecording = (id: string, name: string, url?: string) => { + setBrowserId('new-recording'); + setRecordingName(''); + setRecordingId(''); + + window.sessionStorage.setItem('browserId', 'new-recording'); + window.sessionStorage.setItem('robotToRetrain', id); + window.sessionStorage.setItem('robotName', name); + + window.sessionStorage.setItem('recordingUrl', url || recordingUrl); + + const sessionId = Date.now().toString(); + window.sessionStorage.setItem('recordingSessionId', sessionId); + + window.openedRecordingWindow = window.open(`/recording-setup?session=${sessionId}`, '_blank'); + + window.sessionStorage.setItem('nextTabIsRecording', 'true'); + }; + const startRecording = () => { setModalOpen(false); @@ -381,6 +451,7 @@ export const RecordingsTable = ({ handleSettingsRecording, handleEditRobot, handleDuplicateRobot, + 
handleRetrainRobot, handleDelete: async (id: string) => { const hasRuns = await checkRunsForRecording(id); if (hasRuns) { @@ -395,7 +466,7 @@ export const RecordingsTable = ({ fetchRecordings(); } } - }), [handleRunRecording, handleScheduleRecording, handleIntegrateRecording, handleSettingsRecording, handleEditRobot, handleDuplicateRobot, notify, t]); + }), [handleRunRecording, handleScheduleRecording, handleIntegrateRecording, handleSettingsRecording, handleEditRobot, handleDuplicateRobot, handleRetrainRobot, notify, t]); return ( @@ -597,12 +668,13 @@ const SettingsButton = ({ handleSettings }: SettingsButtonProps) => { } interface OptionsButtonProps { + handleRetrain: () => void; handleEdit: () => void; handleDelete: () => void; handleDuplicate: () => void; } -const OptionsButton = ({ handleEdit, handleDelete, handleDuplicate }: OptionsButtonProps) => { +const OptionsButton = ({ handleRetrain, handleEdit, handleDelete, handleDuplicate }: OptionsButtonProps) => { const [anchorEl, setAnchorEl] = React.useState(null); const handleClick = (event: React.MouseEvent) => { @@ -629,6 +701,13 @@ const OptionsButton = ({ handleEdit, handleDelete, handleDuplicate }: OptionsBut open={Boolean(anchorEl)} onClose={handleClose} > + { handleRetrain(); handleClose(); }}> + + + + {t('recordingtable.retrain')} + + { handleEdit(); handleClose(); }}> From 962723b87f83c923def7014a404a649d9e034855 Mon Sep 17 00:00:00 2001 From: Rohit Date: Wed, 9 Apr 2025 20:46:38 +0530 Subject: [PATCH 068/106] feat: add logic to update workflow on save --- .../workflow-management/classes/Generator.ts | 74 ++++++++++++------- 1 file changed, 47 insertions(+), 27 deletions(-) diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 004126bd..563053ba 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -139,12 +139,14 @@ export class WorkflowGenerator { */ private 
registerEventHandlers = (socket: Socket) => { socket.on('save', (data) => { - const { fileName, userId, isLogin } = data; + const { fileName, userId, isLogin, robotId } = data; logger.log('debug', `Saving workflow ${fileName} for user ID ${userId}`); - this.saveNewWorkflow(fileName, userId, isLogin); + this.saveNewWorkflow(fileName, userId, isLogin, robotId); }); - socket.on('new-recording', () => this.workflowRecord = { - workflow: [], + socket.on('new-recording', (data) => { + this.workflowRecord = { + workflow: [], + }; }); socket.on('activeIndex', (data) => this.generatedData.lastIndex = parseInt(data)); socket.on('decision', async ({ pair, actionType, decision, userId }) => { @@ -764,32 +766,50 @@ export class WorkflowGenerator { * @param fileName The name of the file. * @returns {Promise} */ - public saveNewWorkflow = async (fileName: string, userId: number, isLogin: boolean) => { + public saveNewWorkflow = async (fileName: string, userId: number, isLogin: boolean, robotId?: string) => { const recording = this.optimizeWorkflow(this.workflowRecord); try { - this.recordingMeta = { - name: fileName, - id: uuid(), - createdAt: this.recordingMeta.createdAt || new Date().toLocaleString(), - pairs: recording.workflow.length, - updatedAt: new Date().toLocaleString(), - params: this.getParams() || [], - isLogin: isLogin, - } - const robot = await Robot.create({ - userId, - recording_meta: this.recordingMeta, - recording: recording, - }); - capture( - 'maxun-oss-robot-created', - { - robot_meta: robot.recording_meta, - recording: robot.recording, - } - ) + if (robotId) { + const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId }}); - logger.log('info', `Robot saved with id: ${robot.id}`); + if (robot) { + await robot.update({ + recording: recording, + recording_meta: { + ...robot.recording_meta, + pairs: recording.workflow.length, + params: this.getParams() || [], + updatedAt: new Date().toLocaleString(), + }, + }) + + logger.log('info', `Robot 
retrained with id: ${robot.id}`); + } + } else { + this.recordingMeta = { + name: fileName, + id: uuid(), + createdAt: this.recordingMeta.createdAt || new Date().toLocaleString(), + pairs: recording.workflow.length, + updatedAt: new Date().toLocaleString(), + params: this.getParams() || [], + isLogin: isLogin, + } + const robot = await Robot.create({ + userId, + recording_meta: this.recordingMeta, + recording: recording, + }); + capture( + 'maxun-oss-robot-created', + { + robot_meta: robot.recording_meta, + recording: robot.recording, + } + ) + + logger.log('info', `Robot saved with id: ${robot.id}`); + } } catch (e) { const { message } = e as Error; From 5be644d3b58be6f607375427c2ae488f4d8a34d8 Mon Sep 17 00:00:00 2001 From: Rohit Date: Wed, 9 Apr 2025 20:47:12 +0530 Subject: [PATCH 069/106] feat: add robot retrain id global context --- src/context/globalInfo.tsx | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/context/globalInfo.tsx b/src/context/globalInfo.tsx index eaa6ded7..58ee9672 100644 --- a/src/context/globalInfo.tsx +++ b/src/context/globalInfo.tsx @@ -60,6 +60,8 @@ interface GlobalInfo { setRecordingLength: (recordingLength: number) => void; recordingId: string | null; setRecordingId: (newId: string | null) => void; + retrainRobotId: string | null; + setRetrainRobotId: (newId: string | null) => void; recordingName: string; setRecordingName: (recordingName: string) => void; initialUrl: string; @@ -90,6 +92,7 @@ class GlobalInfoStore implements Partial { isOpen: false, }; recordingId = null; + retrainRobotId = null; recordings: string[] = []; rerenderRuns = false; rerenderRobots = false; @@ -119,6 +122,7 @@ export const GlobalInfoProvider = ({ children }: { children: JSX.Element }) => { const [rerenderRobots, setRerenderRobots] = useState(globalInfoStore.rerenderRobots); const [recordingLength, setRecordingLength] = useState(globalInfoStore.recordingLength); const [recordingId, setRecordingId] = useState(globalInfoStore.recordingId); + const 
[retrainRobotId, setRetrainRobotId] = useState(globalInfoStore.retrainRobotId); const [recordingName, setRecordingName] = useState(globalInfoStore.recordingName); const [isLogin, setIsLogin] = useState(globalInfoStore.isLogin); const [initialUrl, setInitialUrl] = useState(globalInfoStore.initialUrl); @@ -169,6 +173,8 @@ export const GlobalInfoProvider = ({ children }: { children: JSX.Element }) => { setRecordingLength, recordingId, setRecordingId, + retrainRobotId, + setRetrainRobotId, recordingName, setRecordingName, initialUrl, From b95d30cda921cab65eefe33e5b193ffefc926f1f Mon Sep 17 00:00:00 2001 From: Rohit Date: Wed, 9 Apr 2025 20:48:05 +0530 Subject: [PATCH 070/106] feat: save and set retrain robot params --- src/pages/RecordingPage.tsx | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/src/pages/RecordingPage.tsx b/src/pages/RecordingPage.tsx index 34c2f90d..7dbed8b2 100644 --- a/src/pages/RecordingPage.tsx +++ b/src/pages/RecordingPage.tsx @@ -43,7 +43,7 @@ export const RecordingPage = ({ recordingName }: RecordingPageProps) => { const { setId, socket } = useSocketStore(); const { setWidth } = useBrowserDimensionsStore(); - const { browserId, setBrowserId, recordingId, recordingUrl, setRecordingUrl } = useGlobalInfoStore(); + const { browserId, setBrowserId, recordingId, recordingUrl, setRecordingUrl, setRecordingName, setRetrainRobotId } = useGlobalInfoStore(); const handleShowOutputData = useCallback(() => { setShowOutputData(true); @@ -80,6 +80,19 @@ export const RecordingPage = ({ recordingName }: RecordingPageProps) => { const storedUrl = window.sessionStorage.getItem('recordingUrl'); if (storedUrl && !recordingUrl) { setRecordingUrl(storedUrl); + window.sessionStorage.removeItem('recordingUrl'); + } + + const robotName = window.sessionStorage.getItem('robotName'); + if (robotName) { + setRecordingName(robotName); + window.sessionStorage.removeItem('robotName'); + } + + const recordingId = 
window.sessionStorage.getItem('robotToRetrain'); + if (recordingId) { + setRetrainRobotId(recordingId); + window.sessionStorage.removeItem('robotToRetrain'); } const id = await getActiveBrowserId(); @@ -101,7 +114,7 @@ export const RecordingPage = ({ recordingName }: RecordingPageProps) => { return () => { isCancelled = true; } - }, [setId, recordingUrl, setRecordingUrl]); + }, [setId, recordingUrl, setRecordingUrl, setRecordingName, setRetrainRobotId]); const changeBrowserDimensions = useCallback(() => { if (browserContentRef.current) { @@ -126,7 +139,7 @@ export const RecordingPage = ({ recordingName }: RecordingPageProps) => { } setIsLoaded(true); } - }, [socket, browserId, recordingName, recordingId, isLoaded]) + }, [socket, browserId, recordingName, recordingId, isLoaded]); useEffect(() => { socket?.on('loaded', handleLoaded); From 18924d89dfc3e6940f8b186d189a68e30fe43641 Mon Sep 17 00:00:00 2001 From: Rohit Date: Wed, 9 Apr 2025 20:49:01 +0530 Subject: [PATCH 071/106] feat: pass session storage cleanup message --- src/components/browser/BrowserRecordingSave.tsx | 5 +++++ src/components/robot/RecordingsTable.tsx | 7 ++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/components/browser/BrowserRecordingSave.tsx b/src/components/browser/BrowserRecordingSave.tsx index d4fd54fb..2adfcd79 100644 --- a/src/components/browser/BrowserRecordingSave.tsx +++ b/src/components/browser/BrowserRecordingSave.tsx @@ -55,6 +55,11 @@ const BrowserRecordingSave = () => { type: 'recording-notification', notification: notificationData }, '*'); + + window.opener.postMessage({ + type: 'session-data-clear', + timestamp: Date.now() + }, '*'); } setBrowserId(null); diff --git a/src/components/robot/RecordingsTable.tsx b/src/components/robot/RecordingsTable.tsx index 878c998f..91f39415 100644 --- a/src/components/robot/RecordingsTable.tsx +++ b/src/components/robot/RecordingsTable.tsx @@ -322,7 +322,6 @@ export const RecordingsTable = ({ let targetUrl; if 
(robot?.content?.workflow && robot.content.workflow.length > 0) { - // Get the last workflow item const lastPair = robot.content.workflow[robot.content.workflow.length - 1]; if (lastPair?.what) { @@ -338,7 +337,6 @@ export const RecordingsTable = ({ } } - // Set the URL in state and session storage if (targetUrl) { setInitialUrl(targetUrl); setRecordingUrl(targetUrl); @@ -349,15 +347,14 @@ export const RecordingsTable = ({ setActiveBrowserId(activeBrowserId); setWarningModalOpen(true); } else { - // Pass the URL directly to avoid timing issues with state updates startRetrainRecording(id, name, targetUrl); } }, [rows, setInitialUrl, setRecordingUrl]); const startRetrainRecording = (id: string, name: string, url?: string) => { setBrowserId('new-recording'); - setRecordingName(''); - setRecordingId(''); + setRecordingName(name); + setRecordingId(id); window.sessionStorage.setItem('browserId', 'new-recording'); window.sessionStorage.setItem('robotToRetrain', id); From a2d2bf893acf561244c58b713d4ecdb1da3281a6 Mon Sep 17 00:00:00 2001 From: Rohit Date: Wed, 9 Apr 2025 20:49:55 +0530 Subject: [PATCH 072/106] feat: save robot based on retrain robot params --- src/components/recorder/SaveRecording.tsx | 44 +++++++++++++++++------ 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/src/components/recorder/SaveRecording.tsx b/src/components/recorder/SaveRecording.tsx index f7020b44..87d9bd17 100644 --- a/src/components/recorder/SaveRecording.tsx +++ b/src/components/recorder/SaveRecording.tsx @@ -19,26 +19,32 @@ export const SaveRecording = ({ fileName }: SaveRecordingProps) => { const { t } = useTranslation(); const [openModal, setOpenModal] = useState(false); const [needConfirm, setNeedConfirm] = useState(false); - const [recordingName, setRecordingName] = useState(fileName); + const [saveRecordingName, setSaveRecordingName] = useState(fileName); const [waitingForSave, setWaitingForSave] = useState(false); - const { browserId, setBrowserId, notify, recordings, 
isLogin } = useGlobalInfoStore(); + const { browserId, setBrowserId, notify, recordings, isLogin, recordingName, retrainRobotId } = useGlobalInfoStore(); const { socket } = useSocketStore(); const { state, dispatch } = useContext(AuthContext); const { user } = state; const navigate = useNavigate(); + useEffect(() => { + if (recordingName) { + setSaveRecordingName(recordingName); + } + }, [recordingName]); + const handleChangeOfTitle = (event: React.ChangeEvent) => { const { value } = event.target; if (needConfirm) { setNeedConfirm(false); } - setRecordingName(value); + setSaveRecordingName(value); } const handleSaveRecording = async (event: React.SyntheticEvent) => { event.preventDefault(); - if (recordings.includes(recordingName)) { + if (recordings.includes(saveRecordingName)) { if (needConfirm) { return; } setNeedConfirm(true); } else { @@ -46,19 +52,32 @@ export const SaveRecording = ({ fileName }: SaveRecordingProps) => { } }; + const handleFinishClick = () => { + if (recordingName && !recordings.includes(recordingName)) { + saveRecording(); + } else { + setOpenModal(true); + } + }; + const exitRecording = useCallback(async () => { const notificationData = { type: 'success', message: t('save_recording.notifications.save_success'), timestamp: Date.now() }; - window.sessionStorage.setItem('pendingNotification', JSON.stringify(notificationData)); if (window.opener) { window.opener.postMessage({ type: 'recording-notification', notification: notificationData }, '*'); + + // Also notify about clearing any remaining session data + window.opener.postMessage({ + type: 'session-data-clear', + timestamp: Date.now() + }, '*'); } if (browserId) { @@ -67,16 +86,21 @@ export const SaveRecording = ({ fileName }: SaveRecordingProps) => { setBrowserId(null); window.close(); - }, [setBrowserId, browserId]); + }, [setBrowserId, browserId, t]); // notifies backed to save the recording in progress, // releases resources and changes the view for main page by clearing the global 
browserId const saveRecording = async () => { if (user) { - const payload = { fileName: recordingName, userId: user.id, isLogin: isLogin }; + const payload = { + fileName: saveRecordingName || recordingName, + userId: user.id, + isLogin: isLogin, + robotId: retrainRobotId, + }; socket?.emit('save', payload); setWaitingForSave(true); - console.log(`Saving the recording as ${recordingName} for userId ${user.id}`); + console.log(`Saving the recording as ${saveRecordingName || recordingName} for userId ${user.id}`); } else { console.error(t('save_recording.notifications.user_not_logged')); } @@ -92,7 +116,7 @@ export const SaveRecording = ({ fileName }: SaveRecordingProps) => { return (
- From d9e5c0cff863a5010b106095a34559eff23e52e9 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Tue, 22 Apr 2025 02:38:55 +0530 Subject: [PATCH 095/106] chore: v0.0.13 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 876fa45b..a4294c97 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "maxun", - "version": "0.0.12", + "version": "0.0.13", "author": "Maxun", "license": "AGPL-3.0-or-later", "dependencies": { From d6300c7e9acc2b28adc4f66ce557a94657ca5550 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Wed, 23 Apr 2025 00:55:18 +0530 Subject: [PATCH 096/106] feat: link cloud --- src/components/dashboard/MainMenu.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/components/dashboard/MainMenu.tsx b/src/components/dashboard/MainMenu.tsx index 8867db7f..8f8ee717 100644 --- a/src/components/dashboard/MainMenu.tsx +++ b/src/components/dashboard/MainMenu.tsx @@ -112,7 +112,7 @@ export const MainMenu = ({ value = 'robots', handleChangeContent }: MainMenuProp - From 540c9ec7095c97cd7be9235e7e51dd5cffccfab5 Mon Sep 17 00:00:00 2001 From: Rohit Date: Wed, 23 Apr 2025 17:21:14 +0530 Subject: [PATCH 097/106] feat: check parent element url and extract --- maxun-core/src/browserSide/scraper.js | 95 +++++++++++++++------------ 1 file changed, 54 insertions(+), 41 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 6e4bf029..e1f99c1d 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -523,49 +523,62 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, // Enhanced value extraction with context awareness function extractValue(element, attribute) { - if (!element) return null; - - // Get context-aware base URL - const baseURL = element.ownerDocument?.location?.href || window.location.origin; + if (!element) return null; - // Check shadow 
root first - if (element.shadowRoot) { - const shadowContent = element.shadowRoot.textContent; - if (shadowContent?.trim()) { - return shadowContent.trim(); - } + // Get context-aware base URL + const baseURL = element.ownerDocument?.location?.href || window.location.origin; + + // Check shadow root first + if (element.shadowRoot) { + const shadowContent = element.shadowRoot.textContent; + if (shadowContent?.trim()) { + return shadowContent.trim(); + } + } + + if (attribute === 'innerText') { + return element.innerText.trim(); + } else if (attribute === 'innerHTML') { + return element.innerHTML.trim(); + } else if (attribute === 'src' || attribute === 'href') { + if (attribute === 'href' && element.tagName !== 'A') { + const parentElement = element.parentElement; + if (parentElement && parentElement.tagName === 'A') { + const parentHref = parentElement.getAttribute('href'); + if (parentHref) { + try { + return new URL(parentHref, baseURL).href; + } catch (e) { + return parentHref; + } + } + } + } + + const attrValue = element.getAttribute(attribute); + const dataAttr = attrValue || element.getAttribute('data-' + attribute); + + if (!dataAttr || dataAttr.trim() === '') { + if (attribute === 'src') { + const style = window.getComputedStyle(element); + const bgImage = style.backgroundImage; + if (bgImage && bgImage !== 'none') { + const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/); + return matches ? 
new URL(matches[1], baseURL).href : null; + } + } + return null; + } + + try { + return new URL(dataAttr, baseURL).href; + } catch (e) { + console.warn('Error creating URL from', dataAttr, e); + return dataAttr; // Return the original value if URL construction fails + } + } + return element.getAttribute(attribute); } - - if (attribute === 'innerText') { - return element.innerText.trim(); - } else if (attribute === 'innerHTML') { - return element.innerHTML.trim(); - } else if (attribute === 'src' || attribute === 'href') { - const attrValue = element.getAttribute(attribute); - - const dataAttr = attrValue || element.getAttribute('data-' + attribute); - - if (!dataAttr || dataAttr.trim() === '') { - if (attribute === 'src') { - const style = window.getComputedStyle(element); - const bgImage = style.backgroundImage; - if (bgImage && bgImage !== 'none') { - const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/); - return matches ? new URL(matches[1], baseURL).href : null; - } - } - return null; - } - - try { - return new URL(dataAttr, baseURL).href; - } catch (e) { - console.warn('Error creating URL from', dataAttr, e); - return dataAttr; // Return the original value if URL construction fails - } - } - return element.getAttribute(attribute); - } // Enhanced table ancestor finding with context support function findTableAncestor(element) { From 8ef5348f14e6e2d3027cd73221283a6ff030a48f Mon Sep 17 00:00:00 2001 From: Rohit Date: Wed, 23 Apr 2025 17:22:07 +0530 Subject: [PATCH 098/106] feat: rm page console log --- server/src/workflow-management/selector.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/server/src/workflow-management/selector.ts b/server/src/workflow-management/selector.ts index a5604e12..5851e7af 100644 --- a/server/src/workflow-management/selector.ts +++ b/server/src/workflow-management/selector.ts @@ -302,9 +302,6 @@ export const getElementInformation = async ( ); return elementInfo; } else { - page.on('console', msg => { - console.log(`Browser 
console: ${msg.text()}`); - }); const elementInfo = await page.evaluate( async ({ x, y }) => { const getDeepestElementFromPoint = (x: number, y: number): HTMLElement | null => { From a802548f9f37869116bd885f98a596b112330020 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Thu, 24 Apr 2025 00:59:58 +0530 Subject: [PATCH 099/106] feat: link maxun cloud --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 756d400d..2e282dff 100644 --- a/README.md +++ b/README.md @@ -15,11 +15,11 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web

+ Go To App | Documentation | Website | Discord | Twitter | - Join Maxun Cloud | Watch Tutorials

@@ -30,7 +30,10 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web -# Installation +# Getting Started +The simplest way to get started is to use the hosted version: https://app.maxun.dev. Maxun cloud also deals with anti-bot detection, huge proxy network with automatic proxy rotation, and CAPTCHA solving. + +# Local Installation 1. Create a root folder for your project (e.g. 'maxun') 2. Create a file named `.env` in the root folder of the project 3. Example env file can be viewed [here](https://github.com/getmaxun/maxun/blob/master/ENVEXAMPLE). Copy all content of example env to your `.env` file. @@ -134,9 +137,6 @@ BYOP (Bring Your Own Proxy) lets you connect external proxies to bypass anti-bot - ✨ Integrations (currently Google Sheet) - +++ A lot of amazing things soon! -# Cloud -We offer a managed cloud version to run Maxun without having to manage the infrastructure and extract data at scale. Maxun cloud also deals with anti-bot detection, huge proxy network with automatic proxy rotation, and CAPTCHA solving. If this interests you, [join the cloud waitlist](https://docs.google.com/forms/d/e/1FAIpQLSdbD2uhqC4sbg4eLZ9qrFbyrfkXZ2XsI6dQ0USRCQNZNn5pzg/viewform) as we launch soon. 
- # Screenshots ![Maxun PH Launch (1)-1-1](https://github.com/user-attachments/assets/d7c75fa2-2bbc-47bb-a5f6-0ee6c162f391) ![Maxun PH Launch (1)-2-1](https://github.com/user-attachments/assets/d85a3ec7-8ce8-4daa-89aa-52d9617e227a) From 395d08ba922c67c75a3d12b97816cd94f8d358b9 Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 24 Apr 2025 13:47:26 +0530 Subject: [PATCH 100/106] feat: rm spread operation --- server/src/pgboss-worker.ts | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index f197489d..bd5ae801 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -393,19 +393,15 @@ async function abortRun(runId: string, userId: string): Promise { } const currentLog = browser.interpreter.debugMessages.join('\n'); - const serializableOutput = browser.interpreter.serializableData.reduce((reducedObject, item, index) => { - return { - [`item-${index}`]: item, - ...reducedObject, - } - }, {}); + const serializableOutput: Record = {}; + browser.interpreter.serializableData.forEach((item, index) => { + serializableOutput[`item-${index}`] = item; + }); - const binaryOutput = browser.interpreter.binaryData.reduce((reducedObject, item, index) => { - return { - [`item-${index}`]: item, - ...reducedObject, - } - }, {}); + const binaryOutput: Record = {}; + browser.interpreter.binaryData.forEach((item, index) => { + binaryOutput[`item-${index}`] = item; + }); await run.update({ status: 'aborted', From 79e6aa711611c07ce3b59e3a679d08756ff100d2 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Thu, 24 Apr 2025 17:18:48 +0530 Subject: [PATCH 101/106] feat: cloud changes --- README.md | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index 2e282dff..a16df810 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. 
Web # Getting Started -The simplest way to get started is to use the hosted version: https://app.maxun.dev. Maxun cloud also deals with anti-bot detection, huge proxy network with automatic proxy rotation, and CAPTCHA solving. +The simplest & fastest way to get started is to use the hosted version: https://app.maxun.dev. Maxun Cloud deals with anti-bot detection, huge proxy network with automatic proxy rotation, and CAPTCHA solving. # Local Installation 1. Create a root folder for your project (e.g. 'maxun') @@ -132,10 +132,11 @@ BYOP (Bring Your Own Proxy) lets you connect external proxies to bypass anti-bot - ✨ Run Robots On A Specific Schedule - ✨ Turn Websites to APIs - ✨ Turn Websites to Spreadsheets -- ✨ Adapt To Website Layout Changes (coming soon) -- ✨ Extract Behind Login, With Two-Factor Authentication Support (coming soon) -- ✨ Integrations (currently Google Sheet) -- +++ A lot of amazing things soon! +- ✨ Adapt To Website Layout Changes +- ✨ Extract Behind Login +- ✨ Bypass Two-Factor Authentication For Extract Behind Login (coming soon) +- ✨ Integrations +- +++ A lot of amazing things! # Screenshots ![Maxun PH Launch (1)-1-1](https://github.com/user-attachments/assets/d7c75fa2-2bbc-47bb-a5f6-0ee6c162f391) @@ -149,7 +150,7 @@ BYOP (Bring Your Own Proxy) lets you connect external proxies to bypass anti-bot ![Maxun PH Launch (1)-9-1](https://github.com/user-attachments/assets/160f46fa-0357-4c1b-ba50-b4fe64453bb7) # Note -This project is in early stages of development. Your feedback is very important for us - we're actively working to improve the product. Drop anonymous feedback here. +This project is in early stages of development. Your feedback is very important for us - we're actively working to improve the product. # License

From f6f23419d9ed5b50d95b6ccfb18e4832f94449ab Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 24 Apr 2025 18:46:04 +0530 Subject: [PATCH 102/106] feat: add error handling destroy browser --- server/src/browser-management/controller.ts | 36 ++++++++++++++++++--- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/server/src/browser-management/controller.ts b/server/src/browser-management/controller.ts index ef1e0011..a02b3b7c 100644 --- a/server/src/browser-management/controller.ts +++ b/server/src/browser-management/controller.ts @@ -81,13 +81,39 @@ export const createRemoteBrowserForRun = (userId: string): string => { * @category BrowserManagement-Controller */ export const destroyRemoteBrowser = async (id: string, userId: string): Promise => { - const browserSession = browserPool.getRemoteBrowser(id); - if (browserSession) { + try { + const browserSession = browserPool.getRemoteBrowser(id); + if (!browserSession) { + logger.log('info', `Browser with id: ${id} not found, may have already been destroyed`); + return true; + } + logger.log('debug', `Switching off the browser with id: ${id}`); - await browserSession.stopCurrentInterpretation(); - await browserSession.switchOff(); + + try { + await browserSession.stopCurrentInterpretation(); + } catch (stopError) { + logger.log('warn', `Error stopping interpretation for browser ${id}: ${stopError}`); + } + + try { + await browserSession.switchOff(); + } catch (switchOffError) { + logger.log('warn', `Error switching off browser ${id}: ${switchOffError}`); + } + + return browserPool.deleteRemoteBrowser(id); + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : String(error); + logger.log('error', `Failed to destroy browser ${id}: ${errorMessage}`); + + try { + return browserPool.deleteRemoteBrowser(id); + } catch (deleteError) { + logger.log('error', `Failed to delete browser ${id} from pool: ${deleteError}`); + return false; + } } - return browserPool.deleteRemoteBrowser(id); }; /** From 9878db060adcd3f57ccc26b56ffb8a21a2b3bd13 Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 24 Apr 2025 18:46:42 +0530 Subject: [PATCH 103/106] feat: add abort checks and update status --- server/src/pgboss-worker.ts | 170 ++++++++++++++++++++++++++++-------- 1 file changed, 132 insertions(+), 38 deletions(-) diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index bd5ae801..b09ca253 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -180,6 +180,11 @@ async function processRunExecution(job: Job) { return { success: false }; } + if (run.status === 'aborted' || run.status === 'aborting') { + logger.log('info', `Run ${data.runId} has status ${run.status}, skipping execution`); + return { success: true }; + } + const plainRun = run.toJSON(); // Find the recording @@ -187,12 +192,14 @@ async function processRunExecution(job: Job) { if (!recording) { logger.log('error', `Recording for run ${data.runId} not found`); - // Update run status to failed - await run.update({ - status: 'failed', - finishedAt: new Date().toLocaleString(), - log: 'Failed: Recording not found', - }); + const currentRun = await Run.findOne({ where: { runId: data.runId } }); + if (currentRun && (currentRun.status !== 'aborted' && currentRun.status !== 'aborting')) { + await run.update({ + status: 'failed', + finishedAt: new Date().toLocaleString(), + log: 'Failed: Recording not found', + }); + } // Check for queued runs even if this one failed await checkAndProcessQueuedRun(data.userId, data.browserId); @@ -207,8 +214,6 @@ async function processRunExecution(job: Job) { if (!browser || !currentPage) { 
logger.log('error', `Browser or page not available for run ${data.runId}`); - await pgBoss.fail(job.id, "Failed to get browser or page for run"); - // Even if this run failed, check for queued runs await checkAndProcessQueuedRun(data.userId, data.browserId); @@ -219,6 +224,11 @@ async function processRunExecution(job: Job) { // Reset the browser state before executing this run await resetBrowserState(browser); + const isRunAborted = async (): Promise => { + const currentRun = await Run.findOne({ where: { runId: data.runId } }); + return currentRun ? (currentRun.status === 'aborted' || currentRun.status === 'aborting') : false; + }; + // Execute the workflow const workflow = AddGeneratedFlags(recording.recording); const interpretationInfo = await browser.interpreter.InterpretRecording( @@ -228,10 +238,28 @@ async function processRunExecution(job: Job) { plainRun.interpreterSettings ); + if (await isRunAborted()) { + logger.log('info', `Run ${data.runId} was aborted during execution, not updating status`); + + const queuedRunProcessed = await checkAndProcessQueuedRun(data.userId, plainRun.browserId); + + if (!queuedRunProcessed) { + await destroyRemoteBrowser(plainRun.browserId, data.userId); + logger.log('info', `No queued runs found for browser ${plainRun.browserId}, browser destroyed`); + } + + return { success: true }; + } + // Process the results const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput); + if (await isRunAborted()) { + logger.log('info', `Run ${data.runId} was aborted while processing results, not updating status`); + return { success: true }; + } + // Update the run record with results await run.update({ ...run, @@ -322,11 +350,28 @@ async function processRunExecution(job: Job) { } catch (executionError: any) { logger.log('error', `Run execution failed for run ${data.runId}: ${executionError.message}`); - 
await run.update({ - status: 'failed', - finishedAt: new Date().toLocaleString(), - log: `Failed: ${executionError.message}`, - }); + const currentRun = await Run.findOne({ where: { runId: data.runId } }); + if (currentRun && (currentRun.status !== 'aborted' && currentRun.status !== 'aborting')) { + await run.update({ + status: 'failed', + finishedAt: new Date().toLocaleString(), + log: `Failed: ${executionError.message}`, + }); + + // Capture failure metrics + capture( + 'maxun-oss-run-created-manual', + { + runId: data.runId, + user_id: data.userId, + created_at: new Date().toISOString(), + status: 'failed', + error_message: executionError.message, + } + ); + } else { + logger.log('info', `Run ${data.runId} was aborted, not updating status to failed`); + } // Check for queued runs before destroying the browser const queuedRunProcessed = await checkAndProcessQueuedRun(data.userId, plainRun.browserId); @@ -340,18 +385,6 @@ async function processRunExecution(job: Job) { logger.log('warn', `Failed to clean up browser for failed run ${data.runId}: ${cleanupError.message}`); } } - - // Capture failure metrics - capture( - 'maxun-oss-run-created-manual', - { - runId: data.runId, - user_id: data.userId, - created_at: new Date().toISOString(), - status: 'failed', - error_message: executionError.message, - } - ); return { success: false }; } @@ -377,9 +410,26 @@ async function abortRun(runId: string, userId: string): Promise { return false; } + await run.update({ + status: 'aborting' + }); + const plainRun = run.toJSON(); - const browser = browserPool.getRemoteBrowser(plainRun.browserId); + const recording = await Robot.findOne({ + where: { 'recording_meta.id': plainRun.robotMetaId }, + raw: true + }); + + const robotName = recording?.recording_meta?.name || 'Unknown Robot'; + + let browser; + try { + browser = browserPool.getRemoteBrowser(plainRun.browserId); + } catch (browserError) { + logger.log('warn', `Could not get browser for run ${runId}: ${browserError}`); + 
browser = null; + } if (!browser) { await run.update({ @@ -388,36 +438,80 @@ async function abortRun(runId: string, userId: string): Promise { log: 'Aborted: Browser not found or already closed' }); + try { + serverIo.of(plainRun.browserId).emit('run-aborted', { + runId, + robotName: robotName, + status: 'aborted', + finishedAt: new Date().toLocaleString() + }); + } catch (socketError) { + logger.log('warn', `Failed to emit run-aborted event: ${socketError}`); + } + logger.log('warn', `Browser not found for run ${runId}`); return true; } - const currentLog = browser.interpreter.debugMessages.join('\n'); - const serializableOutput: Record = {}; - browser.interpreter.serializableData.forEach((item, index) => { - serializableOutput[`item-${index}`] = item; - }); + let currentLog = 'Run aborted by user'; + let serializableOutput: Record = {}; + let binaryOutput: Record = {}; - const binaryOutput: Record = {}; - browser.interpreter.binaryData.forEach((item, index) => { - binaryOutput[`item-${index}`] = item; - }); + try { + if (browser.interpreter) { + if (browser.interpreter.debugMessages) { + currentLog = browser.interpreter.debugMessages.join('\n') || currentLog; + } + + if (browser.interpreter.serializableData) { + browser.interpreter.serializableData.forEach((item, index) => { + serializableOutput[`item-${index}`] = item; + }); + } + + if (browser.interpreter.binaryData) { + browser.interpreter.binaryData.forEach((item, index) => { + binaryOutput[`item-${index}`] = item; + }); + } + } + } catch (interpreterError) { + logger.log('warn', `Error collecting data from interpreter: ${interpreterError}`); + } await run.update({ status: 'aborted', finishedAt: new Date().toLocaleString(), browserId: plainRun.browserId, - log: currentLog || 'Run aborted by user', + log: currentLog, serializableOutput, binaryOutput, }); - const queuedRunProcessed = await checkAndProcessQueuedRun(userId, plainRun.browserId); + try { + serverIo.of(plainRun.browserId).emit('run-aborted', { + 
runId, + robotName: robotName, + status: 'aborted', + finishedAt: new Date().toLocaleString() + }); + } catch (socketError) { + logger.log('warn', `Failed to emit run-aborted event: ${socketError}`); + } + + let queuedRunProcessed = false; + try { + queuedRunProcessed = await checkAndProcessQueuedRun(userId, plainRun.browserId); + } catch (queueError) { + logger.log('warn', `Error checking queued runs: ${queueError}`); + } if (!queuedRunProcessed) { try { + await new Promise(resolve => setTimeout(resolve, 500)); + await destroyRemoteBrowser(plainRun.browserId, userId); - logger.log('info', `No queued runs found for browser ${plainRun.browserId}, browser destroyed`); + logger.log('info', `Browser ${plainRun.browserId} destroyed successfully after abort`); } catch (cleanupError) { logger.log('warn', `Failed to clean up browser for aborted run ${runId}: ${cleanupError}`); } From d47f696ca5fec36d9f91086d963d0199815045e1 Mon Sep 17 00:00:00 2001 From: Rohit Date: Thu, 24 Apr 2025 18:47:23 +0530 Subject: [PATCH 104/106] feat: notify on socket abort run --- src/pages/MainPage.tsx | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/pages/MainPage.tsx b/src/pages/MainPage.tsx index aa896d50..0587f2a4 100644 --- a/src/pages/MainPage.tsx +++ b/src/pages/MainPage.tsx @@ -71,7 +71,7 @@ export const MainPage = ({ handleEditRecording, initialContent }: MainPageProps) interpretStoredRecording(runId).then(async (interpretation: boolean) => { if (!aborted) { if (interpretation) { - notify('success', t('main_page.notifications.interpretation_success', { name: runningRecordingName })); + // notify('success', t('main_page.notifications.interpretation_success', { name: runningRecordingName })); } else { notify('success', t('main_page.notifications.interpretation_failed', { name: runningRecordingName })); // destroy the created browser @@ -112,6 +112,14 @@ export const MainPage = ({ handleEditRecording, initialContent }: MainPageProps) notify('error', 
t('main_page.notifications.interpretation_failed', { name: robotName })); } }); + + socket.on('run-aborted', (data) => { + setRerenderRuns(true); + + const abortedRobotName = data.robotName; + notify('success', t('main_page.notifications.abort_success', { name: abortedRobotName })); + }); + setContent('runs'); if (browserId) { notify('info', t('main_page.notifications.run_started', { name: runningRecordingName })); From 0f5759dc1881319a211b784ecfa6440bdcee9675 Mon Sep 17 00:00:00 2001 From: amhsirak Date: Thu, 24 Apr 2025 20:03:27 +0530 Subject: [PATCH 105/106] feat: core 0.0.15 --- maxun-core/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/package.json b/maxun-core/package.json index df790d7b..803fde15 100644 --- a/maxun-core/package.json +++ b/maxun-core/package.json @@ -1,6 +1,6 @@ { "name": "maxun-core", - "version": "0.0.14", + "version": "0.0.15", "description": "Core package for Maxun, responsible for data extraction", "main": "build/index.js", "typings": "build/index.d.ts", From 3100f60d86439dabb79af4f751cc0dc0346f17ae Mon Sep 17 00:00:00 2001 From: amhsirak Date: Thu, 24 Apr 2025 20:07:51 +0530 Subject: [PATCH 106/106] chore: upgrade core 0.0.15 --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index a4294c97..75350f1f 100644 --- a/package.json +++ b/package.json @@ -50,7 +50,7 @@ "lodash": "^4.17.21", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", - "maxun-core": "^0.0.14", + "maxun-core": "^0.0.15", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3",