From 89c7184efcb1efa76243b141660eeb3d389dbba7 Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 5 Aug 2025 00:19:17 +0530 Subject: [PATCH 1/4] feat: add restart until manually stopped --- docker-compose.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docker-compose.yml b/docker-compose.yml index e6995c06..67621344 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,7 @@ services: postgres: image: postgres:13 + restart: unless-stopped environment: POSTGRES_USER: ${DB_USER} POSTGRES_PASSWORD: ${DB_PASSWORD} @@ -17,6 +18,7 @@ services: minio: image: minio/minio + restart: unless-stopped environment: MINIO_ROOT_USER: ${MINIO_ACCESS_KEY} MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY} @@ -32,6 +34,7 @@ services: #context: . #dockerfile: server/Dockerfile image: getmaxun/maxun-backend:latest + restart: unless-stopped ports: - "${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}" env_file: .env @@ -58,6 +61,7 @@ services: #context: . #dockerfile: Dockerfile image: getmaxun/maxun-frontend:latest + restart: unless-stopped ports: - "${FRONTEND_PORT:-5173}:${FRONTEND_PORT:-5173}" env_file: .env From 47fc16806dc1fb8b3cc54deed102fbb6d951f5ad Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 5 Aug 2025 01:22:51 +0530 Subject: [PATCH 2/4] feat: continue other job execution on fail --- maxun-core/src/utils/concurrency.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/maxun-core/src/utils/concurrency.ts b/maxun-core/src/utils/concurrency.ts index 56c15fd9..41fc1047 100644 --- a/maxun-core/src/utils/concurrency.ts +++ b/maxun-core/src/utils/concurrency.ts @@ -41,6 +41,10 @@ export default class Concurrency { job().then(() => { // console.debug("Job finished, running the next waiting job..."); this.runNextJob(); + }).catch((error) => { + console.error(`Job failed with error: ${error.message}`); + // Continue processing other jobs even if one fails + this.runNextJob(); }); } else { // console.debug("No waiting job found!"); From 780b18b9f4faae63a75d1af9b6ab24d40531c22d Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 5 Aug 2025 01:24:18 +0530 Subject: [PATCH 3/4] feat: page validity, continue if click fails --- maxun-core/src/interpret.ts | 61 ++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 11 deletions(-) diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index c367c16d..5ed6fb12 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -108,7 +108,9 @@ export default class Interpreter extends EventEmitter { PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']).then(blocker => { this.blocker = blocker; }).catch(err => { - this.log(`Failed to initialize ad-blocker:`, Level.ERROR); + this.log(`Failed to initialize ad-blocker: ${err.message}`, Level.ERROR); + // Continue without ad-blocker rather than crashing + this.blocker = null; }) } @@ -522,11 +524,16 @@ export default class Interpreter extends EventEmitter { this.options.debugChannel.setActionType('script'); } - const AsyncFunction: FunctionConstructor = Object.getPrototypeOf( - async () => { }, - ).constructor; - const x = new AsyncFunction('page', 'log', code); - await x(page, this.log); + try { + const AsyncFunction: FunctionConstructor = Object.getPrototypeOf( + async () => { }, + ).constructor; + const x = new AsyncFunction('page', 'log', code); + await x(page, this.log); + } catch (error) { + this.log(`Script execution failed: ${error.message}`, Level.ERROR); + throw new Error(`Script execution error: ${error.message}`); + } }, flag: async () => new Promise((res) => { @@ -590,11 +597,18 @@ export default class Interpreter extends EventEmitter { try{ await executeAction(invokee, methodName, [step.args[0], { force: true }]); } catch (error) { - continue + this.log(`Click action failed: ${error.message}`, Level.WARN); + continue; } } } else { - await executeAction(invokee, methodName, step.args); + try { + await executeAction(invokee, methodName, step.args); + } catch (error) { + this.log(`Action ${methodName} failed: ${error.message}`, Level.ERROR); + // Continue with next action instead of crashing + continue; + } } } @@ -1132,7 +1146,16 @@ export default class Interpreter extends EventEmitter { }); /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */ + let loopIterations = 0; + const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker + while (true) { + // Circuit breaker to prevent infinite loops + if (++loopIterations > MAX_LOOP_ITERATIONS) { + this.log('Maximum loop iterations reached, terminating to prevent infinite loop', Level.ERROR); + return; + } + // Checks whether the page was closed from outside, // or the workflow execution has been stopped via `interpreter.stop()` if (p.isClosed() || !this.stopper) { @@ -1147,14 +1170,25 @@ export default class Interpreter extends EventEmitter { } let pageState = {}; - let getStateTest = "Hello"; try { + // Check if page is still valid before accessing state + if (p.isClosed()) { + this.log('Page was closed during execution', Level.WARN); + return; + } + pageState = await this.getState(p, workflowCopy, selectors); selectors = []; console.log("Empty selectors:", selectors) } catch (e: any) { - this.log('The browser has been closed.'); - return; + this.log(`Failed to get page state: ${e.message}`, Level.ERROR); + // If state access fails, attempt graceful recovery + if (p.isClosed()) { + this.log('Browser has been closed, terminating workflow', Level.WARN); + return; + } + // For other errors, continue with empty state to avoid complete failure + pageState = { url: p.url(), selectors: [], cookies: {} }; } if (this.options.debug) { @@ -1207,8 +1241,13 @@ export default class Interpreter extends EventEmitter { selectors.push(selector); } }); + + // Reset loop iteration counter on successful action + loopIterations = 0; } catch (e) { this.log(e, Level.ERROR); + // Don't crash on individual action failures - continue with next iteration + continue; } } else { //await this.disableAdBlocker(p); From 6dac0827b032470ebb6669860a814db9070bf9e1 Mon Sep 17 00:00:00 2001 From: Rohit Date: Tue, 5 Aug 2025 01:25:05 +0530 Subject: [PATCH 4/4] feat: null checks for doc and iframe,frame --- maxun-core/src/browserSide/scraper.js | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index ba688c47..fdf1ff9c 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -537,6 +537,11 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, const evaluateXPath = (document, xpath, isShadow = false) => { try { + if (!document || !xpath) { + console.warn('Invalid document or xpath provided to evaluateXPath'); + return null; + } + const result = document.evaluate( xpath, document, @@ -632,6 +637,7 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, return null; } catch (err) { console.error("Critical XPath failure:", xpath, err); + // Return null instead of throwing to prevent crashes return null; } }; @@ -694,16 +700,25 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, for (let i = 0; i < parts.length; i++) { if (!currentElement) return null; - // Handle iframe and frame traversal + // Handle iframe and frame traversal with enhanced safety if ( currentElement.tagName === "IFRAME" || currentElement.tagName === "FRAME" ) { try { + // Check if frame is accessible + if (!currentElement.contentDocument && !currentElement.contentWindow) { + console.warn('Frame is not accessible (cross-origin or unloaded)'); + return null; + } + const frameDoc = currentElement.contentDocument || - currentElement.contentWindow.document; - if (!frameDoc) return null; + currentElement.contentWindow?.document; + if (!frameDoc) { + console.warn('Frame document is not available'); + return null; + } if (isXPathSelector(parts[i])) { currentElement = evaluateXPath(frameDoc, parts[i]);