diff --git a/.dockerignore b/.dockerignore index b675b276..6f9ea982 100644 --- a/.dockerignore +++ b/.dockerignore @@ -3,9 +3,10 @@ npm-debug.log dist .git .gitignore -.env .md .vscode coverage docker-compose.yml -Dockerfile \ No newline at end of file +Dockerfile +Dockerfile.frontend +Dockerfile.backend \ No newline at end of file diff --git a/server/Dockerfile b/Dockerfile.backend similarity index 86% rename from server/Dockerfile rename to Dockerfile.backend index af0860fc..8a5fc23e 100644 --- a/server/Dockerfile +++ b/Dockerfile.backend @@ -3,6 +3,9 @@ FROM --platform=$BUILDPLATFORM mcr.microsoft.com/playwright:v1.46.0-noble # Set working directory WORKDIR /app +COPY .sequelizerc .sequelizerc +COPY .env .env + # Install node dependencies COPY package*.json ./ COPY src ./src @@ -43,8 +46,9 @@ RUN apt-get update && apt-get install -y \ && rm -rf /var/lib/apt/lists/* \ && mkdir -p /tmp/.X11-unix && chmod 1777 /tmp/.X11-unix -# Expose the backend port +# Expose backend port EXPOSE ${BACKEND_PORT:-8080} -# Start the backend using the start script +# Run migrations & start backend using start script CMD ["npm", "run", "server"] +# CMD ["sh", "-c", "npm run migrate && npm run server"] \ No newline at end of file diff --git a/Dockerfile b/Dockerfile.frontend similarity index 100% rename from Dockerfile rename to Dockerfile.frontend diff --git a/README.md b/README.md index aae7e5ab..92ae1c94 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web getmaxun%2Fmaxun | Trendshift

-![maxun_demo](https://github.com/user-attachments/assets/a61ba670-e56a-4ae1-9681-0b4bd6ba9cdc) +![maxun_gif](https://github.com/user-attachments/assets/3e0b0cf8-9e52-44d2-a140-b26b7b481477) @@ -103,8 +103,8 @@ You can access the frontend at http://localhost:5173/ and backend at http://loca | `MINIO_PORT` | Yes | Port number for MinIO service. | Connection to MinIO storage will fail. | | `MINIO_CONSOLE_PORT` | No | Port number for MinIO WebUI service. Needed for Docker setup. | Cannot access MinIO Web UI. | | `MINIO_ACCESS_KEY` | Yes | Access key for authenticating with MinIO. | MinIO authentication will fail. | -| `GOOGLE_CLIENT_ID` | No | Client ID for Google OAuth, used for Google Sheet integration authentication. | Google login will not work. | -| `GOOGLE_CLIENT_SECRET`| No | Client Secret for Google OAuth. | Google login will not work. | +| `GOOGLE_CLIENT_ID` | No | Client ID for Google OAuth. Used for Google Sheet integration authentication. | Google login will not work. | +| `GOOGLE_CLIENT_SECRET`| No | Client Secret for Google OAuth. Used for Google Sheet integration authentication. | Google login will not work. | | `GOOGLE_REDIRECT_URI` | No | Redirect URI for handling Google OAuth responses. | Google login will not work. | | `AIRTABLE_CLIENT_ID` | No | Client ID for Airtable, used for Airtable integration authentication. | Airtable login will not work. | | `AIRTABLE_REDIRECT_URI` | No | Redirect URI for handling Airtable OAuth responses. | Airtable login will not work. | @@ -133,9 +133,10 @@ BYOP (Bring Your Own Proxy) lets you connect external proxies to bypass anti-bot - ✨ Turn Websites to APIs - ✨ Turn Websites to Spreadsheets - ✨ Adapt To Website Layout Changes -- ✨ Extract Behind Login, -- ✨ Bypass Two-Factor Authentication For Extract Behind Login (coming soon) +- ✨ Extract Behind Login - ✨ Integrations +- ✨ MCP Server +- ✨ Bypass 2FA & MFA For Extract Behind Login (coming soon) - +++ A lot of amazing things! 
# Screenshots @@ -150,13 +151,16 @@ BYOP (Bring Your Own Proxy) lets you connect external proxies to bypass anti-bot ![Maxun PH Launch (1)-9-1](https://github.com/user-attachments/assets/160f46fa-0357-4c1b-ba50-b4fe64453bb7) # Note -This project is in early stages of development. Your feedback is very important for us - we're actively working to improve the product. +This project is in the early stages of development. Your feedback is very important to us - we're actively working on improvements. # License

This project is licensed under AGPLv3.

+# Support Us +Star the repository, contribute if you love what we’re building, or make a [one-time donation](https://bit.ly/maxun-oss). Every little bit helps us keep the lights on and the robots running. + # Contributors Thank you to the combined efforts of everyone who contributes! diff --git a/docker-compose.yml b/docker-compose.yml index e6995c06..67621344 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,6 +1,7 @@ services: postgres: image: postgres:13 + restart: unless-stopped environment: POSTGRES_USER: ${DB_USER} POSTGRES_PASSWORD: ${DB_PASSWORD} @@ -17,6 +18,7 @@ services: minio: image: minio/minio + restart: unless-stopped environment: MINIO_ROOT_USER: ${MINIO_ACCESS_KEY} MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY} @@ -32,6 +34,7 @@ services: #context: . #dockerfile: server/Dockerfile image: getmaxun/maxun-backend:latest + restart: unless-stopped ports: - "${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}" env_file: .env @@ -58,6 +61,7 @@ services: #context: . #dockerfile: Dockerfile image: getmaxun/maxun-frontend:latest + restart: unless-stopped ports: - "${FRONTEND_PORT:-5173}:${FRONTEND_PORT:-5173}" env_file: .env diff --git a/docs/nginx.conf b/docs/nginx.conf new file mode 100644 index 00000000..d5cf2ba8 --- /dev/null +++ b/docs/nginx.conf @@ -0,0 +1,92 @@ +# Robust maxun nginx config file +# DO NOT uncomment commented lines unless YOU know what they mean and YOU know what YOU are doing! 
+### HTTP server block ### +server { + server_name maxun.my.domain; + root /usr/share/nginx/html; + listen 80; + server_tokens off; + return 301 https://$server_name$request_uri; +} +### HTTPS server block ### +server { +### Default config ### + server_name maxun.my.domain; + root /usr/share/nginx/html; + access_log /var/log/nginx/maxun_access.log; + error_log /var/log/nginx/maxun_error.log info; + listen 443 ssl; + http2 on; + server_tokens off; +### SSL config ### + ssl_certificate /etc/letsencrypt/live/my.domain/fullchain.pem; + ssl_certificate_key /etc/letsencrypt/live/my.domain/privkey.pem; + ssl_trusted_certificate /etc/letsencrypt/live/my.domain/chain.pem; + ssl_protocols TLSv1.2 TLSv1.3; + #ssl_ecdh_curve X25519MLKEM768:X25519:prime256v1:secp384r1; + ssl_ecdh_curve X25519:prime256v1:secp384r1; + ssl_prefer_server_ciphers off; + ssl_ciphers ECDHE-ECDSA-AES128-GCM-SHA256:ECDHE-RSA-AES128-GCM-SHA256:ECDHE-ECDSA-AES256-GCM-SHA384:ECDHE-RSA-AES256-GCM-SHA384:ECDHE-ECDSA-CHACHA20-POLY1305:ECDHE-RSA-CHACHA20-POLY1305:DHE-RSA-AES128-GCM-SHA256:DHE-RSA-AES256-GCM-SHA384:DHE-RSA-CHACHA20-POLY1305; + ssl_stapling off; + ssl_stapling_verify off; + ssl_session_cache shared:MozSSL:10m; + ssl_session_tickets off; + ssl_session_timeout 1d; + ssl_dhparam dh.pem; + #ssl_conf_command Options KTLS; +### Performance tuning config ### + client_max_body_size 512M; + client_body_timeout 300s; + client_body_buffer_size 256k; + #pagespeed off; +### Compression ### + ## gzip ## + gzip on; + gzip_vary on; + gzip_comp_level 5; + gzip_min_length 256; + gzip_disable msie6; + gzip_proxied expired no-cache no-store private no_last_modified no_etag auth; + gzip_buffers 16 8k; + gzip_types application/atom+xml text/javascript application/javascript application/json application/ld+json application/manifest+json application/rss+xml application/vnd.geo+json application/vnd.ms-fontobject application/wasm application/x-font-ttf application/x-web-app-manifest+json application/xhtml+xml 
application/xml font/opentype image/bmp image/svg+xml image/x-icon text/cache-manifest text/css text/plain text/vcard text/vnd.rim.location.xloc text/vtt text/x-component text/x-cross-domain-policy; + ## brotli: enable only if you have compiled nginx with brotli support!!! ## + #brotli on; + #brotli_static on; + #brotli_comp_level 6; + #brotli_types application/atom+xml application/javascript application/json application/rss+xml + # application/vnd.ms-fontobject application/x-font-opentype application/x-font-truetype + # application/x-font-ttf application/x-javascript application/xhtml+xml application/xml + # font/eot font/opentype font/otf font/truetype image/svg+xml image/vnd.microsoft.icon + # image/x-icon image/x-win-bitmap text/css text/javascript text/plain text/xml; +### Default headers ### + add_header Referrer-Policy "no-referrer" always; + add_header X-Content-Type-Options "nosniff" always; + add_header X-Frame-Options "SAMEORIGIN" always; + add_header X-Permitted-Cross-Domain-Policies "none" always; + add_header X-Robots-Tag "noindex, nofollow" always; + add_header X-XSS-Protection "1; mode=block" always; + add_header Permissions-Policy "geolocation=(self), midi=(self), sync-xhr=(self), microphone=(self), camera=(self), magnetometer=(self), gyroscope=(self), fullscreen=(self), payment=(self), interest-cohort=()"; +### Proxy rules ### + # Backend web traffic and websockets + location ~ ^/(auth|storage|record|workflow|robot|proxy|api-docs|api|webhook|socket.io)(/|$) { + proxy_pass http://localhost:8080; #Change the port number to match .env file BACKEND_PORT variable + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } + # Frontend web traffic + location / { + proxy_pass http://localhost:5173; #Change the port number 
to match .env file FRONTEND_PORT variable + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + proxy_set_header X-Forwarded-Proto $scheme; + } +} diff --git a/docs/self-hosting-docker.md b/docs/self-hosting-docker.md new file mode 100644 index 00000000..93419a16 --- /dev/null +++ b/docs/self-hosting-docker.md @@ -0,0 +1,134 @@ +# Self hosting docker guide + +So you want to create a bot? Let's get you started! + +## Requirements (not covered) +- Webserver (Apache2, nginx, etc.) +- SSL Certificates (letsencrypt, zerossl, etc.) +- A sub-domain to host maxun, e.g. maxun.my.domain +- Docker +- Docker compose +- Probably others... + +## Guide +For this guide, we assume that before you start, you have a dedicated docker folder to house config files and everything else we need for persistence between docker container reboots and updates. The path in this guide is `/home/$USER/Docker/maxun`. +1. Change directory into your docker folder `cd /home/$USER/Docker/` +2. Create a new directory for maxun and all the required sub-folders for our docker services `mkdir -p maxun/{db,minio,redis}` +3. Change directory to enter the newly created folder `cd maxun` +4. 
Create an environment file to save your variables `nano .env` with the following contents: +``` +NODE_ENV=production +JWT_SECRET=openssl rand -base64 48 +DB_NAME=maxun +DB_USER=postgres +DB_PASSWORD=openssl rand -base64 24 +DB_HOST=postgres +DB_PORT=5432 +ENCRYPTION_KEY=openssl rand -base64 64 +SESSION_SECRET=openssl rand -base64 48 +MINIO_ENDPOINT=minio +MINIO_PORT=9000 +MINIO_CONSOLE_PORT=9001 +MINIO_ACCESS_KEY=minio +MINIO_SECRET_KEY=openssl rand -base64 24 +REDIS_HOST=maxun-redis +REDIS_PORT=6379 +REDIS_PASSWORD= +BACKEND_PORT=8080 +FRONTEND_PORT=5173 +BACKEND_URL=https://maxun.my.domain +PUBLIC_URL=https://maxun.my.domain +VITE_BACKEND_URL=https://maxun.my.domain +VITE_PUBLIC_URL=https://maxun.my.domain +GOOGLE_CLIENT_ID= +GOOGLE_CLIENT_SECRET= +GOOGLE_REDIRECT_URI= +AIRTABLE_CLIENT_ID= +AIRTABLE_REDIRECT_URI= +MAXUN_TELEMETRY=true +``` +5. Ctrl + x, Y, Enter will save your changes +6. Please be sure to READ this file and change the variables to match your environment, e.g. BACKEND_PORT=30000. Every value shown as `openssl rand -base64 N` is a command to run: replace it with that command's output, never the command text itself. +7. 
Create a file for docker compose `nano docker-compose.yml` with the following contents: +```yml +services: + postgres: + image: postgres:17 + container_name: maxun-postgres + mem_limit: 512M + environment: + POSTGRES_USER: ${DB_USER} + POSTGRES_PASSWORD: ${DB_PASSWORD} + POSTGRES_DB: ${DB_NAME} + volumes: + - /home/$USER/Docker/maxun/db:/var/lib/postgresql/data + healthcheck: + test: ["CMD-SHELL", "pg_isready -U postgres"] + interval: 10s + timeout: 5s + retries: 5 + + redis: + image: docker.io/library/redis:7 + container_name: maxun-redis + restart: always + mem_limit: 128M + volumes: + - /home/$USER/Docker/maxun/redis:/data + + minio: + image: minio/minio + container_name: maxun-minio + mem_limit: 512M + environment: + MINIO_ROOT_USER: ${MINIO_ACCESS_KEY} + MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY} + command: server /data --console-address :${MINIO_CONSOLE_PORT:-9001} + volumes: + - /home/$USER/Docker/maxun/minio:/data + + backend: + image: getmaxun/maxun-backend:latest + container_name: maxun-backend + ports: + - "127.0.0.1:${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}" + env_file: .env + environment: + BACKEND_URL: ${BACKEND_URL} + PLAYWRIGHT_BROWSERS_PATH: /ms-playwright + PLAYWRIGHT_SKIP_BROWSER_DOWNLOAD: 0 + # DEBUG: pw:api + # PWDEBUG: 1 # Enables debugging + CHROMIUM_FLAGS: '--disable-gpu --no-sandbox --headless=new' + security_opt: + - seccomp=unconfined # This might help with browser sandbox issues + shm_size: '2gb' + mem_limit: 4g + depends_on: + - postgres + - minio + volumes: + - /var/run/dbus:/var/run/dbus + + frontend: + image: getmaxun/maxun-frontend:latest + container_name: maxun-frontend + mem_limit: 512M + ports: + - "127.0.0.1:${FRONTEND_PORT:-5173}:5173" + env_file: .env + environment: + PUBLIC_URL: ${PUBLIC_URL} + BACKEND_URL: ${BACKEND_URL} + depends_on: + - backend +``` +8. Ctrl + x, Y, Enter will save your changes +9. This particular setup is "production ready" meaning that maxun is only accessible from localhost. 
You must configure a reverse proxy to access it! +10. Start maxun `sudo docker compose up -d` or `sudo docker-compose up -d` +11. Wait 30 seconds for everything to come up +12. Access your maxun instance at http://localhost:5173 if using defaults + +## Next steps +You will want to configure a reverse proxy. Click on a link below to check out some examples. +- [Nginx](nginx.conf) diff --git a/server/src/worker.ts b/legacy/server/worker.ts similarity index 100% rename from server/src/worker.ts rename to legacy/server/worker.ts diff --git a/maxun-core/package.json b/maxun-core/package.json index c9620bcb..81808a0f 100644 --- a/maxun-core/package.json +++ b/maxun-core/package.json @@ -1,6 +1,6 @@ { "name": "maxun-core", - "version": "0.0.16", + "version": "0.0.21", "description": "Core package for Maxun, responsible for data extraction", "main": "build/index.js", "typings": "build/index.d.ts", diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js index 7ee0f812..fdf1ff9c 100644 --- a/maxun-core/src/browserSide/scraper.js +++ b/maxun-core/src/browserSide/scraper.js @@ -423,50 +423,358 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, * @returns {Array.>} Array of arrays of scraped items, one sub-array per list */ window.scrapeList = async function ({ listSelector, fields, limit = 10 }) { - // Enhanced query function to handle iframe, frame and shadow DOM - const queryElement = (rootElement, selector) => { - if (!selector.includes('>>') && !selector.includes(':>>')) { - return rootElement.querySelector(selector); + // XPath evaluation functions + const queryInsideContext = (context, part) => { + try { + const { tagName, conditions } = parseXPathPart(part); + + const candidateElements = Array.from(context.querySelectorAll(tagName)); + if (candidateElements.length === 0) { + return []; + } + + const matchingElements = candidateElements.filter((el) => { + return elementMatchesConditions(el, conditions); 
+ }); + + return matchingElements; + } catch (err) { + console.error("Error in queryInsideContext:", err); + return []; + } + }; + + // Helper function to parse XPath part + const parseXPathPart = (part) => { + const tagMatch = part.match(/^([a-zA-Z0-9-]+)/); + const tagName = tagMatch ? tagMatch[1] : "*"; + + const conditionMatches = part.match(/\[([^\]]+)\]/g); + const conditions = conditionMatches + ? conditionMatches.map((c) => c.slice(1, -1)) + : []; + + return { tagName, conditions }; + }; + + // Helper function to check if element matches all conditions + const elementMatchesConditions = (element, conditions) => { + for (const condition of conditions) { + if (!elementMatchesCondition(element, condition)) { + return false; + } + } + return true; + }; + + // Helper function to check if element matches a single condition + const elementMatchesCondition = (element, condition) => { + condition = condition.trim(); + + if (/^\d+$/.test(condition)) { + return true; } - const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim()); + // Handle @attribute="value" + const attrMatch = condition.match(/^@([^=]+)=["']([^"']+)["']$/); + if (attrMatch) { + const [, attr, value] = attrMatch; + const elementValue = element.getAttribute(attr); + return elementValue === value; + } + + // Handle contains(@class, 'value') + const classContainsMatch = condition.match( + /^contains\(@class,\s*["']([^"']+)["']\)$/ + ); + if (classContainsMatch) { + const className = classContainsMatch[1]; + return element.classList.contains(className); + } + + // Handle contains(@attribute, 'value') + const attrContainsMatch = condition.match( + /^contains\(@([^,]+),\s*["']([^"']+)["']\)$/ + ); + if (attrContainsMatch) { + const [, attr, value] = attrContainsMatch; + const elementValue = element.getAttribute(attr) || ""; + return elementValue.includes(value); + } + + // Handle text()="value" + const textMatch = condition.match(/^text\(\)=["']([^"']+)["']$/); + if (textMatch) { + const 
expectedText = textMatch[1]; + const elementText = element.textContent?.trim() || ""; + return elementText === expectedText; + } + + // Handle contains(text(), 'value') + const textContainsMatch = condition.match( + /^contains\(text\(\),\s*["']([^"']+)["']\)$/ + ); + if (textContainsMatch) { + const expectedText = textContainsMatch[1]; + const elementText = element.textContent?.trim() || ""; + return elementText.includes(expectedText); + } + + // Handle count(*)=0 (element has no children) + if (condition === "count(*)=0") { + return element.children.length === 0; + } + + // Handle other count conditions + const countMatch = condition.match(/^count\(\*\)=(\d+)$/); + if (countMatch) { + const expectedCount = parseInt(countMatch[1]); + return element.children.length === expectedCount; + } + + return true; + }; + + const evaluateXPath = (document, xpath, isShadow = false) => { + try { + if (!document || !xpath) { + console.warn('Invalid document or xpath provided to evaluateXPath'); + return null; + } + + const result = document.evaluate( + xpath, + document, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null + ).singleNodeValue; + + if (!isShadow) { + if (result === null) { + return null; + } + return result; + } + + let cleanPath = xpath; + let isIndexed = false; + + const indexedMatch = xpath.match(/^\((.*?)\)\[(\d+)\](.*)$/); + if (indexedMatch) { + cleanPath = indexedMatch[1] + indexedMatch[3]; + isIndexed = true; + } + + const pathParts = cleanPath + .replace(/^\/\//, "") + .split("/") + .map((p) => p.trim()) + .filter((p) => p.length > 0); + + let currentContexts = [document]; + + for (let i = 0; i < pathParts.length; i++) { + const part = pathParts[i]; + const nextContexts = []; + + for (const ctx of currentContexts) { + const positionalMatch = part.match(/^([^[]+)\[(\d+)\]$/); + let partWithoutPosition = part; + let requestedPosition = null; + + if (positionalMatch) { + partWithoutPosition = positionalMatch[1]; + requestedPosition = 
parseInt(positionalMatch[2]); + } + + const matched = queryInsideContext(ctx, partWithoutPosition); + + let elementsToAdd = matched; + if (requestedPosition !== null) { + const index = requestedPosition - 1; // XPath is 1-based, arrays are 0-based + if (index >= 0 && index < matched.length) { + elementsToAdd = [matched[index]]; + } else { + console.warn( + `Position ${requestedPosition} out of range (${matched.length} elements found)` + ); + elementsToAdd = []; + } + } + + elementsToAdd.forEach((el) => { + nextContexts.push(el); + if (el.shadowRoot) { + nextContexts.push(el.shadowRoot); + } + }); + } + + if (nextContexts.length === 0) { + return null; + } + + currentContexts = nextContexts; + } + + if (currentContexts.length > 0) { + if (isIndexed && indexedMatch) { + const requestedIndex = parseInt(indexedMatch[2]) - 1; + if (requestedIndex >= 0 && requestedIndex < currentContexts.length) { + return currentContexts[requestedIndex]; + } else { + console.warn( + `Requested index ${requestedIndex + 1} out of range (${currentContexts.length} elements found)` + ); + return null; + } + } + + return currentContexts[0]; + } + + return null; + } catch (err) { + console.error("Critical XPath failure:", xpath, err); + // Return null instead of throwing to prevent crashes + return null; + } + }; + + const evaluateXPathAll = (rootElement, xpath) => { + try { + const ownerDoc = + rootElement.nodeType === Node.DOCUMENT_NODE + ? 
rootElement + : rootElement.ownerDocument; + + if (!ownerDoc) return []; + + const result = ownerDoc.evaluate( + xpath, + rootElement, + null, + XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, + null + ); + + const elements = []; + for (let i = 0; i < result.snapshotLength; i++) { + const node = result.snapshotItem(i); + if (node && node.nodeType === Node.ELEMENT_NODE) { + elements.push(node); + } + } + + return elements; + } catch (error) { + console.warn("XPath evaluation failed:", xpath, error); + return []; + } + }; + + // Helper function to detect selector type + const isXPathSelector = (selector) => { + return ( + selector.startsWith("//") || + selector.startsWith("/") || + selector.startsWith("./") + ); + }; + + // Enhanced query function to handle iframe, frame, shadow DOM, CSS selectors, and XPath + const queryElement = (rootElement, selector) => { + if (!selector.includes(">>") && !selector.includes(":>>")) { + // Check if it's an XPath selector + if (isXPathSelector(selector)) { + return evaluateXPath(rootElement, selector); + } else { + return rootElement.querySelector(selector); + } + } + + const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim()); let currentElement = rootElement; for (let i = 0; i < parts.length; i++) { - if (!currentElement) return null; + if (!currentElement) return null; - // Handle iframe and frame traversal - if (currentElement.tagName === 'IFRAME' || currentElement.tagName === 'FRAME') { - try { - const frameDoc = currentElement.contentDocument || currentElement.contentWindow.document; - currentElement = frameDoc.querySelector(parts[i]); - continue; - } catch (e) { - console.warn(`Cannot access ${currentElement.tagName.toLowerCase()} content:`, e); - return null; + // Handle iframe and frame traversal with enhanced safety + if ( + currentElement.tagName === "IFRAME" || + currentElement.tagName === "FRAME" + ) { + try { + // Check if frame is accessible + if (!currentElement.contentDocument && !currentElement.contentWindow) 
{ + console.warn('Frame is not accessible (cross-origin or unloaded)'); + return null; + } + + const frameDoc = + currentElement.contentDocument || + currentElement.contentWindow?.document; + if (!frameDoc) { + console.warn('Frame document is not available'); + return null; + } + + if (isXPathSelector(parts[i])) { + currentElement = evaluateXPath(frameDoc, parts[i]); + } else { + currentElement = frameDoc.querySelector(parts[i]); + } + continue; + } catch (e) { + console.warn( + `Cannot access ${currentElement.tagName.toLowerCase()} content:`, + e + ); + return null; + } + } + + let nextElement = null; + + // Try regular DOM first + if ("querySelector" in currentElement) { + if (isXPathSelector(parts[i])) { + nextElement = evaluateXPath(currentElement, parts[i]); + } else { + nextElement = currentElement.querySelector(parts[i]); + } + } + + // Try shadow DOM if not found + if ( + !nextElement && + "shadowRoot" in currentElement && + currentElement.shadowRoot + ) { + if (isXPathSelector(parts[i])) { + nextElement = evaluateXPath(currentElement.shadowRoot, parts[i]); + } else { + nextElement = currentElement.shadowRoot.querySelector(parts[i]); + } + } + + // Check children's shadow roots if still not found + if (!nextElement && "children" in currentElement) { + const children = Array.from(currentElement.children || []); + for (const child of children) { + if (child.shadowRoot) { + if (isXPathSelector(parts[i])) { + nextElement = evaluateXPath(child.shadowRoot, parts[i]); + } else { + nextElement = child.shadowRoot.querySelector(parts[i]); } + if (nextElement) break; + } } + } - // Try regular DOM first - let nextElement = currentElement.querySelector(parts[i]); - - // Try shadow DOM if not found - if (!nextElement && currentElement.shadowRoot) { - nextElement = currentElement.shadowRoot.querySelector(parts[i]); - } - - // Check children's shadow roots if still not found - if (!nextElement) { - const children = Array.from(currentElement.children || []); - for (const 
child of children) { - if (child.shadowRoot) { - nextElement = child.shadowRoot.querySelector(parts[i]); - if (nextElement) break; - } - } - } - - currentElement = nextElement; + currentElement = nextElement; } return currentElement; @@ -474,322 +782,492 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, // Enhanced query all function for both contexts const queryElementAll = (rootElement, selector) => { - if (!selector.includes('>>') && !selector.includes(':>>')) { - return rootElement.querySelectorAll(selector); + if (!selector.includes(">>") && !selector.includes(":>>")) { + if (isXPathSelector(selector)) { + return evaluateXPathAll(rootElement, selector); + } else { + return Array.from(rootElement.querySelectorAll(selector)); + } } - const parts = selector.split(/(?:>>|:>>)/).map(part => part.trim()); + const parts = selector.split(/(?:>>|:>>)/).map((part) => part.trim()); let currentElements = [rootElement]; for (const part of parts) { - const nextElements = []; + const nextElements = []; - for (const element of currentElements) { - // Handle iframe and frame traversal - if (element.tagName === 'IFRAME' || element.tagName === 'FRAME') { - try { - const frameDoc = element.contentDocument || element.contentWindow.document; - nextElements.push(...frameDoc.querySelectorAll(part)); - } catch (e) { - console.warn(`Cannot access ${element.tagName.toLowerCase()} content:`, e); - continue; - } - } else { - // Regular DOM elements - if (element.querySelectorAll) { - nextElements.push(...element.querySelectorAll(part)); - } - - // Shadow DOM elements - if (element.shadowRoot) { - nextElements.push(...element.shadowRoot.querySelectorAll(part)); - } - - // Check children's shadow roots - const children = Array.from(element.children || []); - for (const child of children) { - if (child.shadowRoot) { - nextElements.push(...child.shadowRoot.querySelectorAll(part)); - } - } + for (const element of currentElements) { + // Handle iframe and frame 
traversal + if (element.tagName === "IFRAME" || element.tagName === "FRAME") { + try { + const frameDoc = + element.contentDocument || element.contentWindow.document; + if (frameDoc) { + if (isXPathSelector(part)) { + nextElements.push(...evaluateXPathAll(frameDoc, part)); + } else { + nextElements.push( + ...Array.from(frameDoc.querySelectorAll(part)) + ); + } } - } + } catch (e) { + console.warn( + `Cannot access ${element.tagName.toLowerCase()} content:`, + e + ); + continue; + } + } else { + // Regular DOM elements + if (element.querySelectorAll) { + if (isXPathSelector(part)) { + nextElements.push(...evaluateXPathAll(element, part)); + } else { + nextElements.push( + ...Array.from(element.querySelectorAll(part)) + ); + } + } - currentElements = nextElements; + // Shadow DOM elements + if (element.shadowRoot) { + if (isXPathSelector(part)) { + nextElements.push( + ...evaluateXPathAll(element.shadowRoot, part) + ); + } else { + nextElements.push( + ...Array.from(element.shadowRoot.querySelectorAll(part)) + ); + } + } + + // Check children's shadow roots + const children = Array.from(element.children || []); + for (const child of children) { + if (child.shadowRoot) { + if (isXPathSelector(part)) { + nextElements.push( + ...evaluateXPathAll(child.shadowRoot, part) + ); + } else { + nextElements.push( + ...Array.from(child.shadowRoot.querySelectorAll(part)) + ); + } + } + } + } + } + + currentElements = nextElements; } return currentElements; }; // Enhanced value extraction with context awareness - function extractValue(element, attribute) { - if (!element) return null; - - // Get context-aware base URL - const baseURL = element.ownerDocument?.location?.href || window.location.origin; - - // Check shadow root first - if (element.shadowRoot) { - const shadowContent = element.shadowRoot.textContent; - if (shadowContent?.trim()) { - return shadowContent.trim(); - } + const extractValue = (element, attribute) => { + if (!element) return null; + + // Get context-aware 
base URL + const baseURL = + element.ownerDocument?.location?.href || window.location.origin; + + // Check shadow root first + if (element.shadowRoot) { + const shadowContent = element.shadowRoot.textContent; + if (shadowContent?.trim()) { + return shadowContent.trim(); } - - if (attribute === 'innerText') { - return element.innerText.trim(); - } else if (attribute === 'innerHTML') { - return element.innerHTML.trim(); - } else if (attribute === 'src' || attribute === 'href') { - if (attribute === 'href' && element.tagName !== 'A') { - const parentElement = element.parentElement; - if (parentElement && parentElement.tagName === 'A') { - const parentHref = parentElement.getAttribute('href'); - if (parentHref) { - try { - return new URL(parentHref, baseURL).href; - } catch (e) { - return parentHref; - } - } - } - } - - const attrValue = element.getAttribute(attribute); - const dataAttr = attrValue || element.getAttribute('data-' + attribute); - - if (!dataAttr || dataAttr.trim() === '') { - if (attribute === 'src') { - const style = window.getComputedStyle(element); - const bgImage = style.backgroundImage; - if (bgImage && bgImage !== 'none') { - const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/); - return matches ? 
new URL(matches[1], baseURL).href : null; - } - } - return null; - } - - try { - return new URL(dataAttr, baseURL).href; - } catch (e) { - console.warn('Error creating URL from', dataAttr, e); - return dataAttr; // Return the original value if URL construction fails - } - } - return element.getAttribute(attribute); } + if (attribute === "innerText") { + // First try standard innerText/textContent + let textContent = + element.innerText?.trim() || element.textContent?.trim(); + + // If empty, check for common data attributes that might contain the text + if (!textContent) { + const dataAttributes = [ + "data-600", + "data-text", + "data-label", + "data-value", + "data-content", + ]; + for (const attr of dataAttributes) { + const dataValue = element.getAttribute(attr); + if (dataValue && dataValue.trim()) { + textContent = dataValue.trim(); + break; + } + } + } + + return textContent || null; + } else if (attribute === "innerHTML") { + return element.innerHTML?.trim() || null; + } else if (attribute === "src" || attribute === "href") { + if (attribute === "href" && element.tagName !== "A") { + const parentElement = element.parentElement; + if (parentElement && parentElement.tagName === "A") { + const parentHref = parentElement.getAttribute("href"); + if (parentHref) { + try { + return new URL(parentHref, baseURL).href; + } catch (e) { + return parentHref; + } + } + } + } + + const attrValue = element.getAttribute(attribute); + const dataAttr = attrValue || element.getAttribute("data-" + attribute); + + if (!dataAttr || dataAttr.trim() === "") { + if (attribute === "src") { + const style = window.getComputedStyle(element); + const bgImage = style.backgroundImage; + if (bgImage && bgImage !== "none") { + const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/); + return matches ? 
new URL(matches[1], baseURL).href : null; + } + } + return null; + } + + try { + return new URL(dataAttr, baseURL).href; + } catch (e) { + console.warn("Error creating URL from", dataAttr, e); + return dataAttr; + } + } + return element.getAttribute(attribute); + }; + // Enhanced table ancestor finding with context support - function findTableAncestor(element) { + const findTableAncestor = (element) => { let currentElement = element; const MAX_DEPTH = 5; let depth = 0; - + while (currentElement && depth < MAX_DEPTH) { - // Handle shadow DOM - if (currentElement.getRootNode() instanceof ShadowRoot) { - currentElement = currentElement.getRootNode().host; - continue; + // Handle shadow DOM + if (currentElement.getRootNode() instanceof ShadowRoot) { + currentElement = currentElement.getRootNode().host; + continue; + } + + if (currentElement.tagName === "TD") { + return { type: "TD", element: currentElement }; + } else if (currentElement.tagName === "TR") { + return { type: "TR", element: currentElement }; + } + + // Handle iframe and frame crossing + if ( + currentElement.tagName === "IFRAME" || + currentElement.tagName === "FRAME" + ) { + try { + currentElement = currentElement.contentDocument.body; + } catch (e) { + return null; } - - if (currentElement.tagName === 'TD') { - return { type: 'TD', element: currentElement }; - } else if (currentElement.tagName === 'TR') { - return { type: 'TR', element: currentElement }; - } - - // Handle iframe and frame crossing - if (currentElement.tagName === 'IFRAME' || currentElement.tagName === 'FRAME') { - try { - currentElement = currentElement.contentDocument.body; - } catch (e) { - return null; - } - } else { - currentElement = currentElement.parentElement; - } - depth++; + } else { + currentElement = currentElement.parentElement; + } + depth++; } return null; - } + }; // Helper function to get cell index - function getCellIndex(td) { + const getCellIndex = (td) => { if (td.getRootNode() instanceof ShadowRoot) { - const 
shadowRoot = td.getRootNode(); - const allCells = Array.from(shadowRoot.querySelectorAll('td')); - return allCells.indexOf(td); + const shadowRoot = td.getRootNode(); + const allCells = Array.from(shadowRoot.querySelectorAll("td")); + return allCells.indexOf(td); } - + let index = 0; let sibling = td; - while (sibling = sibling.previousElementSibling) { - index++; + while ((sibling = sibling.previousElementSibling)) { + index++; } return index; - } + }; // Helper function to check for TH elements - function hasThElement(row, tableFields) { + const hasThElement = (row, tableFields) => { for (const [_, { selector }] of Object.entries(tableFields)) { - const element = queryElement(row, selector); - if (element) { - let current = element; - while (current && current !== row) { - if (current.getRootNode() instanceof ShadowRoot) { - current = current.getRootNode().host; - continue; - } - - if (current.tagName === 'TH') return true; - - if (current.tagName === 'IFRAME' || current.tagName === 'FRAME') { - try { - current = current.contentDocument.body; - } catch (e) { - break; - } - } else { - current = current.parentElement; - } + const element = queryElement(row, selector); + if (element) { + let current = element; + while (current && current !== row) { + if (current.getRootNode() instanceof ShadowRoot) { + current = current.getRootNode().host; + continue; + } + + if (current.tagName === "TH") return true; + + if (current.tagName === "IFRAME" || current.tagName === "FRAME") { + try { + current = current.contentDocument.body; + } catch (e) { + break; } + } else { + current = current.parentElement; + } } + } } return false; - } + }; // Helper function to filter rows - function filterRowsBasedOnTag(rows, tableFields) { - for (const row of rows) { - if (hasThElement(row, tableFields)) { - return rows; - } + const filterRowsBasedOnTag = (rows, tableFields) => { + for (const row of rows) { + if (hasThElement(row, tableFields)) { + return rows; } - // Include shadow DOM in TH 
search - return rows.filter(row => { - const directTH = row.getElementsByTagName('TH').length === 0; - const shadowTH = row.shadowRoot ? - row.shadowRoot.querySelector('th') === null : true; - return directTH && shadowTH; - }); - } + } + return rows.filter((row) => { + const directTH = row.getElementsByTagName("TH").length === 0; + const shadowTH = row.shadowRoot + ? row.shadowRoot.querySelector("th") === null + : true; + return directTH && shadowTH; + }); + }; // Class similarity comparison functions - function calculateClassSimilarity(classList1, classList2) { - const set1 = new Set(classList1); - const set2 = new Set(classList2); - const intersection = new Set([...set1].filter(x => set2.has(x))); - const union = new Set([...set1, ...set2]); - return intersection.size / union.size; - } + const calculateClassSimilarity = (classList1, classList2) => { + const set1 = new Set(classList1); + const set2 = new Set(classList2); + const intersection = new Set([...set1].filter((x) => set2.has(x))); + const union = new Set([...set1, ...set2]); + return intersection.size / union.size; + }; // Enhanced similar elements finding with context support - function findSimilarElements(baseElement, similarityThreshold = 0.7) { + const findSimilarElements = (baseElement, similarityThreshold = 0.7) => { const baseClasses = Array.from(baseElement.classList); if (baseClasses.length === 0) return []; const allElements = []; - + // Get elements from main document allElements.push(...document.getElementsByTagName(baseElement.tagName)); - + // Get elements from shadow DOM if (baseElement.getRootNode() instanceof ShadowRoot) { - const shadowHost = baseElement.getRootNode().host; - allElements.push(...shadowHost.getElementsByTagName(baseElement.tagName)); + const shadowHost = baseElement.getRootNode().host; + allElements.push( + ...shadowHost.getElementsByTagName(baseElement.tagName) + ); } - + // Get elements from iframes and frames const frames = [ - 
...Array.from(document.getElementsByTagName('iframe')), - ...Array.from(document.getElementsByTagName('frame')) + ...Array.from(document.getElementsByTagName("iframe")), + ...Array.from(document.getElementsByTagName("frame")), ]; - + for (const frame of frames) { - try { - const frameDoc = frame.contentDocument || frame.contentWindow.document; - allElements.push(...frameDoc.getElementsByTagName(baseElement.tagName)); - } catch (e) { - console.warn(`Cannot access ${frame.tagName.toLowerCase()} content:`, e); - } + try { + const frameDoc = + frame.contentDocument || frame.contentWindow.document; + allElements.push( + ...frameDoc.getElementsByTagName(baseElement.tagName) + ); + } catch (e) { + console.warn( + `Cannot access ${frame.tagName.toLowerCase()} content:`, + e + ); + } } - return allElements.filter(element => { - if (element === baseElement) return false; - const similarity = calculateClassSimilarity( - baseClasses, - Array.from(element.classList) - ); - return similarity >= similarityThreshold; + return allElements.filter((element) => { + if (element === baseElement) return false; + const similarity = calculateClassSimilarity( + baseClasses, + Array.from(element.classList) + ); + return similarity >= similarityThreshold; }); - } + }; - function tryFallbackSelector(rootElement, originalSelector) { - let element = queryElement(rootElement, originalSelector); - - if (!element && originalSelector.includes('nth-child')) { - const match = originalSelector.match(/nth-child\((\d+)\)/); - if (match) { - const position = parseInt(match[1], 10); - - for (let i = position - 1; i >= 1; i--) { - const fallbackSelector = originalSelector.replace(/nth-child\(\d+\)/, `nth-child(${i})`); - element = queryElement(rootElement, fallbackSelector); - if (element) break; - } - - if (!element) { - const baseSelector = originalSelector.replace(/\:nth-child\(\d+\)/, ''); - element = queryElement(rootElement, baseSelector); - } - } + const tryFallbackSelector = (rootElement, 
originalSelector) => { + let element = queryElement(rootElement, originalSelector); + + if (!element && originalSelector.includes("nth-child")) { + const match = originalSelector.match(/nth-child\((\d+)\)/); + if (match) { + const position = parseInt(match[1], 10); + + for (let i = position - 1; i >= 1; i--) { + const fallbackSelector = originalSelector.replace( + /nth-child\(\d+\)/, + `nth-child(${i})` + ); + element = queryElement(rootElement, fallbackSelector); + if (element) break; + } + + if (!element) { + const baseSelector = originalSelector.replace( + /\:nth-child\(\d+\)/, + "" + ); + element = queryElement(rootElement, baseSelector); + } } - - return element; - } + } + + return element; + }; + + // Create indexed XPath for specific container instance + const createIndexedXPath = ( + childSelector, + listSelector, + containerIndex + ) => { + // Check if the child selector contains the list selector pattern + if (childSelector.includes(listSelector.replace("//", ""))) { + // Replace the list selector part with indexed version + const listPattern = listSelector.replace("//", ""); + const indexedListSelector = `(${listSelector})[${containerIndex}]`; + + const indexedSelector = childSelector.replace( + `//${listPattern}`, + indexedListSelector + ); + + return indexedSelector; + } else { + // If pattern doesn't match, create a more generic indexed selector + return `(${listSelector})[${containerIndex}]${childSelector.replace( + "//", + "/" + )}`; + } + }; + + // Main scraping logic with unified support for both CSS and XPath + console.log("🚀 Starting unified list data extraction"); + console.log("List Selector:", listSelector); + console.log("Fields:", fields); - // Main scraping logic with context support let containers = queryElementAll(document, listSelector); containers = Array.from(containers); - if (containers.length === 0) return []; + if (containers.length === 0) { + console.warn("❌ No containers found for listSelector:", listSelector); + return []; + } 
- if (limit > 1 && containers.length === 1) { + console.log(`📦 Found ${containers.length} list containers`); + + // For CSS selectors, try to find similar containers if needed + if ( + !isXPathSelector(listSelector) && + limit > 1 && + containers.length === 1 + ) { const baseContainer = containers[0]; const similarContainers = findSimilarElements(baseContainer); - + if (similarContainers.length > 0) { - const newContainers = similarContainers.filter(container => - !container.matches(listSelector) - ); - containers = [...containers, ...newContainers]; + const newContainers = similarContainers.filter( + (container) => !container.matches(listSelector) + ); + containers = [...containers, ...newContainers]; } } const containerFields = containers.map(() => ({ tableFields: {}, - nonTableFields: {} + nonTableFields: {}, })); - // Classify fields + // For XPath selectors, use the new approach + if (isXPathSelector(listSelector)) { + const extractedData = []; + const containersToProcess = Math.min(containers.length, limit); + + for ( + let containerIndex = 0; + containerIndex < containersToProcess; + containerIndex++ + ) { + const record = {}; + + for (const [label, field] of Object.entries(fields)) { + let element = null; + + if (isXPathSelector(field.selector)) { + // Create indexed absolute XPath + const indexedSelector = createIndexedXPath( + field.selector, + listSelector, + containerIndex + 1 + ); + element = evaluateXPath(document, indexedSelector, field.isShadow); + } else { + // Fallback for CSS selectors within XPath containers + const container = containers[containerIndex]; + element = queryElement(container, field.selector); + } + + if (element) { + const value = extractValue(element, field.attribute); + if (value !== null && value !== "") { + record[label] = value; + } else { + record[label] = ""; + } + } else { + record[label] = ""; + } + } + + if (Object.values(record).some((value) => value !== "")) { + extractedData.push(record); + } + } + + console.log(`📊 
Total records extracted: ${extractedData.length}`); + return extractedData; + } + + // For CSS selectors, use the original table-aware approach containers.forEach((container, containerIndex) => { for (const [label, field] of Object.entries(fields)) { const sampleElement = queryElement(container, field.selector); - + if (sampleElement) { - const ancestor = findTableAncestor(sampleElement); - if (ancestor) { - containerFields[containerIndex].tableFields[label] = { - ...field, - tableContext: ancestor.type, - cellIndex: ancestor.type === 'TD' ? getCellIndex(ancestor.element) : -1 - }; - } else { - containerFields[containerIndex].nonTableFields[label] = field; - } - } else { + const ancestor = findTableAncestor(sampleElement); + if (ancestor) { + containerFields[containerIndex].tableFields[label] = { + ...field, + tableContext: ancestor.type, + cellIndex: + ancestor.type === "TD" ? getCellIndex(ancestor.element) : -1, + }; + } else { containerFields[containerIndex].nonTableFields[label] = field; + } + } else { + containerFields[containerIndex].nonTableFields[label] = field; } } }); @@ -798,149 +1276,192 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3, const nonTableData = []; // Process table data with support for iframes, frames, and shadow DOM - for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { + for ( + let containerIndex = 0; + containerIndex < containers.length; + containerIndex++ + ) { const container = containers[containerIndex]; const { tableFields } = containerFields[containerIndex]; if (Object.keys(tableFields).length > 0) { - const firstField = Object.values(tableFields)[0]; - const firstElement = queryElement(container, firstField.selector); - let tableContext = firstElement; - - // Find table context including iframe, frame and shadow DOM - while (tableContext && tableContext.tagName !== 'TABLE' && tableContext !== container) { - if (tableContext.getRootNode() instanceof ShadowRoot) { - 
tableContext = tableContext.getRootNode().host; - continue; - } - - if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') { - try { - tableContext = tableContext.contentDocument.body; - } catch (e) { - break; + const firstField = Object.values(tableFields)[0]; + const firstElement = queryElement(container, firstField.selector); + let tableContext = firstElement; + + // Find table context including iframe, frame and shadow DOM + while ( + tableContext && + tableContext.tagName !== "TABLE" && + tableContext !== container + ) { + if (tableContext.getRootNode() instanceof ShadowRoot) { + tableContext = tableContext.getRootNode().host; + continue; + } + + if ( + tableContext.tagName === "IFRAME" || + tableContext.tagName === "FRAME" + ) { + try { + tableContext = tableContext.contentDocument.body; + } catch (e) { + break; + } + } else { + tableContext = tableContext.parentElement; + } + } + + if (tableContext) { + // Get rows from all contexts + const rows = []; + + // Get rows from regular DOM + rows.push(...tableContext.getElementsByTagName("TR")); + + // Get rows from shadow DOM + if (tableContext.shadowRoot) { + rows.push(...tableContext.shadowRoot.getElementsByTagName("TR")); + } + + // Get rows from iframes and frames + if ( + tableContext.tagName === "IFRAME" || + tableContext.tagName === "FRAME" + ) { + try { + const frameDoc = + tableContext.contentDocument || + tableContext.contentWindow.document; + rows.push(...frameDoc.getElementsByTagName("TR")); + } catch (e) { + console.warn( + `Cannot access ${tableContext.tagName.toLowerCase()} rows:`, + e + ); + } + } + + const processedRows = filterRowsBasedOnTag(rows, tableFields); + + for ( + let rowIndex = 0; + rowIndex < Math.min(processedRows.length, limit); + rowIndex++ + ) { + const record = {}; + const currentRow = processedRows[rowIndex]; + + for (const [ + label, + { selector, attribute, cellIndex }, + ] of Object.entries(tableFields)) { + let element = null; + + if (cellIndex >= 0) { + 
// Get TD element considering both contexts + let td = currentRow.children[cellIndex]; + + // Check shadow DOM for td + if (!td && currentRow.shadowRoot) { + const shadowCells = currentRow.shadowRoot.children; + if (shadowCells && shadowCells.length > cellIndex) { + td = shadowCells[cellIndex]; } + } + + if (td) { + element = queryElement(td, selector); + + if ( + !element && + selector + .split(/(?:>>|:>>)/) + .pop() + .includes("td:nth-child") + ) { + element = td; + } + + if (!element) { + const tagOnlySelector = selector.split(".")[0]; + element = queryElement(td, tagOnlySelector); + } + + if (!element) { + let currentElement = td; + while ( + currentElement && + currentElement.children.length > 0 + ) { + let foundContentChild = false; + for (const child of currentElement.children) { + if (extractValue(child, attribute)) { + currentElement = child; + foundContentChild = true; + break; + } + } + if (!foundContentChild) break; + } + element = currentElement; + } + } } else { - tableContext = tableContext.parentElement; - } - } - - if (tableContext) { - // Get rows from all contexts - const rows = []; - - // Get rows from regular DOM - rows.push(...tableContext.getElementsByTagName('TR')); - - // Get rows from shadow DOM - if (tableContext.shadowRoot) { - rows.push(...tableContext.shadowRoot.getElementsByTagName('TR')); - } - - // Get rows from iframes and frames - if (tableContext.tagName === 'IFRAME' || tableContext.tagName === 'FRAME') { - try { - const frameDoc = tableContext.contentDocument || tableContext.contentWindow.document; - rows.push(...frameDoc.getElementsByTagName('TR')); - } catch (e) { - console.warn(`Cannot access ${tableContext.tagName.toLowerCase()} rows:`, e); - } - } - - const processedRows = filterRowsBasedOnTag(rows, tableFields); - - for (let rowIndex = 0; rowIndex < Math.min(processedRows.length, limit); rowIndex++) { - const record = {}; - const currentRow = processedRows[rowIndex]; - - for (const [label, { selector, attribute, cellIndex 
}] of Object.entries(tableFields)) { - let element = null; - - if (cellIndex >= 0) { - // Get TD element considering both contexts - let td = currentRow.children[cellIndex]; - - // Check shadow DOM for td - if (!td && currentRow.shadowRoot) { - const shadowCells = currentRow.shadowRoot.children; - if (shadowCells && shadowCells.length > cellIndex) { - td = shadowCells[cellIndex]; - } - } - - if (td) { - element = queryElement(td, selector); - - if (!element && selector.split(/(?:>>|:>>)/).pop().includes('td:nth-child')) { - element = td; - } - - if (!element) { - const tagOnlySelector = selector.split('.')[0]; - element = queryElement(td, tagOnlySelector); - } - - if (!element) { - let currentElement = td; - while (currentElement && currentElement.children.length > 0) { - let foundContentChild = false; - for (const child of currentElement.children) { - if (extractValue(child, attribute)) { - currentElement = child; - foundContentChild = true; - break; - } - } - if (!foundContentChild) break; - } - element = currentElement; - } - } - } else { - element = queryElement(currentRow, selector); - } - - if (element) { - record[label] = extractValue(element, attribute); - } - } - - if (Object.keys(record).length > 0) { - tableData.push(record); - } + element = queryElement(currentRow, selector); } + + if (element) { + record[label] = extractValue(element, attribute); + } + } + + if (Object.keys(record).length > 0) { + tableData.push(record); + } } + } } } // Process non-table data with all contexts support - for (let containerIndex = 0; containerIndex < containers.length; containerIndex++) { + for ( + let containerIndex = 0; + containerIndex < containers.length; + containerIndex++ + ) { if (nonTableData.length >= limit) break; const container = containers[containerIndex]; const { nonTableFields } = containerFields[containerIndex]; if (Object.keys(nonTableFields).length > 0) { - const record = {}; + const record = {}; - for (const [label, { selector, attribute }] of 
Object.entries(nonTableFields)) { - // Get the last part of the selector after any context delimiter - const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0]; - const element = tryFallbackSelector(container, relativeSelector); - - if (element) { - record[label] = extractValue(element, attribute); - } + for (const [label, { selector, attribute }] of Object.entries( + nonTableFields + )) { + // Get the last part of the selector after any context delimiter + const relativeSelector = selector.split(/(?:>>|:>>)/).slice(-1)[0]; + const element = tryFallbackSelector(container, relativeSelector); + + if (element) { + record[label] = extractValue(element, attribute); } - - if (Object.keys(record).length > 0) { - nonTableData.push(record); - } - } + } + + if (Object.keys(record).length > 0) { + nonTableData.push(record); + } + } } - + // Merge and limit the results const scrapedData = [...tableData, ...nonTableData]; + console.log(`📊 Total records extracted: ${scrapedData.length}`); + return scrapedData; }; diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts index 19b97707..5ed6fb12 100644 --- a/maxun-core/src/interpret.ts +++ b/maxun-core/src/interpret.ts @@ -47,6 +47,7 @@ interface InterpreterOptions { activeId: (id: number) => void, debugMessage: (msg: string) => void, setActionType: (type: string) => void, + incrementScrapeListIndex: () => void, }> } @@ -107,7 +108,9 @@ export default class Interpreter extends EventEmitter { PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']).then(blocker => { this.blocker = blocker; }).catch(err => { - this.log(`Failed to initialize ad-blocker:`, Level.ERROR); + this.log(`Failed to initialize ad-blocker: ${err.message}`, Level.ERROR); + // Continue without ad-blocker rather than crashing + this.blocker = null; }) } @@ -475,6 +478,11 @@ export default class Interpreter extends EventEmitter { } await this.ensureScriptsLoaded(page); + + if 
(this.options.debugChannel?.incrementScrapeListIndex) { + this.options.debugChannel.incrementScrapeListIndex(); + } + if (!config.pagination) { const scrapeResults: Record[] = await page.evaluate((cfg) => window.scrapeList(cfg), config); await this.options.serializableCallback(scrapeResults); @@ -516,11 +524,16 @@ export default class Interpreter extends EventEmitter { this.options.debugChannel.setActionType('script'); } - const AsyncFunction: FunctionConstructor = Object.getPrototypeOf( - async () => { }, - ).constructor; - const x = new AsyncFunction('page', 'log', code); - await x(page, this.log); + try { + const AsyncFunction: FunctionConstructor = Object.getPrototypeOf( + async () => { }, + ).constructor; + const x = new AsyncFunction('page', 'log', code); + await x(page, this.log); + } catch (error) { + this.log(`Script execution failed: ${error.message}`, Level.ERROR); + throw new Error(`Script execution error: ${error.message}`); + } }, flag: async () => new Promise((res) => { @@ -584,11 +597,18 @@ export default class Interpreter extends EventEmitter { try{ await executeAction(invokee, methodName, [step.args[0], { force: true }]); } catch (error) { - continue + this.log(`Click action failed: ${error.message}`, Level.WARN); + continue; } } } else { - await executeAction(invokee, methodName, step.args); + try { + await executeAction(invokee, methodName, step.args); + } catch (error) { + this.log(`Action ${methodName} failed: ${error.message}`, Level.ERROR); + // Continue with next action instead of crashing + continue; + } } } @@ -624,6 +644,8 @@ export default class Interpreter extends EventEmitter { }); allResults = allResults.concat(newResults); debugLog("Results collected:", allResults.length); + + await this.options.serializableCallback(allResults); }; const checkLimit = () => { @@ -797,10 +819,53 @@ export default class Interpreter extends EventEmitter { let retryCount = 0; let paginationSuccess = false; - // Capture basic content signature before 
click + // Capture basic content signature before click - with XPath support const captureContentSignature = async () => { - return await page.evaluate((selector) => { - const items = document.querySelectorAll(selector); + return await page.evaluate((listSelector) => { + const isXPath = (selector: string) => { + return selector.startsWith('//') || selector.startsWith('./') || selector.includes('::'); + }; + + let items: NodeListOf | Element[] = []; + + if (isXPath(listSelector)) { + try { + // Use XPath to find elements + const xpathResult = document.evaluate( + listSelector, + document, + null, + XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, + null + ); + + items = []; + for (let i = 0; i < xpathResult.snapshotLength; i++) { + const node = xpathResult.snapshotItem(i); + if (node && node.nodeType === Node.ELEMENT_NODE) { + items.push(node as Element); + } + } + } catch (xpathError) { + console.warn('XPath evaluation failed, trying CSS selector as fallback:', xpathError); + // Fallback to CSS selector + try { + items = document.querySelectorAll(listSelector); + } catch (cssError) { + console.warn('CSS selector fallback also failed:', cssError); + items = []; + } + } + } else { + try { + // Use CSS selector + items = document.querySelectorAll(listSelector); + } catch (cssError) { + console.warn('CSS selector failed:', cssError); + items = []; + } + } + return { url: window.location.href, itemCount: items.length, @@ -899,9 +964,9 @@ export default class Interpreter extends EventEmitter { if (checkLimit()) return allResults; let loadMoreCounter = 0; - let previousResultCount = allResults.length; - let noNewItemsCounter = 0; - const MAX_NO_NEW_ITEMS = 2; + // let previousResultCount = allResults.length; + // let noNewItemsCounter = 0; + // const MAX_NO_NEW_ITEMS = 2; while (true) { // Find working button with retry mechanism @@ -968,21 +1033,21 @@ export default class Interpreter extends EventEmitter { await scrapeCurrentPage(); - const currentResultCount = allResults.length; 
- const newItemsAdded = currentResultCount > previousResultCount; + // const currentResultCount = allResults.length; + // const newItemsAdded = currentResultCount > previousResultCount; - if (!newItemsAdded) { - noNewItemsCounter++; - debugLog(`No new items added after click (${noNewItemsCounter}/${MAX_NO_NEW_ITEMS})`); + // if (!newItemsAdded) { + // noNewItemsCounter++; + // debugLog(`No new items added after click (${noNewItemsCounter}/${MAX_NO_NEW_ITEMS})`); - if (noNewItemsCounter >= MAX_NO_NEW_ITEMS) { - debugLog(`Stopping after ${MAX_NO_NEW_ITEMS} clicks with no new items`); - return allResults; - } - } else { - noNewItemsCounter = 0; - previousResultCount = currentResultCount; - } + // if (noNewItemsCounter >= MAX_NO_NEW_ITEMS) { + // debugLog(`Stopping after ${MAX_NO_NEW_ITEMS} clicks with no new items`); + // return allResults; + // } + // } else { + // noNewItemsCounter = 0; + // previousResultCount = currentResultCount; + // } if (checkLimit()) return allResults; @@ -1081,7 +1146,16 @@ export default class Interpreter extends EventEmitter { }); /* eslint no-constant-condition: ["warn", { "checkLoops": false }] */ + let loopIterations = 0; + const MAX_LOOP_ITERATIONS = 1000; // Circuit breaker + while (true) { + // Circuit breaker to prevent infinite loops + if (++loopIterations > MAX_LOOP_ITERATIONS) { + this.log('Maximum loop iterations reached, terminating to prevent infinite loop', Level.ERROR); + return; + } + // Checks whether the page was closed from outside, // or the workflow execution has been stopped via `interpreter.stop()` if (p.isClosed() || !this.stopper) { @@ -1096,14 +1170,25 @@ export default class Interpreter extends EventEmitter { } let pageState = {}; - let getStateTest = "Hello"; try { + // Check if page is still valid before accessing state + if (p.isClosed()) { + this.log('Page was closed during execution', Level.WARN); + return; + } + pageState = await this.getState(p, workflowCopy, selectors); selectors = []; console.log("Empty 
selectors:", selectors) } catch (e: any) { - this.log('The browser has been closed.'); - return; + this.log(`Failed to get page state: ${e.message}`, Level.ERROR); + // If state access fails, attempt graceful recovery + if (p.isClosed()) { + this.log('Browser has been closed, terminating workflow', Level.WARN); + return; + } + // For other errors, continue with empty state to avoid complete failure + pageState = { url: p.url(), selectors: [], cookies: {} }; } if (this.options.debug) { @@ -1156,8 +1241,13 @@ export default class Interpreter extends EventEmitter { selectors.push(selector); } }); + + // Reset loop iteration counter on successful action + loopIterations = 0; } catch (e) { this.log(e, Level.ERROR); + // Don't crash on individual action failures - continue with next iteration + continue; } } else { //await this.disableAdBlocker(p); diff --git a/maxun-core/src/utils/concurrency.ts b/maxun-core/src/utils/concurrency.ts index 56c15fd9..41fc1047 100644 --- a/maxun-core/src/utils/concurrency.ts +++ b/maxun-core/src/utils/concurrency.ts @@ -41,6 +41,10 @@ export default class Concurrency { job().then(() => { // console.debug("Job finished, running the next waiting job..."); this.runNextJob(); + }).catch((error) => { + console.error(`Job failed with error: ${error.message}`); + // Continue processing other jobs even if one fails + this.runNextJob(); }); } else { // console.debug("No waiting job found!"); diff --git a/nginx.conf b/nginx.conf index c3651437..13878b3e 100644 --- a/nginx.conf +++ b/nginx.conf @@ -11,7 +11,7 @@ server { } # Proxy for backend - location ^/(auth|storage|record|workflow|robot|proxy|api-docs|api)(/|$) { + location ~ ^/(auth|storage|record|workflow|robot|proxy|api-docs|api|webhook)(/|$) { proxy_pass http://localhost:8080; # change as per your setup proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; diff --git a/package.json b/package.json index 795795bd..8e79c106 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { 
"name": "maxun", - "version": "0.0.14", + "version": "0.0.21", "author": "Maxun", "license": "AGPL-3.0-or-later", "dependencies": { @@ -23,7 +23,7 @@ "@types/react-dom": "^18.0.1", "@types/uuid": "^8.3.4", "airtable": "^0.12.2", - "axios": "^0.26.0", + "axios": "^1.9.0", "bcrypt": "^5.1.1", "body-parser": "^1.20.3", "buffer": "^6.0.3", @@ -50,7 +50,7 @@ "lodash": "^4.17.21", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", - "maxun-core": "^0.0.16", + "maxun-core": "^0.0.21", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", @@ -70,6 +70,7 @@ "react-router-dom": "^6.26.1", "react-simple-code-editor": "^0.11.2", "react-transition-group": "^4.4.2", + "rrweb-snapshot": "^2.0.0-alpha.4", "sequelize": "^6.37.3", "sequelize-typescript": "^2.1.6", "sharp": "^0.33.5", @@ -99,7 +100,8 @@ "migrate:undo:all": "sequelize-cli db:migrate:undo:all", "seed": "sequelize-cli db:seed:all", "seed:undo:all": "sequelize-cli db:seed:undo:all", - "migration:generate": "sequelize-cli migration:generate --name" + "migration:generate": "sequelize-cli migration:generate --name", + "mcp:build": "tsc --project server/tsconfig.mcp.json" }, "eslintConfig": { "extends": [ @@ -107,6 +109,7 @@ ] }, "devDependencies": { + "@modelcontextprotocol/sdk": "^1.12.1", "@types/connect-pg-simple": "^7.0.3", "@types/cookie-parser": "^1.4.7", "@types/express": "^4.17.13", @@ -115,6 +118,7 @@ "@types/loglevel": "^1.6.3", "@types/node": "22.7.9", "@types/node-cron": "^3.0.11", + "@types/node-fetch": "^2.6.12", "@types/prismjs": "^1.26.0", "@types/react-highlight": "^0.12.5", "@types/react-transition-group": "^4.4.4", @@ -129,6 +133,7 @@ "nodemon": "^2.0.15", "sequelize-cli": "^6.6.2", "ts-node": "^10.4.0", - "vite": "^5.4.10" + "vite": "^5.4.10", + "zod": "^3.25.62" } } diff --git a/public/locales/de.json b/public/locales/de.json index 4e26dc35..19b37c7b 100644 --- a/public/locales/de.json +++ b/public/locales/de.json @@ -58,10 +58,18 @@ "edit": "Bearbeiten", "delete": "Löschen", 
"duplicate": "Duplizieren", + "search": "Roboter suchen...", + "warning_modal": { + "title": "Aktiver Browser erkannt", + "message": "Es läuft bereits eine Browser-Aufzeichnungssitzung. Möchten Sie sie verwerfen und eine neue Aufzeichnung erstellen?", + "discard_and_create": "Verwerfen & Neu erstellen", + "cancel": "Abbrechen" + }, "notifications": { "delete_warning": "Der Roboter hat zugehörige Ausführungen. Löschen Sie zuerst die Ausführungen, um den Roboter zu löschen", "delete_success": "Roboter erfolgreich gelöscht", - "auth_success": "Roboter erfolgreich authentifiziert" + "auth_success": "Roboter erfolgreich authentifiziert", + "browser_limit_warning": "Remote-Browser sind derzeit ausgelastet. Bitte warten Sie einige Minuten und versuchen Sie es erneut" } }, "mainmenu": { @@ -140,11 +148,11 @@ "no_key_message": "Sie haben noch keinen API-Schlüssel generiert.", "generate_button": "API-Schlüssel generieren", "notifications": { - "fetch_error": "API-Schlüssel konnte nicht abgerufen werden - ${error}", + "fetch_error": "API-Schlüssel konnte nicht abgerufen werden - {{error}}", "generate_success": "API-Schlüssel erfolgreich generiert", - "generate_error": "API-Schlüssel konnte nicht generiert werden - ${error}", + "generate_error": "API-Schlüssel konnte nicht generiert werden - {{error}}", "delete_success": "API-Schlüssel erfolgreich gelöscht", - "delete_error": "API-Schlüssel konnte nicht gelöscht werden - ${error}", + "delete_error": "API-Schlüssel konnte nicht gelöscht werden - {{error}}", "copy_success": "API-Schlüssel erfolgreich kopiert" } }, @@ -170,6 +178,11 @@ "pagination": "Wählen Sie aus, wie der Roboter den Rest der Liste erfassen kann", "limit": "Wählen Sie die Anzahl der zu extrahierenden Elemente", "complete": "Erfassung ist abgeschlossen" + }, + "actions": { + "text": "Text erfassen", + "list": "Liste erfassen", + "screenshot": "Screenshot erfassen" } }, "right_panel": { @@ -182,6 +195,7 @@ "confirm_capture": "Erfassung bestätigen", 
"confirm_pagination": "Bestätigen", "confirm_limit": "Bestätigen", + "confirm_reset": "Bestätigen", "finish_capture": "Erfassung abschließen", "back": "Zurück", "reset": "Starten Sie die Aufnahme neu", @@ -291,7 +305,7 @@ "no": "Nein" }, "messages": { - "extracting": "Daten werden extrahiert...bitte warten Sie 10 Sekunden bis 1 Minute", + "extracting": "Daten werden extrahiert...bitte warten", "restart_required": "Bitte starten Sie die Interpretation nach der Aktualisierung der Aufnahme neu", "run_finished": "Durchlauf beendet", "run_failed": "Start fehlgeschlagen" @@ -300,11 +314,14 @@ "use_previous": "Möchten Sie Ihre vorherige Auswahl als Bedingung für diese Aktion verwenden?", "previous_action": "Ihre vorherige Aktion war: ", "element_text": "auf einem Element mit Text " + }, + "notifications": { + "reset_success": "Ausgabevorschau erfolgreich zurückgesetzt" } }, "recording_page": { "loader": { - "browser_startup": "Browser wird gestartet...Navigation zu {{url}}" + "browser_startup": "Browser wird gestartet...Bitte warten" } }, "integration_settings": { @@ -473,7 +490,8 @@ "schedule_success": "Roboter {{name}} erfolgreich geplant", "schedule_failed": "Planen des Roboters {{name}} fehlgeschlagen", "abort_success": "Interpretation des Roboters {{name}} erfolgreich abgebrochen", - "abort_failed": "Abbrechen der Interpretation des Roboters {{name}} fehlgeschlagen" + "abort_failed": "Abbrechen der Interpretation des Roboters {{name}} fehlgeschlagen", + "abort_initiated": "Interpretation des Roboters {{name}} wird abgebrochen" }, "menu": { "recordings": "Roboter", @@ -598,6 +616,7 @@ "es": "Spanisch", "ja": "Japanisch", "zh": "Chinesisch", - "de": "Deutsch" + "de": "Deutsch", + "tr": "Türkisch" } } diff --git a/public/locales/en.json b/public/locales/en.json index e4f15c52..51aada6b 100644 --- a/public/locales/en.json +++ b/public/locales/en.json @@ -68,7 +68,8 @@ "notifications": { "delete_warning": "The robot has associated runs. 
First delete runs to delete the robot", "delete_success": "Robot deleted successfully", - "auth_success": "Robot successfully authenticated" + "auth_success": "Robot successfully authenticated", + "browser_limit_warning": "Remote browsers are currently busy. Please wait for a few minutes and try again" } }, "mainmenu":{ @@ -147,11 +148,11 @@ "no_key_message": "You haven't generated an API key yet.", "generate_button": "Generate API Key", "notifications": { - "fetch_error": "Failed to fetch API Key - ${error}", + "fetch_error": "Failed to fetch API Key - {{error}}", "generate_success": "Generated API Key successfully", - "generate_error": "Failed to generate API Key - ${error}", + "generate_error": "Failed to generate API Key - {{error}}", "delete_success": "API Key deleted successfully", - "delete_error": "Failed to delete API Key - ${error}", + "delete_error": "Failed to delete API Key - {{error}}", "copy_success": "Copied API Key successfully" } }, @@ -304,7 +305,7 @@ "no": "No" }, "messages": { - "extracting": "Extracting data...please wait for 10secs to 1min", + "extracting": "Extracting data...please wait", "restart_required": "Please restart the interpretation after updating the recording", "run_finished": "Run finished", "run_failed": "Run failed to start" @@ -313,11 +314,14 @@ "use_previous": "Do you want to use your previous selection as a condition for performing this action?", "previous_action": "Your previous action was: ", "element_text": "on an element with text " + }, + "notifications": { + "reset_success": "Output Preview reset successfully" } }, "recording_page": { "loader": { - "browser_startup": "Spinning up a browser...Navigating to {{url}}" + "browser_startup": "Spinning up a browser...Hold tight" } }, "integration_settings": { @@ -486,7 +490,8 @@ "schedule_success": "Robot {{name}} scheduled successfully", "schedule_failed": "Failed to schedule robot {{name}}", "abort_success": "Interpretation of robot {{name}} aborted successfully", - 
"abort_failed": "Failed to abort the interpretation of robot {{name}}" + "abort_failed": "Failed to abort the interpretation of robot {{name}}", + "abort_initiated": "Aborting the interpretation of robot {{name}}" }, "menu": { "recordings": "Robots", @@ -611,6 +616,7 @@ "es": "Spanish", "ja": "Japanese", "zh": "Chinese", - "de": "German" + "de": "German", + "tr": "Turkish" } -} \ No newline at end of file +} diff --git a/public/locales/es.json b/public/locales/es.json index 35b1fba1..92bf7fe3 100644 --- a/public/locales/es.json +++ b/public/locales/es.json @@ -59,10 +59,17 @@ "delete": "Eliminar", "duplicate": "Duplicar", "search": "Buscar robots...", + "warning_modal": { + "title": "Navegador Activo Detectado", + "message": "Ya hay una sesión de grabación del navegador en ejecución. ¿Le gustaría descartarla y crear una nueva grabación?", + "discard_and_create": "Descartar y Crear Nueva", + "cancel": "Cancelar" + }, "notifications": { "delete_warning": "El robot tiene ejecuciones asociadas. Primero elimine las ejecuciones para eliminar el robot", "delete_success": "Robot eliminado exitosamente", - "auth_success": "Robot autenticado exitosamente" + "auth_success": "Robot autenticado exitosamente", + "browser_limit_warning": "Los navegadores remotos están ocupados actualmente. 
Por favor, espere unos minutos e inténtelo de nuevo" } }, "mainmenu": { @@ -141,11 +148,11 @@ "no_key_message": "Aún no has generado una clave API.", "generate_button": "Generar Clave API", "notifications": { - "fetch_error": "Error al obtener la clave API - ${error}", + "fetch_error": "Error al obtener la clave API - {{error}}", "generate_success": "Clave API generada con éxito", - "generate_error": "Error al generar la clave API - ${error}", + "generate_error": "Error al generar la clave API - {{error}}", "delete_success": "Clave API eliminada con éxito", - "delete_error": "Error al eliminar la clave API - ${error}", + "delete_error": "Error al eliminar la clave API - {{error}}", "copy_success": "Clave API copiada con éxito" } }, @@ -171,6 +178,11 @@ "pagination": "Seleccione cómo puede el robot capturar el resto de la lista", "limit": "Elija el número de elementos a extraer", "complete": "Captura completada" + }, + "actions": { + "text": "Capturar Texto", + "list": "Capturar Lista", + "screenshot": "Capturar Pantalla" } }, "right_panel": { @@ -183,6 +195,7 @@ "confirm_capture": "Confirmar Captura", "confirm_pagination": "Confirmar", "confirm_limit": "Confirmar", + "confirm_reset": "Confirmar", "finish_capture": "Finalizar Captura", "back": "Atrás", "reset": "Reiniciar", @@ -289,7 +302,7 @@ "no": "No" }, "messages": { - "extracting": "Extrayendo datos...espere de 10 segundos a 1 minuto", + "extracting": "Extrayendo datos...espere", "restart_required": "Por favor, reinicie la interpretación después de actualizar la grabación", "run_finished": "Ejecución finalizada", "run_failed": "Error al iniciar la ejecución" @@ -303,9 +316,30 @@ "reset_success": "Vista previa restablecida correctamente" } }, + "interpretation_log": { + "titles": { + "output_preview": "Vista Previa de Datos de Salida", + "screenshot": "Captura de pantalla" + }, + "messages": { + "additional_rows": "Se extraerán filas adicionales de datos una vez que termine la grabación.", + 
"successful_training": "¡Has entrenado exitosamente al robot para realizar acciones! Haz clic en el botón de abajo para obtener una vista previa de los datos que tu robot extraerá.", + "no_selection": "Parece que aún no has seleccionado nada para extraer. Una vez que lo hagas, el robot mostrará una vista previa de tus selecciones aquí." + }, + "data_sections": { + "binary_received": "---------- Datos binarios de salida recibidos ----------", + "serializable_received": "---------- Datos serializables de salida recibidos ----------", + "mimetype": "tipo MIME: ", + "image_below": "La imagen se muestra a continuación:", + "separator": "--------------------------------------------------" + }, + "notifications": { + "reset_success": "Vista previa restablecida correctamente" + } + }, "recording_page": { "loader": { - "browser_startup": "Iniciando el navegador...Navegando a {{url}}" + "browser_startup": "Iniciando el navegador...Espere un momento" } }, "integration_settings": { @@ -474,7 +508,8 @@ "schedule_success": "Robot {{name}} programado exitosamente", "schedule_failed": "Error al programar el robot {{name}}", "abort_success": "Interpretación del robot {{name}} abortada exitosamente", - "abort_failed": "Error al abortar la interpretación del robot {{name}}" + "abort_failed": "Error al abortar la interpretación del robot {{name}}", + "abort_initiated": "Cancelando la interpretación del robot {{name}}" }, "menu": { "recordings": "Robots", @@ -599,6 +634,7 @@ "es": "Español", "ja": "Japonés", "zh": "Chino", - "de": "Alemán" + "de": "Alemán", + "tr": "Turco" } -} \ No newline at end of file +} diff --git a/public/locales/ja.json b/public/locales/ja.json index b9e1174a..421bdecf 100644 --- a/public/locales/ja.json +++ b/public/locales/ja.json @@ -59,10 +59,17 @@ "delete": "削除", "duplicate": "複製", "search": "ロボットを検索...", + "warning_modal": { + "title": "アクティブなブラウザが検出されました", + "message": "既にブラウザ録画セッションが実行されています。破棄して新しい録画を作成しますか?", + "discard_and_create": "破棄して新規作成", +
"cancel": "キャンセル" + }, "notifications": { "delete_warning": "ロボットには関連する実行があります。ロボットを削除するには、まず実行を削除してください", "delete_success": "ロボットが正常に削除されました", - "auth_success": "ロボットの認証に成功しました" + "auth_success": "ロボットの認証に成功しました", + "browser_limit_warning": "リモートブラウザは現在ビジー状態です。数分お待ちいただいてから再度お試しください" } }, "mainmenu": { @@ -141,11 +148,11 @@ "no_key_message": "APIキーはまだ生成されていません。", "generate_button": "APIキーを生成", "notifications": { - "fetch_error": "APIキーの取得に失敗しました - ${error}", + "fetch_error": "APIキーの取得に失敗しました - {{error}}", "generate_success": "APIキーの生成に成功しました", - "generate_error": "APIキーの生成に失敗しました - ${error}", + "generate_error": "APIキーの生成に失敗しました - {{error}}", "delete_success": "APIキーの削除に成功しました", - "delete_error": "APIキーの削除に失敗しました - ${error}", + "delete_error": "APIキーの削除に失敗しました - {{error}}", "copy_success": "APIキーのコピーに成功しました" } }, @@ -171,6 +178,11 @@ "pagination": "ロボットがリストの残りをどのように取得するか選択してください", "limit": "抽出するアイテムの数を選択してください", "complete": "取得が完了しました" + }, + "actions": { + "text": "テキストを取得", + "list": "リストを取得", + "screenshot": "スクリーンショットを取得" } }, "right_panel": { @@ -183,6 +195,7 @@ "confirm_capture": "取得を確認", "confirm_pagination": "確認", "confirm_limit": "確認", + "confirm_reset": "確認", "finish_capture": "取得を完了", "back": "戻る", "reset": "リセット", @@ -292,7 +305,7 @@ "no": "いいえ" }, "messages": { - "extracting": "データ抽出中...10秒から1分ほどお待ちください", + "extracting": "データを抽出しています...お待ちください", "restart_required": "録画を更新した後、解釈を再起動してください", "run_finished": "実行完了", "run_failed": "実行の開始に失敗しました" @@ -301,11 +314,14 @@ "use_previous": "この操作の条件として前回の選択を使用しますか?", "previous_action": "前回の操作: ", "element_text": "テキスト要素 " + }, + "notifications": { + "reset_success": "出力プレビューが正常にリセットされました" } }, "recording_page": { "loader": { - "browser_startup": "ブラウザを起動中...{{url}}に移動中" + "browser_startup": "ブラウザを起動中...しばらくお待ちください" } }, "integration_settings": { @@ -474,7 +490,8 @@ "schedule_success": "ロボット{{name}}のスケジュールが正常に設定されました", "schedule_failed": "ロボット{{name}}のスケジュール設定に失敗しました", "abort_success": "ロボット{{name}}の解釈を中止しました", -
"abort_failed": "ロボット{{name}}の解釈中止に失敗しました" + "abort_failed": "ロボット{{name}}の解釈中止に失敗しました", + "abort_initiated": "ロボット {{name}} の解釈を中止しています" }, "menu": { "recordings": "ロボット", @@ -599,6 +616,7 @@ "es": "スペイン語", "ja": "日本語", "zh": "中国語", - "de": "ドイツ語" + "de": "ドイツ語", + "tr": "トルコ語" } } diff --git a/public/locales/tr.json b/public/locales/tr.json new file mode 100644 index 00000000..b4118ed0 --- /dev/null +++ b/public/locales/tr.json @@ -0,0 +1,622 @@ +{ + "login": { + "title": "Tekrar Hoş Geldiniz!", + "email": "İş E‑postası Girin", + "password": "Şifre", + "button": "Giriş Yap", + "loading": "Yükleniyor", + "register_prompt": "Hesabınız yok mu?", + "register_link": "Kaydol", + "welcome_notification": "Maxun’a Hoş Geldiniz!", + "validation": { + "required_fields": "E‑posta ve şifre zorunludur", + "password_length": "Şifre en az 6 karakter olmalıdır" + }, + "error": { + "user_not_found": "Kullanıcı mevcut değil", + "invalid_credentials": "Geçersiz e‑posta veya şifre", + "server_error": "Giriş başarısız. Lütfen daha sonra tekrar deneyin", + "generic": "Bir hata oluştu. Lütfen tekrar deneyin" + } + }, + "register": { + "title": "Hesap Oluştur", + "email": "İş E‑postası Girin", + "password": "Şifre", + "button": "Kaydol", + "loading": "Yükleniyor", + "register_prompt": "Zaten hesabınız var mı?", + "login_link": "Giriş Yap", + "welcome_notification": "Maxun’a Hoş Geldiniz!", + "validation": { + "email_required": "E‑posta zorunludur", + "password_requirements": "Şifre en az 6 karakter olmalıdır" + }, + "error": { + "user_exists": "Bu e‑posta ile kullanıcı zaten mevcut", + "creation_failed": "Hesap oluşturulamadı", + "server_error": "Sunucu hatası oluştu", + "generic": "Kayıt başarısız. 
Lütfen tekrar deneyin" + } + }, + "recordingtable": { + "run": "Çalıştır", + "name": "Ad", + "schedule": "Zamanlama", + "integrate": "Entegre Et", + "settings": "Ayarlar", + "options": "Seçenekler", + "heading": "Robotlarım", + "new": "Robot Oluştur", + "modal": { + "title": "URL’yi Girin", + "login_title": "Bu web sitesine giriş gerekiyor mu?", + "label": "URL", + "button": "Kaydı Başlat" + }, + "warning_modal": { + "title": "Aktif Tarayıcı Tespit Edildi", + "message": "Hâlihazırda çalışan bir tarayıcı kayıt oturumu var. İptal edip yeni bir kayıt başlatmak ister misiniz?", + "discard_and_create": "İptal Et ve Yenisi Oluştur", + "cancel": "İptal" + }, + "retrain": "Yeniden Eğit", + "edit": "Düzenle", + "delete": "Sil", + "duplicate": "Çoğalt", + "search": "Robot Ara...", + "notifications": { + "delete_warning": "Robota bağlı çalıştırmalar var. Robotu silmeden önce çalıştırmaları silin", + "delete_success": "Robot başarıyla silindi", + "auth_success": "Robot başarıyla kimlik doğrulandı", + "browser_limit_warning": "Uzak tarayıcılar şu anda meşgul. Lütfen birkaç dakika bekleyip tekrar deneyin" + } + }, + "mainmenu": { + "recordings": "Robotlar", + "runs": "Çalıştırmalar", + "proxy": "Proxy", + "apikey": "API Anahtarı", + "feedback": "Maxun Cloud’a Katıl", + "apidocs": "Web Siteyi API’ye Dönüştür" + }, + "runstable": { + "runs": "Tüm Çalıştırmalar", + "runStatus": "Durum", + "runName": "Ad", + "startedAt": "Başlama", + "finishedAt": "Bitiş", + "delete": "Sil", + "settings": "Ayarlar", + "search": "Çalıştırma Ara...", + "sort_tooltip": "Sıralamak için tıkla", + "notifications": { + "no_runs": "Çalıştırma bulunamadı. Lütfen tekrar deneyin.", + "delete_success": "Çalıştırma başarıyla silindi" + } + }, + "proxy": { + "title": "Proxy Yapılandırması", + "tab_standard": "Standart Proxy", + "tab_rotation": "Otomatik Proxy Döndürme", + "server_url": "Proxy Sunucu URL’si", + "server_url_helper": "Tüm robotlar için kullanılacak proxy. HTTP ve SOCKS desteklenir. 
Örnek http://myproxy.com:3128 veya socks5://myproxy.com:3128. Kısa biçim myproxy.com:3128 HTTP proxy kabul edilir.", + "requires_auth": "Kimlik Doğrulama Gerekli mi?", + "username": "Kullanıcı Adı", + "password": "Şifre", + "add_proxy": "Proxy Ekle", + "test_proxy": "Proxy’yi Test Et", + "remove_proxy": "Proxy’yi Kaldır", + "table": { + "proxy_url": "Proxy URL", + "requires_auth": "Kimlik Doğrulama Gerekli" + }, + "coming_soon": "Yakında — Açık Kaynak (Temel Döndürme) & Cloud (Gelişmiş Döndürme). Altyapı yönetmek istemiyorsanız, erken erişim için cloud bekleme listemize katılın.", + "join_waitlist": "Maxun Cloud Bekleme Listesine Katıl", + "alert": { + "title": "Proxy’niz kullanıcı adı ve şifre gerektiriyorsa, bunları her zaman URL’den ayrı girin.", + "right_way": "Doğru yol", + "wrong_way": "Yanlış yol", + "proxy_url": "Proxy URL:", + "username": "Kullanıcı Adı:", + "password": "Şifre:" + }, + "notifications": { + "config_success": "Proxy yapılandırması başarıyla gönderildi", + "config_error": "Proxy yapılandırması gönderilemedi. Tekrar deneyin.", + "test_success": "Proxy yapılandırması çalışıyor", + "test_error": "Proxy testi başarısız. Tekrar deneyin.", + "fetch_success": "Proxy yapılandırması alındı", + "remove_success": "Proxy yapılandırması kaldırıldı", + "remove_error": "Proxy kaldırma başarısız. Tekrar deneyin." 
+ } + }, + "apikey": { + "title": "API Anahtarını Yönet", + "default_name": "Maxun API Anahtarı", + "table": { + "name": "API Anahtar Adı", + "key": "API Anahtarı", + "actions": "Eylemler" + }, + "actions": { + "copy": "Kopyala", + "show": "Göster", + "hide": "Gizle", + "delete": "Sil" + }, + "no_key_message": "Henüz bir API anahtarı oluşturmadınız.", + "generate_button": "API Anahtarı Oluştur", + "notifications": { + "fetch_error": "API Anahtarı alınamadı - {{error}}", + "generate_success": "API anahtarı oluşturuldu", + "generate_error": "API anahtarı oluşturulamadı - {{error}}", + "delete_success": "API anahtarı silindi", + "delete_error": "API anahtarı silinemedi - {{error}}", + "copy_success": "API anahtarı kopyalandı" + } + }, + "action_description": { + "text": { + "title": "Metin Yakala", + "description": "Çıkarmak istediğiniz metinlerin üzerine gelin ve tıklayarak seçin" + }, + "screenshot": { + "title": "Ekran Görüntüsü Yakala", + "description": "Sayfanın tamamının veya bir bölümünün ekran görüntüsünü alın" + }, + "list": { + "title": "Liste Yakala", + "description": "Çıkarmak istediğiniz listenin üzerine gelin. Seçtikten sonra, listenin içindeki tüm metinleri seçebilirsiniz." + }, + "default": { + "title": "Hangi verileri çıkarmak istiyorsunuz?", + "description": "Bir robot bir veya birden fazla işlem gerçekleştirebilir. Aşağıdaki seçeneklerden seçim yapın." 
+ }, + "list_stages": { + "initial": "Listeyi ve içindeki metinleri seçin", + "pagination": "Robotun listenin geri kalanını nasıl yakalayacağını seçin", + "limit": "Çıkarılacak öğe sayısını seçin", + "complete": "Yakalama tamamlandı" + }, + "actions": { + "text": "Metin Yakala", + "list": "Liste Yakala", + "screenshot": "Ekran Görüntüsü Yakala" + } + }, + "right_panel": { + "buttons": { + "capture_list": "Liste Yakala", + "capture_text": "Metin Yakala", + "capture_screenshot": "Ekran Görüntüsü Yakala", + "confirm": "Onayla", + "discard": "İptal", + "confirm_capture": "Yakalamayı Onayla", + "confirm_pagination": "Onayla", + "confirm_limit": "Onayla", + "confirm_reset": "Onayla", + "finish_capture": "Yakalamayı Bitir", + "back": "Geri", + "reset": "Kaydı Yeniden Başlat", + "finish": "Bitir", + "cancel": "İptal", + "delete": "Sil" + }, + "screenshot": { + "capture_fullpage": "Tam Sayfa Yakala", + "capture_visible": "Görünen Kısmı Yakala", + "display_fullpage": "Tam Sayfa Görüntü Al", + "display_visible": "Görünen Kısmın Görüntüsünü Al" + }, + "pagination": { + "title": "Sayfada sonraki liste öğesini nasıl bulalım?", + "click_next": "Sonraki sayfaya gitmek için ‘sonraki’yi tıkla", + "click_load_more": "Daha fazla yüklemek için ‘daha fazla yükle’yi tıkla", + "scroll_down": "Daha fazla öğe için aşağı kaydır", + "scroll_up": "Daha fazla öğe için yukarı kaydır", + "none": "Yüklenecek başka öğe yok" + }, + "limit": { + "title": "En fazla kaç satır çıkarmak istiyorsunuz?", + "custom": "Özel", + "enter_number": "Sayı gir" + }, + "fields": { + "label": "Etiket", + "data": "Veri", + "field_label": "Alan Etiketi", + "field_data": "Alan Verisi" + }, + "messages": { + "list_selected": "Liste başarıyla seçildi", + "list_empty": "Liste seçildi. Lütfen listenin içindeki alanları seçin." 
+ }, + "errors": { + "select_pagination": "Lütfen bir sayfalama tipi seçin.", + "select_pagination_element": "Lütfen önce sayfalama öğesini seçin.", + "select_limit": "Lütfen bir limit seçin veya özel limit girin.", + "invalid_limit": "Geçerli bir limit girin.", + "confirm_text_fields": "Lütfen tüm metin alanlarını onaylayın", + "unable_create_settings": "Liste ayarları oluşturulamadı. Bir alan tanımladığınızdan emin olun.", + "capture_text_discarded": "Metin Yakalama İptal Edildi", + "capture_list_discarded": "Liste Yakalama İptal Edildi", + "label_required": "Etiket boş olamaz" + } + }, + "save_recording": { + "title": "Robotu Kaydet", + "robot_name": "Robot Adı", + "buttons": { + "save": "Kaydet", + "confirm": "Onayla" + }, + "notifications": { + "save_success": "Robot kaydedildi", + "retrain_success": "Robot yeniden eğitildi", + "save_error": "Robot kaydedilirken hata" + }, + "errors": { + "user_not_logged": "Kullanıcı girişi yok. Kaydedilemedi.", + "exists_warning": "Bu isimde robot zaten var; üzerine yazmayı onaylayın." + }, + "tooltips": { + "saving": "Akış optimize ediliyor ve kaydediliyor" + } + }, + "browser_recording": { + "modal": { + "confirm_discard": "Kaydı iptal etmek istediğinize emin misiniz?", + "confirm_reset": "Kaydı yeniden başlatmak istediğinize emin misiniz?", + "reset_warning": "Bu işlem, mevcut oturumdaki tüm yakalamaları temizler ve aynı site için kaydı yeniden başlatır." + }, + "notifications": { + "terminated": "Kayıt sonlandırıldı", + "environment_reset": "Tarayıcı ortamı sıfırlandı", + "reset_successful": "Yakalamalar sıfırlandı ve başlangıç durumuna dönüldü" + } + }, + "interpretation_log": { + "titles": { + "output_preview": "Çıktı Verisi Önizlemesi", + "screenshot": "Ekran Görüntüsü" + }, + "messages": { + "additional_rows": "Kaydı bitirdiğinizde ek satırlar çıkarılacak.", + "successful_training": "Robotu başarıyla eğittiniz! 
Çıkaracağı verilerin önizlemesi için aşağıdaki butona tıklayın.", + "no_selection": "Henüz seçim yapmadınız. Seçim yaptığınızda önizleme burada görünecek." + }, + "data_sections": { + "binary_received": "---------- İkili çıktı verisi alındı ----------", + "serializable_received": "---------- Serileştirilebilir çıktı verisi alındı ----------", + "mimetype": "mimetype: ", + "image_below": "Görüntü aşağıda:", + "separator": "--------------------------------------------------" + }, + "notifications": { + "reset_success": "Önizleme sıfırlandı" + } + }, + "interpretation_buttons": { + "buttons": { + "preview": "Çıktı Önizle", + "reset": "Sıfırla", + "yes": "Evet", + "no": "Hayır" + }, + "messages": { + "extracting": "Veri çıkarılıyor... lütfen bekleyin", + "restart_required": "Kaydı güncelledikten sonra yorumlamayı yeniden başlatın", + "run_finished": "Çalıştırma tamamlandı", + "run_failed": "Çalıştırma başlatılamadı" + }, + "modal": { + "use_previous": "Bu işlem için önceki seçiminizi koşul olarak kullanmak ister misiniz?", + "previous_action": "Önceki işleminiz:", + "element_text": " metnine sahip öğe" + }, + "notifications": { + "reset_success": "Önizleme başarıyla sıfırlandı" + } + }, + "recording_page": { + "loader": { + "browser_startup": "Tarayıcı başlatılıyor... Lütfen bekleyin" + } + }, + "integration_settings": { + "title": "Entegrasyon Ayarları", + "descriptions": { + "authenticated_as": "Kimlik doğrulandı: {{email}}" + }, + "buttons": { + "submit": "Gönder", + "remove_integration": "Entegrasyonu Kaldır" + }, + "google": { + "title": "Google Sheet ile Entegrasyon", + "descriptions": { + "sync_info": "Bu seçenek etkinse robot başarılı olduğunda veriler Google Sheet’e eklenir.", + "authenticated_as": "Kimlik doğrulandı: {{email}}" + }, + "alerts": { + "success": { + "title": "Google Sheet entegrasyonu başarılı", + "content": "Robot her başarılı çalıştırmada veriyi {{sheetName}} sayfanıza ekler. 
Kontrol etmek için", + "here": "buraya", + "note": "Not:", + "sync_limitation": "Entegrasyon öncesi veriler senkronize edilmez." + } + }, + "buttons": { + "authenticate": "Google ile Giriş Yap", + "fetch_sheets": "E‑Tabloları Getir", + "remove_integration": "Entegrasyonu Kaldır", + "submit": "Gönder" + }, + "fields": { + "select_sheet": "Google Sheet Seç", + "selected_sheet": "Seçilen Sheet: {{name}} (ID: {{id}})" + }, + "errors": { + "auth_error": "Google kimlik doğrulama hatası", + "fetch_error": "E‑tablo alma hatası: {{message}}", + "update_error": "Sheet ID güncelleme hatası: {{message}}", + "remove_error": "Google Sheets entegrasyon kaldırma hatası: {{message}}" + }, + "notifications": { + "sheet_selected": "Google Sheet seçildi", + "integration_removed": "Google Sheets entegrasyonu kaldırıldı" + } + }, + "airtable": { + "title": "Airtable ile Entegrasyon", + "descriptions": { + "sync_info": "Bu seçenek etkinse robot başarılı olduğunda veriler Airtable Base’e eklenir.", + "authenticated_as": "Airtable kimlik doğrulandı. Base ve tablo seçebilirsiniz." + }, + "alerts": { + "success": { + "title": "Airtable entegrasyonu başarılı", + "content": "Robot her başarılı çalıştırmada veriyi {{baseName}} > {{tableName}}’e ekler. Kontrol etmek için", + "here": "buraya", + "note": "Not:", + "sync_limitation": "Yalnızca entegrasyon sonrası veriler senkronize edilir." 
+ } + }, + "buttons": { + "authenticate": "Airtable’a Bağlan", + "fetch_bases": "Base’leri Getir", + "fetch_tables": "Tabloları Getir", + "remove_integration": "Entegrasyonu Kaldır", + "submit": "Base ve Tablo Seç" + }, + "fields": { + "select_base": "Airtable Base Seç", + "select_table": "Airtable Tablo Seç", + "selected_base": "Seçilen Base: {{name}}", + "selected_table": "Seçilen Tablo: {{name}}" + }, + "errors": { + "auth_error": "Airtable kimlik doğrulama hatası", + "fetch_error": "Base alma hatası: {{message}}", + "fetch_tables_error": "Tablo alma hatası: {{message}}", + "update_error": "Base güncelleme hatası: {{message}}", + "remove_error": "Airtable entegrasyon kaldırma hatası: {{message}}" + }, + "notifications": { + "base_selected": "Base seçildi", + "table_selected": "Tablo seçildi", + "integration_removed": "Airtable entegrasyonu kaldırıldı" + } + } + }, + "robot_duplication": { + "title": "Robotu Çoğalt", + "descriptions": { + "purpose": "Aynı yapıya sahip sayfalarda veri toplamak için kullanılır.", + "example": "Örnek: {{url1}} için robot oluşturduysanız, benzer {{url2}} sayfaları için çoğaltabilirsiniz.", + "warning": "⚠️ Yeni sayfanın yapısının aynı olduğundan emin olun." + }, + "fields": { + "target_url": "Robot Hedef URL" + }, + "buttons": { + "duplicate": "Robotu Çoğalt", + "cancel": "İptal" + }, + "notifications": { + "robot_not_found": "Robot bulunamadı. Tekrar deneyin.", + "url_required": "Hedef URL gerekli.", + "duplicate_success": "Robot çoğaltıldı", + "duplicate_error": "Hedef URL güncellenemedi. Tekrar deneyin.", + "unknown_error": "Hedef URL güncellenirken hata oluştu" + } + }, + "robot_settings": { + "title": "Robot Ayarları", + "target_url": "Robot Hedef URL", + "robot_id": "Robot ID", + "robot_limit": "Robot Limiti", + "created_by_user": "Oluşturan", + "created_at": "Oluşturulma", + "errors": { + "robot_not_found": "Robot bulunamadı. Tekrar deneyin." 
+ } + }, + "robot_edit": { + "title": "Robotu Düzenle", + "change_name": "Robot Adı", + "robot_limit": "Robot Limiti", + "save": "Değişiklikleri Kaydet", + "cancel": "İptal", + "notifications": { + "update_success": "Robot güncellendi", + "update_failed": "Robot güncellenemedi. Tekrar deneyin.", + "update_error": "Güncelleme sırasında hata" + } + }, + "schedule_settings": { + "title": "Zamanlama Ayarları", + "run_every": "Çalıştırma aralığı", + "start_from": "Başlangıç", + "on_day": "Gününde", + "at_around": "Saat civarı", + "timezone": "Zaman Dilimi", + "buttons": { + "delete_schedule": "Zamanlamayı Sil", + "save_schedule": "Zamanlamayı Kaydet", + "cancel": "İptal" + }, + "labels": { + "in_between": "Arasında", + "run_once_every": "Her", + "start_from_label": "Başlangıç", + "on_day_of_month": "Ayın Günü", + "on_day": { + "st": ".", + "nd": ".", + "rd": ".", + "th": "." + } + } + }, + "main_page": { + "notifications": { + "interpretation_success": "Robot {{name}} yorumlandı", + "interpretation_failed": "Robot {{name}} yorumlanamadı", + "run_started": "Robot çalıştırılıyor: {{name}}", + "run_start_failed": "Robot çalıştırılamadı: {{name}}", + "schedule_success": "Robot {{name}} zamanlandı", + "schedule_failed": "Robot {{name}} zamanlanamadı", + "abort_success": "Robot {{name}} yorumlaması iptal edildi", + "abort_failed": "Robot {{name}} yorumlaması iptal edilemedi", + "abort_initiated": "Robot {{name}} yorumu iptal ediliyor" + }, + "menu": { + "recordings": "Robotlar", + "runs": "Çalıştırmalar", + "proxy": "Proxy", + "apikey": "API Anahtarı" + } + }, + "browser_window": { + "attribute_modal": { + "title": "Öznitelik Seç", + "notifications": { + "list_select_success": "Liste seçildi. 
Çıkarılacak verileri seçin.", + "pagination_select_success": "Sayfalama öğesi seçildi" + } + }, + "attribute_options": { + "anchor": { + "text": "Metin: {{text}}", + "url": "URL: {{url}}" + }, + "image": { + "alt_text": "Alternatif Metin: {{altText}}", + "image_url": "Görsel URL: {{imageUrl}}" + }, + "default": { + "text": "Metin: {{text}}" + } + } + }, + "runs_table": { + "run_type_chips": { + "manual_run": "Manuel", + "scheduled_run": "Zamanlanmış", + "api": "API", + "unknown_run_type": "Bilinmeyen" + }, + "run_status_chips": { + "success": "Başarılı", + "running": "Çalışıyor", + "scheduled": "Zamanlandı", + "queued": "Kuyrukta", + "failed": "Başarısız", + "aborted": "İptal" + }, + "run_settings_modal": { + "title": "Çalıştırma Ayarları", + "labels": { + "run_id": "Çalıştırma ID", + "run_by_user": "Kullanıcı", + "run_by_schedule": "Zamanlama ID", + "run_by_api": "API", + "run_type": "Tür" + } + } + }, + "run_content": { + "tabs": { + "output_data": "Çıktı Verisi", + "log": "Kayıt" + }, + "buttons": { + "stop": "Durdur" + }, + "loading": "Veriler yükleniyor...", + "empty_output": "Çıktı verisi yok", + "captured_data": { + "title": "Yakalanan Veriler", + "download_csv": "CSV İndir", + "view_full": "Tam Veriyi Gör", + "items": "öğe", + "schema_title": "Yakalanan Metinler", + "list_title": "Yakalanan Listeler" + }, + "captured_screenshot": { + "title": "Yakalanan Görüntüler", + "download": "İndir", + "render_failed": "Görüntü render edilemedi" + } + }, + "navbar": { + "project_name": "Maxun", + "notifications": { + "success": { + "logout": "Çıkış yapıldı" + }, + "errors": { + "logout": { + "unauthorized": "Bu işlemi yapmaya yetkiniz yok", + "server": "Çıkış sırasında sunucu hatası", + "network": "Çıkış sırasında ağ hatası", + "unknown": "Bilinmeyen hata oluştu" + } + } + }, + "upgrade": { + "button": "Yükselt", + "modal": { + "up_to_date": "🎉 Güncelsiniz!", + "new_version_available": "Yeni sürüm mevcut: {{version}}. 
Güncelleyerek yeni özelliklere erişin!", + "view_updates": "Tüm güncellemeleri görüntüle", + "view_updates_link": "buradan", + "tabs": { + "manual_setup": "Manuel Kurulum", + "docker_setup": "Docker Kurulumu" + } + } + }, + "menu_items": { + "logout": "Çıkış Yap", + "discord": "Discord", + "youtube": "YouTube", + "twitter": "Twitter (X)", + "language": "Dil" + }, + "recording": { + "discard": "İptal" + } + }, + "language_menu": { + "en": "İngilizce", + "es": "İspanyolca", + "ja": "Japonca", + "zh": "Çince", + "de": "Almanca", + "tr": "Türkçe" + } +} diff --git a/public/locales/zh.json b/public/locales/zh.json index 6ac76ed9..c1e32760 100644 --- a/public/locales/zh.json +++ b/public/locales/zh.json @@ -59,10 +59,17 @@ "delete": "删除", "duplicate": "复制", "search": "搜索机器人...", + "warning_modal": { + "title": "检测到活跃浏览器", + "message": "已经有一个浏览器录制会话正在运行。您想要放弃它并创建新的录制吗?", + "discard_and_create": "放弃并创建新的", + "cancel": "取消" + }, "notifications": { "delete_warning": "该机器人有关联的运行记录。请先删除运行记录才能删除机器人", "delete_success": "机器人删除成功", - "auth_success": "机器人认证成功" + "auth_success": "机器人认证成功", + "browser_limit_warning": "远程浏览器当前繁忙。请稍等几分钟后重试" } }, "mainmenu": { @@ -141,11 +148,11 @@ "no_key_message": "您还未生成API密钥。", "generate_button": "生成API密钥", "notifications": { - "fetch_error": "获取API密钥失败 - ${error}", + "fetch_error": "获取API密钥失败 - {{error}}", "generate_success": "成功生成API密钥", - "generate_error": "生成API密钥失败 - ${error}", + "generate_error": "生成API密钥失败 - {{error}}", "delete_success": "成功删除API密钥", - "delete_error": "删除API密钥失败 - ${error}", + "delete_error": "删除API密钥失败 - {{error}}", "copy_success": "成功复制API密钥" } }, @@ -171,6 +178,11 @@ "pagination": "选择机器人如何捕获列表的其余部分", "limit": "选择要提取的项目数量", "complete": "捕获完成" + }, + "actions": { + "text": "捕获文本", + "list": "捕获列表", + "screenshot": "捕获截图" } }, "right_panel": { @@ -183,6 +195,7 @@ "confirm_capture": "确认捕获", "confirm_pagination": "确认", "confirm_limit": "确认", + "confirm_reset": "确认", "finish_capture": "完成捕获", "back": "返回", "reset": "重置", @@ 
-292,7 +305,7 @@ "no": "否" }, "messages": { - "extracting": "正在提取数据...请等待10秒到1分钟", + "extracting": "正在提取数据...请等待", "restart_required": "更新录制后请重新启动解释", "run_finished": "运行完成", "run_failed": "运行启动失败" @@ -301,11 +314,14 @@ "use_previous": "您要将之前的选择用作执行此操作的条件吗?", "previous_action": "您之前的操作是:", "element_text": "在文本元素上 " + }, + "notifications": { + "reset_success": "输出预览已成功重置" } }, "recording_page": { "loader": { - "browser_startup": "正在启动浏览器...正在导航至{{url}}" + "browser_startup": "正在启动浏览器...请稍候" } }, "integration_settings": { @@ -474,7 +490,8 @@ "schedule_success": "机器人{{name}}调度成功", "schedule_failed": "机器人{{name}}调度失败", "abort_success": "成功中止机器人{{name}}的解释", - "abort_failed": "中止机器人{{name}}的解释失败" + "abort_failed": "中止机器人{{name}}的解释失败", + "abort_initiated": "正在中止机器人 {{name}} 的解释" }, "menu": { "recordings": "机器人", @@ -599,6 +616,7 @@ "es": "西班牙语", "ja": "日语", "zh": "中文", - "de": "德语" + "de": "德语", + "tr": "土耳其语" } -} \ No newline at end of file +} diff --git a/public/svg/mcp.svg b/public/svg/mcp.svg new file mode 100644 index 00000000..979efde5 --- /dev/null +++ b/public/svg/mcp.svg @@ -0,0 +1,15 @@ + + + + + + + + + + ModelContextProtocol + + + + + \ No newline at end of file diff --git a/public/svg/webhook.svg b/public/svg/webhook.svg new file mode 100644 index 00000000..959bc307 --- /dev/null +++ b/public/svg/webhook.svg @@ -0,0 +1,14 @@ + + + + + + + + + + + + + + \ No newline at end of file diff --git a/server/src/api/record.ts b/server/src/api/record.ts index 9eec3edb..e05aa8ce 100644 --- a/server/src/api/record.ts +++ b/server/src/api/record.ts @@ -7,7 +7,7 @@ import Robot from "../models/Robot"; import Run from "../models/Run"; const router = Router(); import { getDecryptedProxyConfig } from "../routes/proxy"; -import { uuid } from "uuidv4"; +import { v4 as uuid } from "uuid"; import { createRemoteBrowserForRun, destroyRemoteBrowser } from "../browser-management/controller"; import logger from "../logger"; import { browserPool } from "../server"; @@ -19,6 +19,7 @@ 
import { Page } from "playwright"; import { WorkflowFile } from "maxun-core"; import { googleSheetUpdateTasks, processGoogleSheetUpdates } from "../workflow-management/integrations/gsheet"; import { airtableUpdateTasks, processAirtableUpdates } from "../workflow-management/integrations/airtable"; +import { sendWebhook } from "../routes/webhook"; chromium.use(stealthPlugin()); const formatRecording = (recordingData: any) => { @@ -667,6 +668,35 @@ async function executeRun(id: string, userId: string) { } ) + // Trigger webhooks for run completion + const webhookPayload = { + robot_id: plainRun.robotMetaId, + run_id: plainRun.runId, + robot_name: recording.recording_meta.name, + status: 'success', + started_at: plainRun.startedAt, + finished_at: new Date().toLocaleString(), + extracted_data: { + captured_texts: Object.values(categorizedOutput.scrapeSchema).flat() || [], + captured_lists: categorizedOutput.scrapeList, + total_rows: totalRowsExtracted, + captured_texts_count: totalSchemaItemsExtracted, + captured_lists_count: totalListItemsExtracted, + screenshots_count: extractedScreenshotsCount + }, + metadata: { + browser_id: plainRun.browserId, + user_id: userId, + } + }; + + try { + await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload); + logger.log('info', `Webhooks sent successfully for completed run ${plainRun.runId}`); + } catch (webhookError: any) { + logger.log('error', `Failed to send webhooks for run ${plainRun.runId}: ${webhookError.message}`); + } + try { googleSheetUpdateTasks[id] = { robotId: plainRun.robotMetaId, @@ -701,6 +731,34 @@ async function executeRun(id: string, userId: string) { status: 'failed', finishedAt: new Date().toLocaleString(), }); + + const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true }); + + // Trigger webhooks for run failure + const failedWebhookPayload = { + robot_id: run.robotMetaId, + run_id: run.runId, + robot_name: recording ? 
recording.recording_meta.name : 'Unknown Robot', + status: 'failed', + started_at: run.startedAt, + finished_at: new Date().toLocaleString(), + error: { + message: error.message, + stack: error.stack, + type: error.name || 'ExecutionError' + }, + metadata: { + browser_id: run.browserId, + user_id: userId, + } + }; + + try { + await sendWebhook(run.robotMetaId, 'run_failed', failedWebhookPayload); + logger.log('info', `Failure webhooks sent successfully for run ${run.runId}`); + } catch (webhookError: any) { + logger.log('error', `Failed to send failure webhooks for run ${run.runId}: ${webhookError.message}`); + } } capture( 'maxun-oss-run-created-api', diff --git a/server/src/browser-management/classes/BrowserPool.ts b/server/src/browser-management/classes/BrowserPool.ts index c1f0f557..e6dcf6b8 100644 --- a/server/src/browser-management/classes/BrowserPool.ts +++ b/server/src/browser-management/classes/BrowserPool.ts @@ -14,7 +14,7 @@ interface BrowserPoolInfo { /** * The instance of remote browser. */ - browser: RemoteBrowser, + browser: RemoteBrowser | null, /** * States if the browser's instance is being actively used. * Helps to persist the progress on the frontend when the application has been reloaded. @@ -31,6 +31,11 @@ interface BrowserPoolInfo { * @default "recording" */ state: BrowserState, + /** + * The status of the browser instance. + * Can be "reserved", "initializing", "ready" or "failed". 
+ */ + status?: "reserved" | "initializing" | "ready" | "failed", } /** @@ -205,8 +210,18 @@ export class BrowserPool { * @returns remote browser instance or undefined if it does not exist in the pool */ public getRemoteBrowser = (id: string): RemoteBrowser | undefined => { - logger.log('debug', `Remote browser with id: ${id} retrieved from the pool`); - return this.pool[id]?.browser; + const poolInfo = this.pool[id]; + if (!poolInfo) { + return undefined; + } + + // Return undefined for reserved slots (browser is null) + if (poolInfo.status === "reserved") { + logger.log('debug', `Browser ${id} is reserved but not yet ready`); + return undefined; + } + + return poolInfo.browser || undefined; }; /** @@ -506,6 +521,29 @@ export class BrowserPool { return browserIds.length > 0 ? browserIds[0] : null; }; + /** + * Checks if there are available browser slots for a user. + * Returns true if user has available slots AND none of their active browsers are in "recording" state. + * @param userId the user ID to check browser slots for + * @returns {boolean} true if user has available slots and no recording browsers, false otherwise + */ + public hasAvailableBrowserSlots = (userId: string, state?: BrowserState): boolean => { + const userBrowserIds = this.userToBrowserMap.get(userId) || []; + + if (userBrowserIds.length >= 2) { + return false; + } + + if (state === "recording") { + const hasBrowserInState = userBrowserIds.some(browserId => + this.pool[browserId] && this.pool[browserId].state === "recording" + ); + return !hasBrowserInState; + } + + return true; + }; + /** * Returns the first active browser's instance id from the pool. * If there is no active browser, it returns null. @@ -524,4 +562,71 @@ export class BrowserPool { // logger.log('warn', `No active browser in the pool`); return null; }; + + /** + * Reserves a browser slot immediately without creating the actual browser. + * This ensures slot counting is accurate for rapid successive requests. 
+ * + * @param id browser ID to reserve + * @param userId user ID that owns this reservation + * @param state browser state ("recording" or "run") + * @returns true if slot was reserved, false if user has reached limit + */ + public reserveBrowserSlot = (id: string, userId: string, state: BrowserState = "run"): boolean => { + // Check if user has available slots first + if (!this.hasAvailableBrowserSlots(userId, state)) { + logger.log('debug', `Cannot reserve slot for user ${userId}: no available slots`); + return false; + } + + // Reserve the slot with null browser + this.pool[id] = { + browser: null, + active: false, + userId, + state, + status: "reserved" + }; + + // Update the user-to-browser mapping + let userBrowserIds = this.userToBrowserMap.get(userId) || []; + if (!userBrowserIds.includes(id)) { + userBrowserIds.push(id); + this.userToBrowserMap.set(userId, userBrowserIds); + } + + logger.log('info', `Reserved browser slot ${id} for user ${userId} in state ${state}`); + return true; + }; + + /** + * Upgrades a reserved slot to an actual browser instance. + * + * @param id browser ID that was previously reserved + * @param browser the actual RemoteBrowser instance + * @returns true if successful, false if slot wasn't reserved + */ + public upgradeBrowserSlot = (id: string, browser: RemoteBrowser): boolean => { + if (!this.pool[id] || this.pool[id].status !== "reserved") { + logger.log('warn', `Cannot upgrade browser ${id}: slot not reserved`); + return false; + } + + this.pool[id].browser = browser; + this.pool[id].status = "ready"; + logger.log('info', `Upgraded browser slot ${id} to ready state`); + return true; + }; + + /** + * Marks a reserved slot as failed and removes it. 
+ * + * @param id browser ID to mark as failed + */ + public failBrowserSlot = (id: string): void => { + if (this.pool[id]) { + logger.log('info', `Marking browser slot ${id} as failed`); + this.deleteRemoteBrowser(id); + } + }; } \ No newline at end of file diff --git a/server/src/browser-management/classes/RemoteBrowser.ts b/server/src/browser-management/classes/RemoteBrowser.ts index ddfa9ab5..51f33574 100644 --- a/server/src/browser-management/classes/RemoteBrowser.ts +++ b/server/src/browser-management/classes/RemoteBrowser.ts @@ -17,6 +17,72 @@ import { WorkflowInterpreter } from "../../workflow-management/classes/Interpret import { getDecryptedProxyConfig } from '../../routes/proxy'; import { getInjectableScript } from 'idcac-playwright'; +declare global { + interface Window { + rrwebSnapshot?: any; + } +} + +interface RRWebSnapshot { + type: number; + childNodes?: RRWebSnapshot[]; + tagName?: string; + attributes?: Record; + textContent?: string; + id: number; + [key: string]: any; +} + +interface ProcessedSnapshot { + snapshot: RRWebSnapshot; + resources: { + stylesheets: Array<{ + href: string; + content: string; + media?: string; + }>; + images: Array<{ + src: string; + dataUrl: string; + alt?: string; + }>; + fonts: Array<{ + url: string; + dataUrl: string; + format?: string; + }>; + scripts: Array<{ + src: string; + content: string; + type?: string; + }>; + media: Array<{ + src: string; + dataUrl: string; + type: string; + }>; + }; + baseUrl: string; + viewport: { width: number; height: number }; + timestamp: number; + processingStats: { + discoveredResources: { + images: number; + stylesheets: number; + scripts: number; + fonts: number; + media: number; + }; + cachedResources: { + stylesheets: number; + images: number; + fonts: number; + scripts: number; + media: number; + }; + }; +} + chromium.use(stealthPlugin()); const MEMORY_CONFIG = { @@ -25,6 +91,13 @@ const MEMORY_CONFIG = { heapUsageThreshold: 0.7 // 70% (reduced threshold to react earlier) }; 
+const DEFAULT_VIEWPORT = { + width: 1280, + height: 720, + deviceScaleFactor: 1, + mobile: false +}; + const SCREENCAST_CONFIG: { format: "jpeg" | "png"; maxWidth: number; @@ -32,13 +105,17 @@ const SCREENCAST_CONFIG: { targetFPS: number; compressionQuality: number; maxQueueSize: number; + skipFrameThreshold: number, + enableAdaptiveQuality: boolean, } = { - format: 'png', - maxWidth: 1280, - maxHeight: 720, - targetFPS: 15, - compressionQuality: 0.95, - maxQueueSize: 1 + format: 'jpeg', + maxWidth: DEFAULT_VIEWPORT.width, + maxHeight: DEFAULT_VIEWPORT.height, + targetFPS: 30, + compressionQuality: 0.8, + maxQueueSize: 2, + skipFrameThreshold: 100, + enableAdaptiveQuality: true, }; /** @@ -112,6 +189,19 @@ export class RemoteBrowser { private screencastInterval: NodeJS.Timeout | null = null private isScreencastActive: boolean = false; + private isDOMStreamingActive: boolean = false; + private domUpdateInterval: NodeJS.Timeout | null = null; + private renderingMode: "screenshot" | "dom" = "screenshot"; + + private lastScrollPosition = { x: 0, y: 0 }; + private scrollThreshold = 200; // pixels + private snapshotDebounceTimeout: NodeJS.Timeout | null = null; + private isScrollTriggeredSnapshot = false; + + private networkRequestTimeout: NodeJS.Timeout | null = null; + private pendingNetworkRequests: string[] = []; + private readonly NETWORK_QUIET_PERIOD = 8000; + /** * Initializes a new instances of the {@link Generator} and {@link WorkflowInterpreter} classes and * assigns the socket instance everywhere. 
@@ -125,6 +215,65 @@ export class RemoteBrowser { this.generator = new WorkflowGenerator(socket, poolId); } + private cleanupMemory(): void { + if (this.screenshotQueue.length > 10) { + this.screenshotQueue = this.screenshotQueue.slice(-3); // Keep only last 3 + } + } + + private setupMemoryCleanup(): void { + setInterval(() => { + this.cleanupMemory(); + }, 30000); // Every 30 seconds + } + + private async processRRWebSnapshot( + snapshot: RRWebSnapshot + ): Promise { + const baseUrl = this.currentPage?.url() || ""; + + const resources = { + stylesheets: [] as Array<{ + href: string; + content: string; + media?: string; + }>, + images: [] as Array<{ src: string; dataUrl: string; alt?: string }>, + fonts: [] as Array<{ url: string; dataUrl: string; format?: string }>, + scripts: [] as Array<{ src: string; content: string; type?: string }>, + media: [] as Array<{ src: string; dataUrl: string; type: string }>, + }; + + const viewport = (await this.currentPage?.viewportSize()) || { + width: 1280, + height: 720, + }; + + return { + snapshot, + resources, + baseUrl, + viewport, + timestamp: Date.now(), + processingStats: { + discoveredResources: { + images: resources.images.length, + stylesheets: resources.stylesheets.length, + scripts: resources.scripts.length, + fonts: resources.fonts.length, + media: resources.media.length, + }, + cachedResources: { + stylesheets: resources.stylesheets.length, + images: resources.images.length, + fonts: resources.fonts.length, + scripts: resources.scripts.length, + media: resources.media.length, + }, + }, + }; + } + private initializeMemoryManagement(): void { setInterval(() => { const memoryUsage = process.memoryUsage(); @@ -216,6 +365,108 @@ export class RemoteBrowser { return normalizedNew !== normalizedLast; } + /** + * Setup scroll event listener to track user scrolling + */ + private setupScrollEventListener(): void { + this.socket.on( + "dom:scroll", + async (data: { deltaX: number; deltaY: number }) => { + if 
(!this.isDOMStreamingActive || !this.currentPage) return; + + try { + logger.debug( + `Received scroll event: deltaX=${data.deltaX}, deltaY=${data.deltaY}` + ); + + await this.currentPage.mouse.wheel(data.deltaX, data.deltaY); + + const scrollInfo = await this.currentPage.evaluate(() => ({ + x: window.scrollX, + y: window.scrollY, + maxX: Math.max( + 0, + document.documentElement.scrollWidth - window.innerWidth + ), + maxY: Math.max( + 0, + document.documentElement.scrollHeight - window.innerHeight + ), + documentHeight: document.documentElement.scrollHeight, + viewportHeight: window.innerHeight, + })); + + const scrollDelta = + Math.abs(scrollInfo.y - this.lastScrollPosition.y) + + Math.abs(scrollInfo.x - this.lastScrollPosition.x); + + logger.debug( + `Scroll delta: ${scrollDelta}, threshold: ${this.scrollThreshold}` + ); + + if (scrollDelta > this.scrollThreshold) { + this.lastScrollPosition = { x: scrollInfo.x, y: scrollInfo.y }; + this.isScrollTriggeredSnapshot = true; + + if (this.snapshotDebounceTimeout) { + clearTimeout(this.snapshotDebounceTimeout); + } + + this.snapshotDebounceTimeout = setTimeout(async () => { + logger.info( + `Triggering snapshot due to scroll. 
Position: ${scrollInfo.y}/${scrollInfo.maxY}` + ); + + await this.makeAndEmitDOMSnapshot(); + }, 300); + } + } catch (error) { + logger.error("Error handling scroll event:", error); + } + } + ); + } + + private setupPageChangeListeners(): void { + if (!this.currentPage) return; + + this.currentPage.on("domcontentloaded", async () => { + logger.info("DOM content loaded - triggering snapshot"); + await this.makeAndEmitDOMSnapshot(); + }); + + this.currentPage.on("response", async (response) => { + const url = response.url(); + if ( + response.request().resourceType() === "document" || + url.includes("api/") || + url.includes("ajax") + ) { + this.pendingNetworkRequests.push(url); + + if (this.networkRequestTimeout) { + clearTimeout(this.networkRequestTimeout); + this.networkRequestTimeout = null; + } + + logger.debug( + `Network request received: ${url}. Total pending: ${this.pendingNetworkRequests.length}` + ); + + this.networkRequestTimeout = setTimeout(async () => { + logger.info( + `Network quiet period reached. 
Processing ${this.pendingNetworkRequests.length} requests` + ); + + this.pendingNetworkRequests = []; + this.networkRequestTimeout = null; + + await this.makeAndEmitDOMSnapshot(); + }, this.NETWORK_QUIET_PERIOD); + } + }); + } + private async setupPageEventListeners(page: Page) { page.on('framenavigated', async (frame) => { if (frame === page.mainFrame()) { @@ -364,8 +615,11 @@ export class RemoteBrowser { patchedGetter.apply(navigator); patchedGetter.toString();` ); + + await this.context.addInitScript({ path: './server/src/browser-management/classes/rrweb-bundle.js' }); this.currentPage = await this.context.newPage(); + await this.setupPageEventListeners(this.currentPage); const viewportSize = await this.currentPage.viewportSize(); @@ -412,6 +666,7 @@ export class RemoteBrowser { } } + this.setupMemoryCleanup(); // this.initializeMemoryManagement(); }; @@ -448,6 +703,11 @@ export class RemoteBrowser { }>, limit: number = 5 ): Promise>> { + if (page.isClosed()) { + logger.warn("Page is closed, cannot extract list data"); + return []; + } + return await page.evaluate( async ({ listSelector, fields, limit }: { listSelector: string; @@ -963,125 +1223,244 @@ export class RemoteBrowser { ) as Array>; } + /** + * Captures a screenshot directly without running the workflow interpreter + * @param settings Screenshot settings containing fullPage, type, etc. 
+ * @returns Promise + */ + public captureDirectScreenshot = async (settings: { + fullPage: boolean; + type: 'png' | 'jpeg'; + timeout?: number; + animations?: 'disabled' | 'allow'; + caret?: 'hide' | 'initial'; + scale?: 'css' | 'device'; + }): Promise => { + if (!this.currentPage) { + logger.error("No current page available for screenshot"); + this.socket.emit('screenshotError', { + userId: this.userId, + error: 'No active page available' + }); + return; + } + + try { + this.socket.emit('screenshotCaptureStarted', { + userId: this.userId, + fullPage: settings.fullPage + }); + + const screenshotBuffer = await this.currentPage.screenshot({ + fullPage: settings.fullPage, + type: settings.type || 'png', + timeout: settings.timeout || 30000, + animations: settings.animations || 'allow', + caret: settings.caret || 'hide', + scale: settings.scale || 'device' + }); + + const base64Data = screenshotBuffer.toString('base64'); + const mimeType = `image/${settings.type || 'png'}`; + const dataUrl = `data:${mimeType};base64,${base64Data}`; + + this.socket.emit('directScreenshotCaptured', { + userId: this.userId, + screenshot: dataUrl, + mimeType: mimeType, + fullPage: settings.fullPage, + timestamp: Date.now() + }); + } catch (error) { + logger.error('Failed to capture direct screenshot:', error); + this.socket.emit('screenshotError', { + userId: this.userId, + error: error instanceof Error ? error.message : 'Unknown error occurred' + }); + } + }; + /** * Registers all event listeners needed for the recording editor session. * Should be called only once after the full initialization of the remote browser. 
* @returns void */ public registerEditorEvents = (): void => { - // For each event, include userId to make sure events are handled for the correct browser - logger.log('debug', `Registering editor events for user: ${this.userId}`); - - // Listen for specific events for this user - this.socket.on(`rerender:${this.userId}`, async () => { - logger.debug(`Rerender event received for user ${this.userId}`); - await this.makeAndEmitScreenshot(); - }); - - // For backward compatibility, also listen to the general event - this.socket.on('rerender', async () => { - logger.debug(`General rerender event received, checking if for user ${this.userId}`); - await this.makeAndEmitScreenshot(); - }); - - this.socket.on(`settings:${this.userId}`, (settings) => { - this.interpreterSettings = settings; - logger.debug(`Settings updated for user ${this.userId}`); - }); - - this.socket.on(`changeTab:${this.userId}`, async (tabIndex) => { - logger.debug(`Tab change to ${tabIndex} requested for user ${this.userId}`); - await this.changeTab(tabIndex); - }); - - this.socket.on(`addTab:${this.userId}`, async () => { - logger.debug(`New tab requested for user ${this.userId}`); - await this.currentPage?.context().newPage(); - const lastTabIndex = this.currentPage ? 
this.currentPage.context().pages().length - 1 : 0; - await this.changeTab(lastTabIndex); - }); - - this.socket.on(`closeTab:${this.userId}`, async (tabInfo) => { - logger.debug(`Close tab ${tabInfo.index} requested for user ${this.userId}`); - const page = this.currentPage?.context().pages()[tabInfo.index]; - if (page) { - if (tabInfo.isCurrent) { - if (this.currentPage?.context().pages()[tabInfo.index + 1]) { - // next tab - await this.changeTab(tabInfo.index + 1); - } else { - //previous tab - await this.changeTab(tabInfo.index - 1); - } - } - await page.close(); - logger.log( - 'debug', - `Tab ${tabInfo.index} was closed for user ${this.userId}, new tab count: ${this.currentPage?.context().pages().length}` - ); + // For each event, include userId to make sure events are handled for the correct browser + logger.log("debug", `Registering editor events for user: ${this.userId}`); + + this.socket.on( + `captureDirectScreenshot:${this.userId}`, + async (settings) => { + logger.debug( + `Direct screenshot capture requested for user ${this.userId}` + ); + await this.captureDirectScreenshot(settings); + } + ); + + // For backward compatibility + this.socket.on("captureDirectScreenshot", async (settings) => { + await this.captureDirectScreenshot(settings); + }); + + // Listen for specific events for this user + this.socket.on(`rerender:${this.userId}`, async () => { + logger.debug(`Rerender event received for user ${this.userId}`); + if (this.renderingMode === "dom") { + await this.makeAndEmitDOMSnapshot(); + } else { + await this.makeAndEmitScreenshot(); + } + }); + + this.socket.on("rerender", async () => { + logger.debug( + `General rerender event received, checking if for user ${this.userId}` + ); + if (this.renderingMode === "dom") { + await this.makeAndEmitDOMSnapshot(); + } else { + await this.makeAndEmitScreenshot(); + } + }); + + this.socket.on(`settings:${this.userId}`, (settings) => { + this.interpreterSettings = settings; + logger.debug(`Settings updated for 
user ${this.userId}`); + }); + + this.socket.on(`changeTab:${this.userId}`, async (tabIndex) => { + logger.debug( + `Tab change to ${tabIndex} requested for user ${this.userId}` + ); + await this.changeTab(tabIndex); + }); + + this.socket.on(`addTab:${this.userId}`, async () => { + logger.debug(`New tab requested for user ${this.userId}`); + await this.currentPage?.context().newPage(); + const lastTabIndex = this.currentPage + ? this.currentPage.context().pages().length - 1 + : 0; + await this.changeTab(lastTabIndex); + }); + + this.socket.on(`closeTab:${this.userId}`, async (tabInfo) => { + logger.debug( + `Close tab ${tabInfo.index} requested for user ${this.userId}` + ); + const page = this.currentPage?.context().pages()[tabInfo.index]; + if (page) { + if (tabInfo.isCurrent) { + if (this.currentPage?.context().pages()[tabInfo.index + 1]) { + // next tab + await this.changeTab(tabInfo.index + 1); } else { - logger.log('error', `Tab index ${tabInfo.index} out of range for user ${this.userId}`); + //previous tab + await this.changeTab(tabInfo.index - 1); } - }); - - this.socket.on(`setViewportSize:${this.userId}`, async (data: { width: number, height: number }) => { - const { width, height } = data; - logger.log('debug', `Viewport size change to width=${width}, height=${height} requested for user ${this.userId}`); + } + await page.close(); + logger.log( + "debug", + `Tab ${tabInfo.index} was closed for user ${ + this.userId + }, new tab count: ${this.currentPage?.context().pages().length}` + ); + } else { + logger.log( + "error", + `Tab index ${tabInfo.index} out of range for user ${this.userId}` + ); + } + }); - // Update the browser context's viewport dynamically - if (this.context && this.browser) { - this.context = await this.browser.newContext({ viewport: { width, height } }); - logger.log('debug', `Viewport size updated to width=${width}, height=${height} for user ${this.userId}`); - } - }); - - // For backward compatibility, also register the standard events 
- this.socket.on('settings', (settings) => this.interpreterSettings = settings); - this.socket.on('changeTab', async (tabIndex) => await this.changeTab(tabIndex)); - this.socket.on('addTab', async () => { - await this.currentPage?.context().newPage(); - const lastTabIndex = this.currentPage ? this.currentPage.context().pages().length - 1 : 0; - await this.changeTab(lastTabIndex); - }); - this.socket.on('closeTab', async (tabInfo) => { - const page = this.currentPage?.context().pages()[tabInfo.index]; - if (page) { - if (tabInfo.isCurrent) { - if (this.currentPage?.context().pages()[tabInfo.index + 1]) { - await this.changeTab(tabInfo.index + 1); - } else { - await this.changeTab(tabInfo.index - 1); - } - } - await page.close(); - } - }); - this.socket.on('setViewportSize', async (data: { width: number, height: number }) => { - const { width, height } = data; - if (this.context && this.browser) { - this.context = await this.browser.newContext({ viewport: { width, height } }); - } - }); + this.socket.on( + `setViewportSize:${this.userId}`, + async (data: { width: number; height: number }) => { + const { width, height } = data; + logger.log( + "debug", + `Viewport size change to width=${width}, height=${height} requested for user ${this.userId}` + ); - this.socket.on('extractListData', async (data: { - listSelector: string, - fields: Record, - currentListId: number, - pagination: any + // Update the browser context's viewport dynamically + if (this.context && this.browser) { + this.context = await this.browser.newContext({ + viewport: { width, height }, + }); + logger.log( + "debug", + `Viewport size updated to width=${width}, height=${height} for user ${this.userId}` + ); + } + } + ); + + // For backward compatibility, also register the standard events + this.socket.on( + "settings", + (settings) => (this.interpreterSettings = settings) + ); + this.socket.on( + "changeTab", + async (tabIndex) => await this.changeTab(tabIndex) + ); + this.socket.on("addTab", async () 
=> { + await this.currentPage?.context().newPage(); + const lastTabIndex = this.currentPage + ? this.currentPage.context().pages().length - 1 + : 0; + await this.changeTab(lastTabIndex); + }); + this.socket.on("closeTab", async (tabInfo) => { + const page = this.currentPage?.context().pages()[tabInfo.index]; + if (page) { + if (tabInfo.isCurrent) { + if (this.currentPage?.context().pages()[tabInfo.index + 1]) { + await this.changeTab(tabInfo.index + 1); + } else { + await this.changeTab(tabInfo.index - 1); + } + } + await page.close(); + } + }); + this.socket.on( + "setViewportSize", + async (data: { width: number; height: number }) => { + const { width, height } = data; + if (this.context && this.browser) { + this.context = await this.browser.newContext({ + viewport: { width, height }, + }); + } + } + ); + + this.socket.on( + "extractListData", + async (data: { + listSelector: string; + fields: Record; + currentListId: number; + pagination: any; }) => { - if (this.currentPage) { - const extractedData = await this.extractListData( - this.currentPage, - data.listSelector, - data.fields - ); - - this.socket.emit('listDataExtracted', { - currentListId: data.currentListId, - data: extractedData - }); - } - }); + if (this.currentPage) { + const extractedData = await this.extractListData( + this.currentPage, + data.listSelector, + data.fields + ); + + this.socket.emit("listDataExtracted", { + currentListId: data.currentListId, + data: extractedData, + }); + } + } + ); }; /** * Subscribes the remote browser for a screencast session @@ -1123,34 +1502,207 @@ export class RemoteBrowser { }; /** + * Subscribe to DOM streaming - simplified version following screenshot pattern + */ + public async subscribeToDOM(): Promise { + if (!this.client) { + logger.warn("DOM streaming requires scraping browser with CDP client"); + return; + } + + try { + // Enable required CDP domains + await this.client.send("DOM.enable"); + await this.client.send("CSS.enable"); + + 
this.isDOMStreamingActive = true; + logger.info("DOM streaming started successfully"); + + // Initial DOM snapshot + await this.makeAndEmitDOMSnapshot(); + + this.setupScrollEventListener(); + this.setupPageChangeListeners(); + } catch (error) { + logger.error("Failed to start DOM streaming:", error); + this.isDOMStreamingActive = false; + } + } + + /** + * CDP-based DOM snapshot creation using captured network resources + */ + public async makeAndEmitDOMSnapshot(): Promise { + if (!this.currentPage || !this.isDOMStreamingActive) { + return; + } + + try { + // Check if page is still valid and not closed + if (this.currentPage.isClosed()) { + logger.debug("Skipping DOM snapshot - page is closed"); + return; + } + + // Double-check page state after network wait + if (this.currentPage.isClosed()) { + logger.debug("Skipping DOM snapshot - page closed during network wait"); + return; + } + + // Get current scroll position + const currentScrollInfo = await this.currentPage.evaluate(() => ({ + x: window.scrollX, + y: window.scrollY, + maxX: Math.max( + 0, + document.documentElement.scrollWidth - window.innerWidth + ), + maxY: Math.max( + 0, + document.documentElement.scrollHeight - window.innerHeight + ), + documentHeight: document.documentElement.scrollHeight, + })); + + logger.info( + `Creating rrweb snapshot at scroll position: ${currentScrollInfo.y}/${currentScrollInfo.maxY}` + ); + + // Update our tracked scroll position + this.lastScrollPosition = { + x: currentScrollInfo.x, + y: currentScrollInfo.y, + }; + + // Final check before snapshot + if (this.currentPage.isClosed()) { + logger.debug("Skipping DOM snapshot - page closed before snapshot"); + return; + } + + // Capture snapshot using rrweb + const rawSnapshot = await this.currentPage.evaluate(() => { + if (typeof window.rrwebSnapshot === "undefined") { + throw new Error("rrweb-snapshot library not available"); + } + + return window.rrwebSnapshot.snapshot(document, { + inlineImages: true, + collectFonts: true, + 
}); + }); + + // Process the snapshot to proxy resources + const processedSnapshot = await this.processRRWebSnapshot(rawSnapshot); + + // Add scroll position information + const enhancedSnapshot = { + ...processedSnapshot, + scrollPosition: currentScrollInfo, + captureTime: Date.now(), + }; + + // Emit the processed snapshot + this.emitRRWebSnapshot(enhancedSnapshot); + } catch (error) { + // Handle navigation context destruction gracefully + if ( + error instanceof Error && + (error.message.includes("Execution context was destroyed") || + error.message.includes("most likely because of a navigation") || + error.message.includes("Target closed")) + ) { + logger.debug("DOM snapshot skipped due to page navigation or closure"); + return; // Don't emit error for navigation - this is expected + } + + logger.error("Failed to create rrweb snapshot:", error); + this.socket.emit("dom-mode-error", { + userId: this.userId, + message: "Failed to create rrweb snapshot", + error: error instanceof Error ? 
error.message : String(error), + timestamp: Date.now(), + }); + } + } + + /** + * Emit DOM snapshot to client - following screenshot pattern + */ + private emitRRWebSnapshot(processedSnapshot: ProcessedSnapshot): void { + this.socket.emit("domcast", { + snapshotData: processedSnapshot, + userId: this.userId, + timestamp: Date.now(), + }); + } + + /** + * Stop DOM streaming - following screencast pattern + */ + private async stopDOM(): Promise { + this.isDOMStreamingActive = false; + + if (this.domUpdateInterval) { + clearInterval(this.domUpdateInterval); + this.domUpdateInterval = null; + } + + if (this.networkRequestTimeout) { + clearTimeout(this.networkRequestTimeout); + this.networkRequestTimeout = null; + } + + this.pendingNetworkRequests = []; + + if (this.client) { + try { + await this.client.send("DOM.disable"); + await this.client.send("CSS.disable"); + } catch (error) { + logger.warn("Error stopping DOM stream:", error); + } + } + + logger.info("DOM streaming stopped successfully"); + } + + /** * Terminates the screencast session and closes the remote browser. * If an interpretation was running it will be stopped. 
* @returns {Promise} */ public async switchOff(): Promise { - try { - this.isScreencastActive = false; + try { + this.isScreencastActive = false; + this.isDOMStreamingActive = false; - await this.interpreter.stopInterpretation(); + await this.interpreter.stopInterpretation(); - if (this.screencastInterval) { - clearInterval(this.screencastInterval); - } - - if (this.client) { - await this.stopScreencast(); - } - - if (this.browser) { - await this.browser.close(); - } - - this.screenshotQueue = []; - //this.performanceMonitor.reset(); - - } catch (error) { - logger.error('Error during browser shutdown:', error); + if (this.screencastInterval) { + clearInterval(this.screencastInterval); } + + if (this.domUpdateInterval) { + clearInterval(this.domUpdateInterval); + } + + if (this.client) { + await this.stopScreencast(); + await this.stopDOM(); + } + + if (this.browser) { + await this.browser.close(); + } + + this.screenshotQueue = []; + //this.performanceMonitor.reset(); + + } catch (error) { + logger.error('Error during browser shutdown:', error); + } } private async optimizeScreenshot(screenshot: Buffer): Promise { @@ -1272,6 +1824,7 @@ export class RemoteBrowser { const page = this.currentPage?.context().pages()[tabIndex]; if (page) { await this.stopScreencast(); + await this.stopDOM(); this.currentPage = page; await this.setupPageEventListeners(this.currentPage); @@ -1283,8 +1836,13 @@ export class RemoteBrowser { url: this.currentPage.url(), userId: this.userId }); - await this.makeAndEmitScreenshot(); - await this.subscribeToScreencast(); + if (this.isDOMStreamingActive) { + await this.makeAndEmitDOMSnapshot(); + await this.subscribeToDOM(); + } else { + await this.makeAndEmitScreenshot(); + await this.subscribeToScreencast(); + } } else { logger.log('error', `${tabIndex} index out of range of pages`) } @@ -1309,7 +1867,11 @@ export class RemoteBrowser { await this.setupPageEventListeners(this.currentPage); this.client = await 
this.currentPage.context().newCDPSession(this.currentPage); - await this.subscribeToScreencast(); + if (this.renderingMode === "dom") { + await this.subscribeToDOM(); + } else { + await this.subscribeToScreencast(); + } } else { logger.log('error', 'Could not get a new page, returned undefined'); } @@ -1399,7 +1961,7 @@ export class RemoteBrowser { */ private emitScreenshot = async (payload: Buffer, viewportSize?: { width: number, height: number }): Promise => { if (this.screenshotQueue.length > SCREENCAST_CONFIG.maxQueueSize) { - this.screenshotQueue = this.screenshotQueue.slice(-SCREENCAST_CONFIG.maxQueueSize); + this.screenshotQueue = this.screenshotQueue.slice(-1); } if (this.isProcessingScreenshot) { @@ -1414,7 +1976,7 @@ export class RemoteBrowser { try { const optimizationPromise = this.optimizeScreenshot(payload); const timeoutPromise = new Promise((resolve) => { - setTimeout(() => resolve(payload), 150); + setTimeout(() => resolve(payload), 100); }); const optimizedScreenshot = await Promise.race([optimizationPromise, timeoutPromise]); @@ -1423,10 +1985,12 @@ export class RemoteBrowser { payload = null as any; - this.socket.emit('screencast', { + setImmediate(async () => { + this.socket.emit('screencast', { image: dataWithMimeType, userId: this.userId, viewport: viewportSize || await this.currentPage?.viewportSize() || null + }); }); } catch (error) { logger.error('Screenshot emission failed:', error); @@ -1434,24 +1998,27 @@ export class RemoteBrowser { const base64Data = payload.toString('base64'); const dataWithMimeType = `data:image/png;base64,${base64Data}`; - this.socket.emit('screencast', { - image: dataWithMimeType, - userId: this.userId, - viewport: viewportSize || await this.currentPage?.viewportSize() || null + setImmediate(async () => { + this.socket.emit('screencast', { + image: dataWithMimeType, + userId: this.userId, + viewport: viewportSize || await this.currentPage?.viewportSize() || null + }); }); } catch (e) { logger.error('Fallback 
screenshot emission also failed:', e); } } finally { this.isProcessingScreenshot = false; - + if (this.screenshotQueue.length > 0) { - const nextScreenshot = this.screenshotQueue.shift(); - if (nextScreenshot) { - setTimeout(() => { - this.emitScreenshot(nextScreenshot); - }, 1000 / SCREENCAST_CONFIG.targetFPS); - } + const nextScreenshot = this.screenshotQueue.shift(); + if (nextScreenshot) { + const delay = this.screenshotQueue.length > 0 ? 16 : 33; + setTimeout(() => { + this.emitScreenshot(nextScreenshot); + }, delay); + } } } }; diff --git a/server/src/browser-management/classes/bundle-rrweb.js b/server/src/browser-management/classes/bundle-rrweb.js new file mode 100644 index 00000000..c2fe8b8a --- /dev/null +++ b/server/src/browser-management/classes/bundle-rrweb.js @@ -0,0 +1,10 @@ +const esbuild = require('esbuild'); + +esbuild.build({ + entryPoints: ['rrweb-entry.js'], + bundle: true, + minify: true, + outfile: 'rrweb-bundle.js', + format: 'iife', // so that rrwebSnapshot is available on window + globalName: 'rrwebSnapshotBundle' +}).catch(() => process.exit(1)); diff --git a/server/src/browser-management/classes/rrweb-bundle.js b/server/src/browser-management/classes/rrweb-bundle.js new file mode 100644 index 00000000..58532e0a --- /dev/null +++ b/server/src/browser-management/classes/rrweb-bundle.js @@ -0,0 +1 @@ +"use strict";var rrwebSnapshotBundle=(()=>{var g;(function(e){e[e.Document=0]="Document",e[e.DocumentType=1]="DocumentType",e[e.Element=2]="Element",e[e.Text=3]="Text",e[e.CDATA=4]="CDATA",e[e.Comment=5]="Comment"})(g||(g={}));function ue(e){return e.nodeType===e.ELEMENT_NODE}function fe(e){var t=e?.host;return t?.shadowRoot===e}function Q(e){return Object.prototype.toString.call(e)==="[object ShadowRoot]"}function de(e){return e.includes(" background-clip: text;")&&!e.includes(" -webkit-background-clip: text;")&&(e=e.replace(" background-clip: text;"," -webkit-background-clip: text; background-clip: text;")),e}function Z(e){try{var 
t=e.rules||e.cssRules;return t?de(Array.from(t).map(me).join("")):null}catch{return null}}function me(e){var t=e.cssText;if(pe(e))try{t=Z(e.styleSheet)||t}catch{}return t}function pe(e){return"styleSheet"in e}var he=function(){function e(){this.idNodeMap=new Map,this.nodeMetaMap=new WeakMap}return e.prototype.getId=function(t){var r;if(!t)return-1;var a=(r=this.getMeta(t))===null||r===void 0?void 0:r.id;return a??-1},e.prototype.getNode=function(t){return this.idNodeMap.get(t)||null},e.prototype.getIds=function(){return Array.from(this.idNodeMap.keys())},e.prototype.getMeta=function(t){return this.nodeMetaMap.get(t)||null},e.prototype.removeNodeFromMap=function(t){var r=this,a=this.getId(t);this.idNodeMap.delete(a),t.childNodes&&t.childNodes.forEach(function(i){return r.removeNodeFromMap(i)})},e.prototype.has=function(t){return this.idNodeMap.has(t)},e.prototype.hasNode=function(t){return this.nodeMetaMap.has(t)},e.prototype.add=function(t,r){var a=r.id;this.idNodeMap.set(a,t),this.nodeMetaMap.set(t,r)},e.prototype.replace=function(t,r){var a=this.getNode(t);if(a){var i=this.nodeMetaMap.get(a);i&&this.nodeMetaMap.set(r,i)}this.idNodeMap.set(t,r)},e.prototype.reset=function(){this.idNodeMap=new Map,this.nodeMetaMap=new WeakMap},e}();function ve(e){var t=e.maskInputOptions,r=e.tagName,a=e.type,i=e.value,n=e.maskInputFn,l=i||"";return(t[r.toLowerCase()]||t[a])&&(n?l=n(l):l="*".repeat(l.length)),l}var oe="__rrweb_original__";function ge(e){var t=e.getContext("2d");if(!t)return!0;for(var r=50,a=0;a-1?t=e.split("/").slice(0,3).join("/"):t=e.split("/")[0],t=t.split("?")[0],t}var H,le,Te=/url\((?:(')([^']*)'|(")(.*?)"|([^)]*))\)/gm,Ie=/^(?!www\.|(?:http|ftp)s?:\/\/|[A-Za-z]:\\|\/\/|#).*/,Ee=/^(data:)([^,]*),(.*)/i;function $(e,t){return(e||"").replace(Te,function(r,a,i,n,l,u){var o=i||l||u,f=a||n||"";if(!o)return r;if(!Ie.test(o)||Ee.test(o))return"url(".concat(f).concat(o).concat(f,")");if(o[0]==="/")return"url(".concat(f).concat(we(t)+o).concat(f,")");var 
c=t.split("/"),p=o.split("/");c.pop();for(var C=0,k=p;C=t.length);){var n=a(Le);if(n.slice(-1)===",")n=j(e,n.substring(0,n.length-1)),i.push(n);else{var l="";n=j(e,n);for(var u=!1;;){var o=t.charAt(r);if(o===""){i.push((n+l).trim());break}else if(u)o===")"&&(u=!1);else if(o===","){r+=1,i.push((n+l).trim());break}else o==="("&&(u=!0);l+=o,r+=1}}}return i.join(", ")}function j(e,t){if(!t||t.trim()==="")return t;var r=e.createElement("a");return r.href=t,r.href}function Me(e){return!!(e.tagName==="svg"||e.ownerSVGElement)}function ee(){var e=document.createElement("a");return e.href="",e.href}function Oe(e,t,r,a){return r==="src"||r==="href"&&a&&!(t==="use"&&a[0]==="#")||r==="xlink:href"&&a&&a[0]!=="#"||r==="background"&&a&&(t==="table"||t==="td"||t==="th")?j(e,a):r==="srcset"&&a?Ne(e,a):r==="style"&&a?$(a,ee()):t==="object"&&r==="data"&&a?j(e,a):a}function Re(e,t,r){if(typeof t=="string"){if(e.classList.contains(t))return!0}else for(var a=e.classList.length;a--;){var i=e.classList[a];if(t.test(i))return!0}return r?e.matches(r):!1}function J(e,t,r){if(!e)return!1;if(e.nodeType!==e.ELEMENT_NODE)return r?J(e.parentNode,t,r):!1;for(var a=e.classList.length;a--;){var i=e.classList[a];if(t.test(i))return!0}return r?J(e.parentNode,t,r):!1}function Ae(e,t,r){var a=e.nodeType===e.ELEMENT_NODE?e:e.parentElement;if(a===null)return!1;if(typeof t=="string"){if(a.classList.contains(t)||a.closest(".".concat(t)))return!0}else if(J(a,t,!0))return!0;return!!(r&&(a.matches(r)||a.closest(r)))}function De(e,t,r){var a=e.contentWindow;if(a){var i=!1,n;try{n=a.document.readyState}catch{return}if(n!=="complete"){var l=setTimeout(function(){i||(t(),i=!0)},r);e.addEventListener("load",function(){clearTimeout(l),i=!0,t()});return}var u="about:blank";if(a.location.href!==u||e.src===u||e.src==="")return setTimeout(t,0),e.addEventListener("load",t);e.addEventListener("load",t)}}function Fe(e,t,r){var a=!1,i;try{i=e.sheet}catch{return}if(!i){var 
n=setTimeout(function(){a||(t(),a=!0)},r);e.addEventListener("load",function(){clearTimeout(n),a=!0,t()})}}function Ue(e,t){var r=t.doc,a=t.mirror,i=t.blockClass,n=t.blockSelector,l=t.maskTextClass,u=t.maskTextSelector,o=t.inlineStylesheet,f=t.maskInputOptions,c=f===void 0?{}:f,p=t.maskTextFn,C=t.maskInputFn,k=t.dataURLOptions,S=k===void 0?{}:k,T=t.inlineImages,I=t.recordCanvas,E=t.keepIframeSrcFn,m=t.newlyAddedElement,s=m===void 0?!1:m,y=We(r,a);switch(e.nodeType){case e.DOCUMENT_NODE:return e.compatMode!=="CSS1Compat"?{type:g.Document,childNodes:[],compatMode:e.compatMode}:{type:g.Document,childNodes:[]};case e.DOCUMENT_TYPE_NODE:return{type:g.DocumentType,name:e.name,publicId:e.publicId,systemId:e.systemId,rootId:y};case e.ELEMENT_NODE:return _e(e,{doc:r,blockClass:i,blockSelector:n,inlineStylesheet:o,maskInputOptions:c,maskInputFn:C,dataURLOptions:S,inlineImages:T,recordCanvas:I,keepIframeSrcFn:E,newlyAddedElement:s,rootId:y});case e.TEXT_NODE:return Pe(e,{maskTextClass:l,maskTextSelector:u,maskTextFn:p,rootId:y});case e.CDATA_SECTION_NODE:return{type:g.CDATA,textContent:"",rootId:y};case e.COMMENT_NODE:return{type:g.Comment,textContent:e.textContent||"",rootId:y};default:return!1}}function We(e,t){if(t.hasNode(e)){var r=t.getId(e);return r===1?void 0:r}}function Pe(e,t){var r,a=t.maskTextClass,i=t.maskTextSelector,n=t.maskTextFn,l=t.rootId,u=e.parentNode&&e.parentNode.tagName,o=e.textContent,f=u==="STYLE"?!0:void 0,c=u==="SCRIPT"?!0:void 0;if(f&&o){try{e.nextSibling||e.previousSibling||!((r=e.parentNode.sheet)===null||r===void 0)&&r.cssRules&&(o=Ce(e.parentNode.sheet))}catch(p){console.warn("Cannot get CSS styles from text's parentNode. 
Error: ".concat(p),e)}o=$(o,ee())}return c&&(o=""),!f&&!c&&o&&Ae(e,a,i)&&(o=n?n(o):o.replace(/[\S]/g,"*")),{type:g.Text,textContent:o||"",isStyle:f,rootId:l}}function _e(e,t){for(var r=t.doc,a=t.blockClass,i=t.blockSelector,n=t.inlineStylesheet,l=t.maskInputOptions,u=l===void 0?{}:l,o=t.maskInputFn,f=t.dataURLOptions,c=f===void 0?{}:f,p=t.inlineImages,C=t.recordCanvas,k=t.keepIframeSrcFn,S=t.newlyAddedElement,T=S===void 0?!1:S,I=t.rootId,E=Re(e,a,i),m=be(e),s={},y=e.attributes.length,A=0;A { +export const initializeRemoteBrowserForRecording = (userId: string, mode: string = "dom"): string => { const id = getActiveBrowserIdByState(userId, "recording") || uuid(); createSocketConnection( io.of(id), @@ -37,7 +37,15 @@ export const initializeRemoteBrowserForRecording = (userId: string): string => { browserSession.interpreter.subscribeToPausing(); await browserSession.initialize(userId); await browserSession.registerEditorEvents(); - await browserSession.subscribeToScreencast(); + + if (mode === "dom") { + await browserSession.subscribeToDOM(); + logger.info('DOM streaming started for scraping browser in recording mode'); + } else { + await browserSession.subscribeToScreencast(); + logger.info('Screenshot streaming started for local browser in recording mode'); + } + browserPool.addRemoteBrowser(id, browserSession, userId, false, "recording"); } socket.emit('loaded'); @@ -54,20 +62,23 @@ export const initializeRemoteBrowserForRecording = (userId: string): string => { * @category BrowserManagement-Controller */ export const createRemoteBrowserForRun = (userId: string): string => { - const id = uuid(); + if (!userId) { + logger.log('error', 'createRemoteBrowserForRun: Missing required parameter userId'); + throw new Error('userId is required'); + } + + const id = uuid(); + + const slotReserved = browserPool.reserveBrowserSlot(id, userId, "run"); + if (!slotReserved) { + logger.log('warn', `Cannot create browser for user ${userId}: no available slots`); + throw new 
Error('User has reached maximum browser limit'); + } + + logger.log('info', `createRemoteBrowserForRun: Reserved slot ${id} for user ${userId}`); + + initializeBrowserAsync(id, userId); - createSocketConnectionForRun( - io.of(id), - async (socket: Socket) => { - try { - const browserSession = new RemoteBrowser(socket, userId, id); - await browserSession.initialize(userId); - browserPool.addRemoteBrowser(id, browserSession, userId, false, "run"); - socket.emit('ready-for-run'); - } catch (error: any) { - logger.error(`Error initializing browser: ${error.message}`); - } - }); return id; }; @@ -135,6 +146,19 @@ export const getActiveBrowserIdByState = (userId: string, state: "recording" | " return browserPool.getActiveBrowserId(userId, state); }; +/** + * Checks if there are available browser slots for a user. + * Wrapper around {@link browserPool.hasAvailableBrowserSlots()} function. + * If state is provided, also checks that none of their active browsers are in that state. + * @param userId the user ID to check browser slots for + * @param state optional state to check - if provided, ensures no browser is in this state + * @returns {boolean} true if user has available slots (and no browsers in specified state if state is provided) + * @category BrowserManagement-Controller + */ +export const canCreateBrowserInState = (userId: string, state?: "recording" | "run"): boolean => { + return browserPool.hasAvailableBrowserSlots(userId, state); +}; + /** * Returns the url string from a remote browser if exists in the browser pool. 
* @param id instance id of the remote browser @@ -198,3 +222,87 @@ export const stopRunningInterpretation = async (userId: string) => { logger.log('error', 'Cannot stop interpretation: No active browser or generator.'); } }; + +const initializeBrowserAsync = async (id: string, userId: string) => { + try { + const namespace = io.of(id); + let clientConnected = false; + let connectionTimeout: NodeJS.Timeout; + + const waitForConnection = new Promise((resolve) => { + namespace.on('connection', (socket: Socket) => { + clientConnected = true; + clearTimeout(connectionTimeout); + logger.log('info', `Frontend connected to browser ${id} via socket ${socket.id}`); + resolve(socket); + }); + + connectionTimeout = setTimeout(() => { + if (!clientConnected) { + logger.log('warn', `No client connected to browser ${id} within timeout, proceeding with dummy socket`); + resolve(null); + } + }, 10000); + }); + + namespace.on('error', (error: any) => { + logger.log('error', `Socket namespace error for browser ${id}: ${error.message}`); + clearTimeout(connectionTimeout); + browserPool.failBrowserSlot(id); + }); + + const socket = await waitForConnection; + + try { + let browserSession: RemoteBrowser; + + if (socket) { + logger.log('info', `Using real socket for browser ${id}`); + browserSession = new RemoteBrowser(socket, userId, id); + } else { + logger.log('info', `Using dummy socket for browser ${id}`); + const dummySocket = { + emit: (event: string, data?: any) => { + logger.log('debug', `Browser ${id} dummy socket emitted ${event}:`, data); + }, + on: () => {}, + id: `dummy-${id}`, + } as any; + + browserSession = new RemoteBrowser(dummySocket, userId, id); + } + + await browserSession.initialize(userId); + + const upgraded = browserPool.upgradeBrowserSlot(id, browserSession); + if (!upgraded) { + throw new Error('Failed to upgrade reserved browser slot'); + } + + if (socket) { + socket.emit('ready-for-run'); + } else { + setTimeout(async () => { + try { + logger.log('info', 
`Starting execution for browser ${id} with dummy socket`); + } catch (error: any) { + logger.log('error', `Error executing run for browser ${id}: ${error.message}`); + } + }, 100); + } + + logger.log('info', `Browser ${id} successfully initialized for run with ${socket ? 'real' : 'dummy'} socket`); + + } catch (error: any) { + logger.log('error', `Error initializing browser ${id}: ${error.message}`); + browserPool.failBrowserSlot(id); + if (socket) { + socket.emit('error', { message: error.message }); + } + } + + } catch (error: any) { + logger.log('error', `Error setting up browser ${id}: ${error.message}`); + browserPool.failBrowserSlot(id); + } +}; diff --git a/server/src/browser-management/inputHandlers.ts b/server/src/browser-management/inputHandlers.ts index 9b6551a5..da48f4fd 100644 --- a/server/src/browser-management/inputHandlers.ts +++ b/server/src/browser-management/inputHandlers.ts @@ -11,6 +11,8 @@ import { WorkflowGenerator } from "../workflow-management/classes/Generator"; import { Page } from "playwright"; import { throttle } from "../../../src/helpers/inputHelpers"; import { CustomActions } from "../../../src/shared/types"; +import { WhereWhatPair } from "maxun-core"; +import { RemoteBrowser } from './classes/RemoteBrowser'; /** * A wrapper function for handling user input. 
@@ -27,7 +29,7 @@ import { CustomActions } from "../../../src/shared/types"; */ const handleWrapper = async ( handleCallback: ( - generator: WorkflowGenerator, + activeBrowser: RemoteBrowser, page: Page, args?: any ) => Promise, @@ -44,9 +46,9 @@ const handleWrapper = async ( const currentPage = activeBrowser?.getCurrentPage(); if (currentPage && activeBrowser) { if (args) { - await handleCallback(activeBrowser.generator, currentPage, args); + await handleCallback(activeBrowser, currentPage, args); } else { - await handleCallback(activeBrowser.generator, currentPage); + await handleCallback(activeBrowser, currentPage); } } else { logger.log('warn', `No active page for browser ${id}`); @@ -85,8 +87,19 @@ const onGenerateAction = async (customActionEventData: CustomActionEventData, us * @category BrowserManagement */ const handleGenerateAction = - async (generator: WorkflowGenerator, page: Page, { action, settings }: CustomActionEventData) => { - await generator.customAction(action, settings, page); + async (activeBrowser: RemoteBrowser, page: Page, { action, settings }: CustomActionEventData) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring generate action event: page is closed`); + return; + } + + const generator = activeBrowser.generator; + await generator.customAction(action, settings, page); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling generate action event: ${message}`); + } } /** @@ -104,40 +117,51 @@ const onMousedown = async (coordinates: Coordinates, userId: string) => { * A mousedown event handler. * Reproduces the click on the remote browser instance * and generates pair data for the recorded workflow. 
- * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @param x - the x coordinate of the mousedown event * @param y - the y coordinate of the mousedown event * @category BrowserManagement */ -const handleMousedown = async (generator: WorkflowGenerator, page: Page, { x, y }: Coordinates) => { +const handleMousedown = async (activeBrowser: RemoteBrowser, page: Page, { x, y }: Coordinates) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring mousedown event: page is closed`); + return; + } + + const generator = activeBrowser.generator; await generator.onClick({ x, y }, page); const previousUrl = page.url(); const tabsBeforeClick = page.context().pages().length; await page.mouse.click(x, y); // try if the click caused a navigation to a new url try { - await page.waitForNavigation({ timeout: 2000 }); - const currentUrl = page.url(); - if (currentUrl !== previousUrl) { - generator.notifyUrlChange(currentUrl); - } + await page.waitForNavigation({ timeout: 2000 }); + const currentUrl = page.url(); + if (currentUrl !== previousUrl) { + generator.notifyUrlChange(currentUrl); + } } catch (e) { - const { message } = e as Error; + const { message } = e as Error; } //ignore possible timeouts // check if any new page was opened by the click const tabsAfterClick = page.context().pages().length; const numOfNewPages = tabsAfterClick - tabsBeforeClick; if (numOfNewPages > 0) { - for (let i = 1; i <= numOfNewPages; i++) { - const newPage = page.context().pages()[tabsAfterClick - i]; - if (newPage) { - generator.notifyOnNewTab(newPage, tabsAfterClick - i); - } + for (let i = 1; i <= numOfNewPages; i++) { + const newPage = page.context().pages()[tabsAfterClick - i]; + if (newPage) { + generator.notifyOnNewTab(newPage, tabsAfterClick - i); } + } } - logger.log('debug', `Clicked on position x:${x}, y:${y}`); + logger.log("debug", 
`Clicked on position x:${x}, y:${y}`); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling mousedown event: ${message}`); + } }; /** @@ -156,15 +180,16 @@ const onWheel = async (scrollDeltas: ScrollDeltas, userId: string) => { * Reproduces the wheel event on the remote browser instance. * Scroll is not generated for the workflow pair. This is because * Playwright scrolls elements into focus on any action. - * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @param deltaX - the delta x of the wheel event * @param deltaY - the delta y of the wheel event * @category BrowserManagement */ -const handleWheel = async (generator: WorkflowGenerator, page: Page, { deltaX, deltaY }: ScrollDeltas) => { +const handleWheel = async (activeBrowser: RemoteBrowser, page: Page, { deltaX, deltaY }: ScrollDeltas) => { try { if (page.isClosed()) { + logger.log("debug", `Ignoring wheel event: page is closed`); return; } @@ -194,28 +219,30 @@ const onMousemove = async (coordinates: Coordinates, userId: string) => { * Reproduces the mousemove event on the remote browser instance * and generates data for the client's highlighter. * Mousemove is also not reflected in the workflow. 
- * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @param x - the x coordinate of the mousemove event * @param y - the y coordinate of the mousemove event * @category BrowserManagement */ -const handleMousemove = async (generator: WorkflowGenerator, page: Page, { x, y }: Coordinates) => { +const handleMousemove = async (activeBrowser: RemoteBrowser, page: Page, { x, y }: Coordinates) => { try { if (page.isClosed()) { - logger.log('debug', `Ignoring mousemove event: page is closed`); + logger.log("debug", `Ignoring mousemove event: page is closed`); return; } + + const generator = activeBrowser.generator; await page.mouse.move(x, y); throttle(async () => { if (!page.isClosed()) { await generator.generateDataForHighlighter(page, { x, y }); } }, 100)(); - logger.log('debug', `Moved over position x:${x}, y:${y}`); + logger.log("debug", `Moved over position x:${x}, y:${y}`); } catch (e) { const { message } = e as Error; - logger.log('error', message); + logger.log("error", message); } } @@ -234,28 +261,50 @@ const onKeydown = async (keyboardInput: KeyboardInput, userId: string) => { * A keydown event handler. * Reproduces the keydown event on the remote browser instance * and generates the workflow pair data. 
- * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @param key - the pressed key * @param coordinates - the coordinates, where the keydown event happened * @category BrowserManagement */ -const handleKeydown = async (generator: WorkflowGenerator, page: Page, { key, coordinates }: KeyboardInput) => { - await page.keyboard.down(key); - await generator.onKeyboardInput(key, coordinates, page); - logger.log('debug', `Key ${key} pressed`); +const handleKeydown = async (activeBrowser: RemoteBrowser, page: Page, { key, coordinates }: KeyboardInput) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring keydown event: page is closed`); + return; + } + + const generator = activeBrowser.generator; + await page.keyboard.down(key); + await generator.onKeyboardInput(key, coordinates, page); + logger.log("debug", `Key ${key} pressed`); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling keydown event: ${message}`); + } }; /** * Handles the date selection event. 
- * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @param data - the data of the date selection event {@link DatePickerEventData} * @category BrowserManagement */ -const handleDateSelection = async (generator: WorkflowGenerator, page: Page, data: DatePickerEventData) => { - await generator.onDateSelection(page, data); - logger.log('debug', `Date ${data.value} selected`); +const handleDateSelection = async (activeBrowser: RemoteBrowser, page: Page, data: DatePickerEventData) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring date selection event: page is closed`); + return; + } + + const generator = activeBrowser.generator; + await generator.onDateSelection(page, data); + logger.log("debug", `Date ${data.value} selected`); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling date selection event: ${message}`); + } } /** @@ -271,14 +320,25 @@ const onDateSelection = async (data: DatePickerEventData, userId: string) => { /** * Handles the dropdown selection event. 
- * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @param data - the data of the dropdown selection event * @category BrowserManagement */ -const handleDropdownSelection = async (generator: WorkflowGenerator, page: Page, data: { selector: string, value: string }) => { - await generator.onDropdownSelection(page, data); - logger.log('debug', `Dropdown value ${data.value} selected`); +const handleDropdownSelection = async (activeBrowser: RemoteBrowser, page: Page, data: { selector: string, value: string }) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring dropdown selection event: page is closed`); + return; + } + + const generator = activeBrowser.generator; + await generator.onDropdownSelection(page, data); + logger.log("debug", `Dropdown value ${data.value} selected`); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling dropdown selection event: ${message}`); + } } /** @@ -294,14 +354,25 @@ const onDropdownSelection = async (data: { selector: string, value: string }, us /** * Handles the time selection event. 
- * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @param data - the data of the time selection event * @category BrowserManagement */ -const handleTimeSelection = async (generator: WorkflowGenerator, page: Page, data: { selector: string, value: string }) => { - await generator.onTimeSelection(page, data); - logger.log('debug', `Time value ${data.value} selected`); +const handleTimeSelection = async (activeBrowser: RemoteBrowser, page: Page, data: { selector: string, value: string }) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring time selection event: page is closed`); + return; + } + + const generator = activeBrowser.generator; + await generator.onTimeSelection(page, data); + logger.log("debug", `Time value ${data.value} selected`); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling time selection event: ${message}`); + } } /** @@ -317,14 +388,31 @@ const onTimeSelection = async (data: { selector: string, value: string }, userId /** * Handles the datetime-local selection event. 
- * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @param data - the data of the datetime-local selection event * @category BrowserManagement */ -const handleDateTimeLocalSelection = async (generator: WorkflowGenerator, page: Page, data: { selector: string, value: string }) => { - await generator.onDateTimeLocalSelection(page, data); - logger.log('debug', `DateTime Local value ${data.value} selected`); +const handleDateTimeLocalSelection = async (activeBrowser: RemoteBrowser, page: Page, data: { selector: string, value: string }) => { + try { + if (page.isClosed()) { + logger.log( + "debug", + `Ignoring datetime-local selection event: page is closed` + ); + return; + } + + const generator = activeBrowser.generator; + await generator.onDateTimeLocalSelection(page, data); + logger.log("debug", `DateTime Local value ${data.value} selected`); + } catch (e) { + const { message } = e as Error; + logger.log( + "warn", + `Error handling datetime-local selection event: ${message}` + ); + } } /** @@ -353,14 +441,24 @@ const onKeyup = async (keyboardInput: KeyboardInput, userId: string) => { * A keyup event handler. * Reproduces the keyup event on the remote browser instance. * Does not generate any data - keyup is not reflected in the workflow. 
- * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @param key - the released key * @category BrowserManagement */ -const handleKeyup = async (generator: WorkflowGenerator, page: Page, key: string) => { - await page.keyboard.up(key); - logger.log('debug', `Key ${key} unpressed`); +const handleKeyup = async (activeBrowser: RemoteBrowser, page: Page, key: string) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring keyup event: page is closed`); + return; + } + + await page.keyboard.up(key); + logger.log("debug", `Key ${key} unpressed`); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling keyup event: ${message}`); + } }; /** @@ -377,23 +475,36 @@ const onChangeUrl = async (url: string, userId: string) => { /** * An url change event handler. * Navigates the page to the given url and generates data for the workflow. 
- * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @param url - the new url of the page * @category BrowserManagement */ -const handleChangeUrl = async (generator: WorkflowGenerator, page: Page, url: string) => { - if (url) { - await generator.onChangeUrl(url, page); - try { - await page.goto(url, { waitUntil: 'networkidle', timeout: 10000 }); - logger.log('debug', `Went to ${url}`); - } catch (e) { - const { message } = e as Error; - logger.log('error', message); +const handleChangeUrl = async (activeBrowser: RemoteBrowser, page: Page, url: string) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring change url event: page is closed`); + return; } - } else { - logger.log('warn', `No url provided`); + + if (url) { + const generator = activeBrowser.generator; + await generator.onChangeUrl(url, page); + + try { + await page.goto(url, { waitUntil: "domcontentloaded", timeout: 30000 }); + await page.waitForTimeout(2000); + logger.log("debug", `Went to ${url}`); + } catch (e) { + const { message } = e as Error; + logger.log("error", message); + } + } else { + logger.log("warn", `No url provided`); + } + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling change url event: ${message}`); } }; @@ -410,13 +521,23 @@ const onRefresh = async (userId: string) => { /** * A refresh event handler. * Refreshes the page. This is not reflected in the workflow. 
- * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @category BrowserManagement */ -const handleRefresh = async (generator: WorkflowGenerator, page: Page) => { - await page.reload(); - logger.log('debug', `Page refreshed.`); +const handleRefresh = async (activeBrowser: RemoteBrowser, page: Page) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring refresh event: page is closed`); + return; + } + + await page.reload(); + logger.log("debug", `Page refreshed.`); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling refresh event: ${message}`); + } }; /** @@ -432,14 +553,25 @@ const onGoBack = async (userId: string) => { /** * A go back event handler. * Navigates the page back and generates data for the workflow. - * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @category BrowserManagement */ -const handleGoBack = async (generator: WorkflowGenerator, page: Page) => { - await page.goBack({ waitUntil: 'commit' }); - generator.onGoBack(page.url()); - logger.log('debug', 'Page went back') +const handleGoBack = async (activeBrowser: RemoteBrowser, page: Page) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring go back event: page is closed`); + return; + } + + const generator = activeBrowser.generator; + await page.goBack({ waitUntil: "commit" }); + generator.onGoBack(page.url()); + logger.log("debug", "Page went back"); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling go back event: ${message}`); + } }; /** @@ -455,14 +587,209 @@ const onGoForward = async (userId: string) => { /** * A go forward event handler. * Navigates the page forward and generates data for the workflow. 
- * @param generator - the workflow generator {@link Generator} + * @param activeBrowser - the active remote browser {@link RemoteBrowser} * @param page - the active page of the remote browser * @category BrowserManagement */ -const handleGoForward = async (generator: WorkflowGenerator, page: Page) => { - await page.goForward({ waitUntil: 'commit' }); - generator.onGoForward(page.url()); - logger.log('debug', 'Page went forward'); +const handleGoForward = async (activeBrowser: RemoteBrowser, page: Page) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring go forward event: page is closed`); + return; + } + + const generator = activeBrowser.generator; + await page.goForward({ waitUntil: "commit" }); + generator.onGoForward(page.url()); + logger.log("debug", "Page went forward"); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling go forward event: ${message}`); + } +}; + +/** + * Handles the click action event. + * @param activeBrowser - the active remote browser {@link RemoteBrowser} + * @param page - the active page of the remote browser + * @param data - the data of the click action event + * @category BrowserManagement + */ +const handleClickAction = async ( + activeBrowser: RemoteBrowser, + page: Page, + data: { + selector: string; + url: string; + userId: string; + elementInfo?: any; + coordinates?: { x: number; y: number }; + isSPA?: boolean; + } +) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring click action event: page is closed`); + return; + } + + const { selector, url, elementInfo, coordinates, isSPA = false } = data; + const currentUrl = page.url(); + + await page.click(selector); + + const generator = activeBrowser.generator; + await generator.onDOMClickAction(page, data); + + logger.log("debug", `Click action processed: ${selector}`); + + if (isSPA) { + logger.log("debug", `SPA interaction detected for selector: ${selector}`); + + await new Promise((resolve) => 
setTimeout(resolve, 1500)); + } else { + const newUrl = page.url(); + const hasNavigated = newUrl !== currentUrl && !newUrl.endsWith("/#"); + + if (hasNavigated) { + logger.log("debug", `Navigation detected: ${currentUrl} -> ${newUrl}`); + + await generator.onDOMNavigation(page, { + url: newUrl, + currentUrl: currentUrl, + userId: data.userId, + }); + } + } + + await new Promise((resolve) => setTimeout(resolve, 2000)); + await activeBrowser.makeAndEmitDOMSnapshot(); + } catch (e) { + const { message } = e as Error; + logger.log( + "warn", + `Error handling enhanced click action event: ${message}` + ); + } +}; + +/** + * A wrapper function for handling the click action event. + * @param socket The socket connection + * @param data - the data of the click action event + * @category HelperFunctions + */ +const onDOMClickAction = async ( + data: { + selector: string; + url: string; + userId: string; + elementInfo?: any; + coordinates?: { x: number; y: number }; + }, + userId: string +) => { + logger.log("debug", "Handling click action event emitted from client"); + await handleWrapper(handleClickAction, userId, data); +}; + +/** + * Handles the keyboard action event. 
+ * @param activeBrowser - the active remote browser {@link RemoteBrowser} + * @param page - the active page of the remote browser + * @param data - the data of the keyboard action event + * @category BrowserManagement + */ +const handleKeyboardAction = async ( + activeBrowser: RemoteBrowser, + page: Page, + data: { + selector: string; + key: string; + url: string; + userId: string; + inputType?: string; + } +) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring keyboard action event: page is closed`); + return; + } + + const generator = activeBrowser.generator; + + await page.press(data.selector, data.key); + await generator.onDOMKeyboardAction(page, data); + logger.log( + "debug", + `Keyboard action processed: ${data.key} on ${data.selector}` + ); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling keyboard action event: ${message}`); + } +}; + +/** + * A wrapper function for handling the keyboard action event. + * @param socket The socket connection + * @param data - the data of the keyboard action event + * @category HelperFunctions + */ +const onDOMKeyboardAction = async ( + data: { + selector: string; + key: string; + url: string; + userId: string; + inputType?: string; + }, + userId: string +) => { + logger.log("debug", "Handling keyboard action event emitted from client"); + await handleWrapper(handleKeyboardAction, userId, data); +}; + +/** + * Handles the workflow pair event. 
+ * @param activeBrowser - the active remote browser {@link RemoteBrowser} + * @param page - the active page of the remote browser + * @param data - the data of the workflow pair event + * @category BrowserManagement + */ +const handleWorkflowPair = async ( + activeBrowser: RemoteBrowser, + page: Page, + data: { pair: WhereWhatPair; userId: string } +) => { + try { + if (page.isClosed()) { + logger.log("debug", `Ignoring workflow pair event: page is closed`); + return; + } + + const generator = activeBrowser.generator; + await generator.onDOMWorkflowPair(page, data); + logger.log("debug", `Workflow pair processed from frontend`); + } catch (e) { + const { message } = e as Error; + logger.log("warn", `Error handling workflow pair event: ${message}`); + } +}; + +/** + * A wrapper function for handling the workflow pair event. + * @param socket The socket connection + * @param data - the data of the workflow pair event + * @category HelperFunctions + */ +const onDOMWorkflowPair = async ( + data: { pair: WhereWhatPair; userId: string }, + userId: string +) => { + logger.log("debug", "Handling workflow pair event emitted from client"); + await handleWrapper(handleWorkflowPair, userId, data); }; /** @@ -493,6 +820,10 @@ const registerInputHandlers = (socket: Socket, userId: string) => { socket.on("input:time", (data) => onTimeSelection(data, userId)); socket.on("input:datetime-local", (data) => onDateTimeLocalSelection(data, userId)); socket.on("action", (data) => onGenerateAction(data, userId)); + + socket.on("dom:click", (data) => onDOMClickAction(data, userId)); + socket.on("dom:keypress", (data) => onDOMKeyboardAction(data, userId)); + socket.on("dom:addpair", (data) => onDOMWorkflowPair(data, userId)); }; export default registerInputHandlers; diff --git a/server/src/db/config/database.js b/server/src/db/config/database.js index ae6972d1..4607c899 100644 --- a/server/src/db/config/database.js +++ b/server/src/db/config/database.js @@ -1,4 +1,4 @@ -import dotenv from 
'dotenv'; +const dotenv = require('dotenv'); dotenv.config({ path: './.env' }); // Validate required environment variables diff --git a/server/src/db/migrations/20250527105655-add-webhooks.js b/server/src/db/migrations/20250527105655-add-webhooks.js new file mode 100644 index 00000000..60eefd19 --- /dev/null +++ b/server/src/db/migrations/20250527105655-add-webhooks.js @@ -0,0 +1,27 @@ +'use strict'; + +module.exports = { + async up(queryInterface, Sequelize) { + await queryInterface.addColumn('robot', 'webhooks', { + type: Sequelize.JSONB, + allowNull: true, + defaultValue: null, + comment: 'Webhook configurations for the robot' + }); + + // Optional: Add an index for better query performance if you plan to search within webhook data + await queryInterface.addIndex('robot', { + fields: ['webhooks'], + using: 'gin', // GIN index for JSONB columns + name: 'robot_webhooks_gin_idx' + }); + }, + + async down(queryInterface, Sequelize) { + // Remove the index first + await queryInterface.removeIndex('robot', 'robot_webhooks_gin_idx'); + + // Then remove the column + await queryInterface.removeColumn('robot', 'webhooks'); + } +}; \ No newline at end of file diff --git a/server/src/mcp-worker.ts b/server/src/mcp-worker.ts new file mode 100644 index 00000000..259ef2fd --- /dev/null +++ b/server/src/mcp-worker.ts @@ -0,0 +1,373 @@ +import { McpServer } from "@modelcontextprotocol/sdk/server/mcp.js"; +import { StdioServerTransport } from "@modelcontextprotocol/sdk/server/stdio.js"; +import { z } from "zod"; +import fetch from 'node-fetch'; +import dotenv from 'dotenv'; + +dotenv.config(); + +const log = (message: string) => { + if (process.env.NODE_ENV !== 'production') { + console.error(`[MCP Worker] ${message}`); + } +}; + +class MaxunMCPWorker { + private mcpServer: McpServer; + private apiKey: string; + private apiUrl: string; + + constructor() { + this.apiKey = process.env.MCP_API_KEY || ''; + this.apiUrl = process.env.BACKEND_URL || 'http://localhost:8080'; + + if 
(!this.apiKey) { + throw new Error('MCP_API_KEY environment variable is required'); + } + + this.mcpServer = new McpServer({ + name: 'Maxun Web Scraping Server', + version: '1.0.0' + }); + + this.setupTools(); + } + + private async makeApiRequest(endpoint: string, options: any = {}) { + const url = `${this.apiUrl}${endpoint}`; + const headers = { + 'Content-Type': 'application/json', + 'x-api-key': this.apiKey, + ...options.headers + }; + + const response = await fetch(url, { + ...options, + headers + }); + + if (!response.ok) { + throw new Error(`API request failed: ${response.status} ${response.statusText}`); + } + + return await response.json(); + } + + private setupTools() { + // Tool: List all robots + this.mcpServer.tool( + "list_robots", + {}, + async () => { + try { + const data = await this.makeApiRequest('/api/robots'); + + return { + content: [{ + type: "text", + text: `Found ${data.robots.totalCount} robots:\n\n${JSON.stringify(data.robots.items, null, 2)}` + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching robots: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get robot details by ID + this.mcpServer.tool( + "get_robot", + { + robot_id: z.string().describe("ID of the robot to get details for") + }, + async ({ robot_id }: { robot_id: string }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}`); + + return { + content: [{ + type: "text", + text: `Robot Details:\n\n${JSON.stringify(data.robot, null, 2)}` + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching robot: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Run a robot and get results + this.mcpServer.tool( + "run_robot", + { + robot_id: z.string().describe("ID of the robot to run"), + wait_for_completion: z.boolean().default(true).describe("Whether to wait for the run to complete") + }, + async ({ robot_id, wait_for_completion }: { 
robot_id: string; wait_for_completion: boolean }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`, { + method: 'POST' + }); + + if (wait_for_completion) { + const extractedData = data.run.data; + const screenshots = data.run.screenshots; + + let resultText = `Robot run completed successfully!\n\n`; + resultText += `Run ID: ${data.run.runId}\n`; + resultText += `Status: ${data.run.status}\n`; + resultText += `Started: ${data.run.startedAt}\n`; + resultText += `Finished: ${data.run.finishedAt}\n\n`; + + if (extractedData.textData && extractedData.textData.length > 0) { + resultText += `Extracted Text Data (${extractedData.textData.length} items):\n`; + resultText += JSON.stringify(extractedData.textData, null, 2) + '\n\n'; + } + + if (extractedData.listData && extractedData.listData.length > 0) { + resultText += `Extracted List Data (${extractedData.listData.length} items):\n`; + resultText += JSON.stringify(extractedData.listData, null, 2) + '\n\n'; + } + + if (screenshots && screenshots.length > 0) { + resultText += `Screenshots captured: ${screenshots.length}\n`; + resultText += `Screenshot URLs:\n`; + screenshots.forEach((screenshot: any, index: any) => { + resultText += `${index + 1}. ${screenshot}\n`; + }); + } + + return { + content: [{ + type: "text", + text: resultText + }] + }; + } else { + return { + content: [{ + type: "text", + text: `Robot run started! 
Run ID: ${data.run.runId}\nStatus: ${data.run.status}` + }] + }; + } + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error running robot: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get all runs for a robot + this.mcpServer.tool( + "get_robot_runs", + { + robot_id: z.string().describe("ID of the robot") + }, + async ({ robot_id }: { robot_id: string }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs`); + + return { + content: [{ + type: "text", + text: `Robot runs (${data.runs.totalCount} total):\n\n${JSON.stringify(data.runs.items, null, 2)}` + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching runs: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get specific run details + this.mcpServer.tool( + "get_run_details", + { + robot_id: z.string().describe("ID of the robot"), + run_id: z.string().describe("ID of the specific run") + }, + async ({ robot_id, run_id }: { robot_id: string; run_id: string }) => { + try { + const data = await this.makeApiRequest(`/api/robots/${robot_id}/runs/${run_id}`); + + const run = data.run; + let resultText = `Run Details:\n\n`; + resultText += `Run ID: ${run.runId}\n`; + resultText += `Status: ${run.status}\n`; + resultText += `Robot ID: ${run.robotId}\n`; + resultText += `Started: ${run.startedAt}\n`; + resultText += `Finished: ${run.finishedAt}\n\n`; + + if (run.data.textData && run.data.textData.length > 0) { + resultText += `Extracted Text Data:\n${JSON.stringify(run.data.textData, null, 2)}\n\n`; + } + + if (run.data.listData && run.data.listData.length > 0) { + resultText += `Extracted List Data:\n${JSON.stringify(run.data.listData, null, 2)}\n\n`; + } + + if (run.screenshots && run.screenshots.length > 0) { + resultText += `Screenshots:\n`; + run.screenshots.forEach((screenshot: any, index: any) => { + resultText += `${index + 1}. 
${screenshot}\n`; + }); + } + + return { + content: [{ + type: "text", + text: resultText + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error fetching run details: ${error.message}` + }], + isError: true + }; + } + } + ); + + // Tool: Get robot performance summary + this.mcpServer.tool( + "get_robot_summary", + { + robot_id: z.string().describe("ID of the robot") + }, + async ({ robot_id }: { robot_id: string }) => { + try { + const [robotData, runsData] = await Promise.all([ + this.makeApiRequest(`/api/robots/${robot_id}`), + this.makeApiRequest(`/api/robots/${robot_id}/runs`) + ]); + + const robot = robotData.robot; + const runs = runsData.runs.items; + + const successfulRuns = runs.filter((run: any) => run.status === 'success'); + const failedRuns = runs.filter((run: any) => run.status === 'failed'); + + let totalTextItems = 0; + let totalListItems = 0; + let totalScreenshots = 0; + + successfulRuns.forEach((run: any) => { + if (run.data.textData) totalTextItems += run.data.textData.length; + if (run.data.listData) totalListItems += run.data.listData.length; + if (run.screenshots) totalScreenshots += run.screenshots.length; + }); + + const summary = `Robot Performance Summary: + +Robot Name: ${robot.name} +Robot ID: ${robot.id} +Created: ${robot.createdAt ? new Date(robot.createdAt).toLocaleString() : 'N/A'} + +Performance Metrics: +- Total Runs: ${runs.length} +- Successful Runs: ${successfulRuns.length} +- Failed Runs: ${failedRuns.length} +- Success Rate: ${runs.length > 0 ? 
((successfulRuns.length / runs.length) * 100).toFixed(1) : 0}% + +Data Extracted: +- Total Text Items: ${totalTextItems} +- Total List Items: ${totalListItems} +- Total Screenshots: ${totalScreenshots} +- Total Data Points: ${totalTextItems + totalListItems} + +Input Parameters: +${JSON.stringify(robot.inputParameters, null, 2)}`; + + return { + content: [{ + type: "text", + text: summary + }] + }; + } catch (error: any) { + return { + content: [{ + type: "text", + text: `Error generating robot summary: ${error.message}` + }], + isError: true + }; + } + } + ); + } + + async start() { + try { + const transport = new StdioServerTransport(); + await this.mcpServer.connect(transport); + log('Maxun MCP Worker connected and ready'); + } catch (error: any) { + log(`Failed to start MCP Worker: ${error.message}`); + throw error; + } + } + + async stop() { + try { + await this.mcpServer.close(); + log('Maxun MCP Worker stopped'); + } catch (error: any) { + log(`Error stopping MCP Worker: ${error.message}`); + } + } +} + +async function main() { + try { + const worker = new MaxunMCPWorker(); + await worker.start(); + + // Handle graceful shutdown + process.on('SIGTERM', async () => { + await worker.stop(); + process.exit(0); + }); + + process.on('SIGINT', async () => { + await worker.stop(); + process.exit(0); + }); + + } catch (error) { + console.error('Failed to start MCP Worker:', error); + process.exit(1); + } +} + +// Only start if this is run as a worker or directly +if (process.env.MCP_WORKER === 'true' || require.main === module) { + main(); +} \ No newline at end of file diff --git a/server/src/models/Robot.ts b/server/src/models/Robot.ts index 1681eaac..eae9438e 100644 --- a/server/src/models/Robot.ts +++ b/server/src/models/Robot.ts @@ -15,6 +15,19 @@ interface RobotWorkflow { workflow: WhereWhatPair[]; } +interface WebhookConfig { + id: string; + url: string; + events: string[]; + active: boolean; + createdAt: string; + updatedAt: string; + lastCalledAt?: string | 
null; + retryAttempts?: number; + retryDelay?: number; + timeout?: number; +} + interface RobotAttributes { id: string; userId?: number; @@ -32,6 +45,7 @@ interface RobotAttributes { airtable_refresh_token?: string | null; schedule?: ScheduleConfig | null; airtable_table_id?: string | null; + webhooks?: WebhookConfig[] | null; } interface ScheduleConfig { @@ -66,6 +80,7 @@ class Robot extends Model implements R public airtable_refresh_token!: string | null; public airtable_table_id!: string | null; public schedule!: ScheduleConfig | null; + public webhooks!: WebhookConfig[] | null; } Robot.init( @@ -135,6 +150,11 @@ Robot.init( type: DataTypes.JSONB, allowNull: true, }, + webhooks: { + type: DataTypes.JSONB, + allowNull: true, + defaultValue: null, + }, }, { sequelize, diff --git a/server/src/pgboss-worker.ts b/server/src/pgboss-worker.ts index 54a70697..2c3ae1ac 100644 --- a/server/src/pgboss-worker.ts +++ b/server/src/pgboss-worker.ts @@ -20,6 +20,7 @@ import { googleSheetUpdateTasks, processGoogleSheetUpdates } from './workflow-ma import { airtableUpdateTasks, processAirtableUpdates } from './workflow-management/integrations/airtable'; import { RemoteBrowser } from './browser-management/classes/RemoteBrowser'; import { io as serverIo } from "./server"; +import { sendWebhook } from './routes/webhook'; if (!process.env.DB_USER || !process.env.DB_PASSWORD || !process.env.DB_HOST || !process.env.DB_PORT || !process.env.DB_NAME) { throw new Error('Failed to start pgboss worker: one or more required environment variables are missing.'); @@ -55,7 +56,10 @@ interface AbortRunData { runId: string; } -const pgBoss = new PgBoss({connectionString: pgBossConnectionString }); +const pgBoss = new PgBoss({ + connectionString: pgBossConnectionString, + expireInHours: 23 +}); /** * Extract data safely from a job (single job or job array) @@ -82,90 +86,128 @@ function AddGeneratedFlags(workflow: WorkflowFile) { }; /** - * Function to reset browser state without creating a new 
browser + * Helper function to extract and process scraped data from browser interpreter */ -async function resetBrowserState(browser: RemoteBrowser): Promise { - try { - const currentPage = browser.getCurrentPage(); - if (!currentPage) { - logger.log('error', 'No current page available to reset browser state'); - return false; - } - - // Navigate to blank page to reset state - await currentPage.goto('about:blank', { waitUntil: 'networkidle', timeout: 10000 }); - - // Clear browser storage - await currentPage.evaluate(() => { - try { - localStorage.clear(); - sessionStorage.clear(); - } catch (e) { - // Ignore errors in cleanup +async function extractAndProcessScrapedData( + browser: RemoteBrowser, + run: any +): Promise<{ + categorizedOutput: any; + uploadedBinaryOutput: any; + totalDataPointsExtracted: number; + totalSchemaItemsExtracted: number; + totalListItemsExtracted: number; + extractedScreenshotsCount: number; +}> { + let categorizedOutput: { + scrapeSchema: Record; + scrapeList: Record; + } = { + scrapeSchema: {}, + scrapeList: {} + }; + + if ((browser?.interpreter?.serializableDataByType?.scrapeSchema ?? []).length > 0) { + browser?.interpreter?.serializableDataByType?.scrapeSchema?.forEach((schemaItem: any, index: any) => { + categorizedOutput.scrapeSchema[`schema-${index}`] = schemaItem; + }); + } + + if ((browser?.interpreter?.serializableDataByType?.scrapeList ?? 
[]).length > 0) { + browser?.interpreter?.serializableDataByType?.scrapeList?.forEach((listItem: any, index: any) => { + categorizedOutput.scrapeList[`list-${index}`] = listItem; + }); + } + + const binaryOutput = browser?.interpreter?.binaryData?.reduce( + (reducedObject: Record, item: any, index: number): Record => { + return { + [`item-${index}`]: item, + ...reducedObject, + }; + }, + {} + ) || {}; + + let totalDataPointsExtracted = 0; + let totalSchemaItemsExtracted = 0; + let totalListItemsExtracted = 0; + let extractedScreenshotsCount = 0; + + if (categorizedOutput.scrapeSchema) { + Object.values(categorizedOutput.scrapeSchema).forEach((schemaResult: any) => { + if (Array.isArray(schemaResult)) { + schemaResult.forEach(obj => { + if (obj && typeof obj === 'object') { + totalDataPointsExtracted += Object.keys(obj).length; + } + }); + totalSchemaItemsExtracted += schemaResult.length; + } else if (schemaResult && typeof schemaResult === 'object') { + totalDataPointsExtracted += Object.keys(schemaResult).length; + totalSchemaItemsExtracted += 1; } }); - - // Clear cookies - const context = currentPage.context(); - await context.clearCookies(); - - return true; - } catch (error) { - logger.log('error', `Failed to reset browser state`); - return false; } + + if (categorizedOutput.scrapeList) { + Object.values(categorizedOutput.scrapeList).forEach((listResult: any) => { + if (Array.isArray(listResult)) { + listResult.forEach(obj => { + if (obj && typeof obj === 'object') { + totalDataPointsExtracted += Object.keys(obj).length; + } + }); + totalListItemsExtracted += listResult.length; + } + }); + } + + if (binaryOutput) { + extractedScreenshotsCount = Object.keys(binaryOutput).length; + totalDataPointsExtracted += extractedScreenshotsCount; + } + + const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); + const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput( + run, + binaryOutput + ); + + return { + 
categorizedOutput: { + scrapeSchema: categorizedOutput.scrapeSchema || {}, + scrapeList: categorizedOutput.scrapeList || {} + }, + uploadedBinaryOutput, + totalDataPointsExtracted, + totalSchemaItemsExtracted, + totalListItemsExtracted, + extractedScreenshotsCount + }; } -/** - * Modified checkAndProcessQueuedRun function - only changes browser reset logic - */ -async function checkAndProcessQueuedRun(userId: string, browserId: string): Promise { +// Helper function to handle integration updates +async function triggerIntegrationUpdates(runId: string, robotMetaId: string): Promise { try { - // Find the oldest queued run for this specific browser - const queuedRun = await Run.findOne({ - where: { - browserId: browserId, - runByUserId: userId, - status: 'queued' - }, - order: [['startedAt', 'ASC']] - }); - - if (!queuedRun) { - logger.log('info', `No queued runs found for browser ${browserId}`); - return false; - } - - // Reset the browser state before next run - const browser = browserPool.getRemoteBrowser(browserId); - if (browser) { - logger.log('info', `Resetting browser state for browser ${browserId} before next run`); - await resetBrowserState(browser); - } - - // Update the queued run to running status - await queuedRun.update({ - status: 'running', - log: 'Run started - using browser from previous run' - }); - - // Use user-specific queue - const userQueueName = `execute-run-user-${userId}`; - - // Schedule the run execution - await pgBoss.createQueue(userQueueName); - const executeJobId = await pgBoss.send(userQueueName, { - userId: userId, - runId: queuedRun.runId, - browserId: browserId - }); - - logger.log('info', `Scheduled queued run ${queuedRun.runId} to use browser ${browserId}, job ID: ${executeJobId}`); - return true; - } catch (error: unknown) { - const errorMessage = error instanceof Error ? 
error.message : String(error); - logger.log('error', `Error checking for queued runs: ${errorMessage}`); - return false; + googleSheetUpdateTasks[runId] = { + robotId: robotMetaId, + runId: runId, + status: 'pending', + retries: 5, + }; + + airtableUpdateTasks[runId] = { + robotId: robotMetaId, + runId: runId, + status: 'pending', + retries: 5, + }; + + processAirtableUpdates(); + processGoogleSheetUpdates(); + } catch (err: any) { + logger.log('error', `Failed to update integrations for run: ${runId}: ${err.message}`); } } @@ -173,10 +215,12 @@ async function checkAndProcessQueuedRun(userId: string, browserId: string): Prom * Modified processRunExecution function - only add browser reset */ async function processRunExecution(job: Job) { - try { - const data = job.data; - logger.log('info', `Processing run execution job for runId: ${data.runId}, browserId: ${data.browserId}`); - + const BROWSER_INIT_TIMEOUT = 30000; + + const data = job.data; + logger.log('info', `Processing run execution job for runId: ${data.runId}, browserId: ${data.browserId}`); + + try { // Find the run const run = await Run.findOne({ where: { runId: data.runId } }); if (!run) { @@ -190,48 +234,56 @@ async function processRunExecution(job: Job) { } const plainRun = run.toJSON(); + const browserId = data.browserId || plainRun.browserId; - // Find the recording - const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true }); - if (!recording) { - logger.log('error', `Recording for run ${data.runId} not found`); - - const currentRun = await Run.findOne({ where: { runId: data.runId } }); - if (currentRun && (currentRun.status !== 'aborted' && currentRun.status !== 'aborting')) { - await run.update({ - status: 'failed', - finishedAt: new Date().toLocaleString(), - log: 'Failed: Recording not found', - }); - } - - // Check for queued runs even if this one failed - await checkAndProcessQueuedRun(data.userId, data.browserId); - - return { success: false }; 
+ if (!browserId) { + throw new Error(`No browser ID available for run ${data.runId}`); } - // Get the browser and execute the run - const browser = browserPool.getRemoteBrowser(plainRun.browserId); - let currentPage = browser?.getCurrentPage(); + logger.log('info', `Looking for browser ${browserId} for run ${data.runId}`); + + let browser = browserPool.getRemoteBrowser(browserId); + const browserWaitStart = Date.now(); - if (!browser || !currentPage) { - logger.log('error', `Browser or page not available for run ${data.runId}`); - - // Even if this run failed, check for queued runs - await checkAndProcessQueuedRun(data.userId, data.browserId); - - return { success: false }; + while (!browser && (Date.now() - browserWaitStart) < BROWSER_INIT_TIMEOUT) { + logger.log('debug', `Browser ${browserId} not ready yet, waiting...`); + await new Promise(resolve => setTimeout(resolve, 1000)); + browser = browserPool.getRemoteBrowser(browserId); } - try { - // Reset the browser state before executing this run - await resetBrowserState(browser); + if (!browser) { + throw new Error(`Browser ${browserId} not found in pool after timeout`); + } + + logger.log('info', `Browser ${browserId} found and ready for execution`); + + try { + // Find the recording + const recording = await Robot.findOne({ where: { 'recording_meta.id': plainRun.robotMetaId }, raw: true }); + + if (!recording) { + throw new Error(`Recording for run ${data.runId} not found`); + } const isRunAborted = async (): Promise => { const currentRun = await Run.findOne({ where: { runId: data.runId } }); return currentRun ? 
(currentRun.status === 'aborted' || currentRun.status === 'aborting') : false; }; + + let currentPage = browser.getCurrentPage(); + + const pageWaitStart = Date.now(); + while (!currentPage && (Date.now() - pageWaitStart) < 30000) { + logger.log('debug', `Page not ready for browser ${browserId}, waiting...`); + await new Promise(resolve => setTimeout(resolve, 1000)); + currentPage = browser.getCurrentPage(); + } + + if (!currentPage) { + throw new Error(`No current page available for browser ${browserId} after timeout`); + } + + logger.log('info', `Starting workflow execution for run ${data.runId}`); // Execute the workflow const workflow = AddGeneratedFlags(recording.recording); @@ -244,30 +296,27 @@ async function processRunExecution(job: Job) { if (await isRunAborted()) { logger.log('info', `Run ${data.runId} was aborted during execution, not updating status`); - - const queuedRunProcessed = await checkAndProcessQueuedRun(data.userId, plainRun.browserId); - - if (!queuedRunProcessed) { - await destroyRemoteBrowser(plainRun.browserId, data.userId); - logger.log('info', `No queued runs found for browser ${plainRun.browserId}, browser destroyed`); - } + + await destroyRemoteBrowser(plainRun.browserId, data.userId); return { success: true }; } + + logger.log('info', `Workflow execution completed for run ${data.runId}`); const binaryOutputService = new BinaryOutputService('maxun-run-screenshots'); const uploadedBinaryOutput = await binaryOutputService.uploadAndStoreBinaryOutput(run, interpretationInfo.binaryOutput); - if (await isRunAborted()) { - logger.log('info', `Run ${data.runId} was aborted while processing results, not updating status`); - return { success: true }; - } - const categorizedOutput = { scrapeSchema: interpretationInfo.scrapeSchemaOutput || {}, scrapeList: interpretationInfo.scrapeListOutput || {} }; + if (await isRunAborted()) { + logger.log('info', `Run ${data.runId} was aborted while processing results, not updating status`); + return { success: 
true }; + } + await run.update({ ...run, status: 'success', @@ -282,6 +331,7 @@ async function processRunExecution(job: Job) { }); // Track extraction metrics + let totalDataPointsExtracted = 0; let totalSchemaItemsExtracted = 0; let totalListItemsExtracted = 0; let extractedScreenshotsCount = 0; @@ -289,23 +339,35 @@ async function processRunExecution(job: Job) { if (categorizedOutput.scrapeSchema) { Object.values(categorizedOutput.scrapeSchema).forEach((schemaResult: any) => { if (Array.isArray(schemaResult)) { + schemaResult.forEach(obj => { + if (obj && typeof obj === 'object') { + totalDataPointsExtracted += Object.keys(obj).length; + } + }); totalSchemaItemsExtracted += schemaResult.length; } else if (schemaResult && typeof schemaResult === 'object') { + totalDataPointsExtracted += Object.keys(schemaResult).length; totalSchemaItemsExtracted += 1; } }); } - + if (categorizedOutput.scrapeList) { Object.values(categorizedOutput.scrapeList).forEach((listResult: any) => { if (Array.isArray(listResult)) { + listResult.forEach(obj => { + if (obj && typeof obj === 'object') { + totalDataPointsExtracted += Object.keys(obj).length; + } + }); totalListItemsExtracted += listResult.length; } }); } - + if (uploadedBinaryOutput) { extractedScreenshotsCount = Object.keys(uploadedBinaryOutput).length; + totalDataPointsExtracted += extractedScreenshotsCount; } const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted; @@ -314,7 +376,8 @@ async function processRunExecution(job: Job) { console.log(`Extracted List Items Count: ${totalListItemsExtracted}`); console.log(`Extracted Screenshots Count: ${extractedScreenshotsCount}`); console.log(`Total Rows Extracted: ${totalRowsExtracted}`); - + console.log(`Total Data Points Extracted: ${totalDataPointsExtracted}`); + // Capture metrics capture( 'maxun-oss-run-created-manual', @@ -330,103 +393,244 @@ async function processRunExecution(job: Job) { } ); - // Schedule updates for Google Sheets and Airtable + // 
Trigger webhooks for run completion + const webhookPayload = { + robot_id: plainRun.robotMetaId, + run_id: data.runId, + robot_name: recording.recording_meta.name, + status: 'success', + started_at: plainRun.startedAt, + finished_at: new Date().toLocaleString(), + extracted_data: { + captured_texts: Object.values(categorizedOutput.scrapeSchema).flat() || [], + captured_lists: categorizedOutput.scrapeList, + total_rows: totalRowsExtracted, + captured_texts_count: totalSchemaItemsExtracted, + captured_lists_count: totalListItemsExtracted, + screenshots_count: extractedScreenshotsCount, + total_data_points_extracted: totalDataPointsExtracted, + }, + metadata: { + browser_id: plainRun.browserId, + user_id: data.userId, + } + }; + try { - googleSheetUpdateTasks[plainRun.runId] = { - robotId: plainRun.robotMetaId, - runId: plainRun.runId, - status: 'pending', - retries: 5, - }; - - airtableUpdateTasks[plainRun.runId] = { - robotId: plainRun.robotMetaId, - runId: plainRun.runId, - status: 'pending', - retries: 5, - }; - - processAirtableUpdates(); - processGoogleSheetUpdates(); - } catch (err: any) { - logger.log('error', `Failed to update Google Sheet for run: ${plainRun.runId}: ${err.message}`); + await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload); + logger.log('info', `Webhooks sent successfully for completed run ${data.runId}`); + } catch (webhookError: any) { + logger.log('error', `Failed to send webhooks for run ${data.runId}: ${webhookError.message}`); } - serverIo.of(plainRun.browserId).emit('run-completed', { + // Schedule updates for Google Sheets and Airtable + await triggerIntegrationUpdates(plainRun.runId, plainRun.robotMetaId); + + const completionData = { runId: data.runId, robotMetaId: plainRun.robotMetaId, robotName: recording.recording_meta.name, status: 'success', finishedAt: new Date().toLocaleString() - }); - - // Check for and process queued runs before destroying the browser - const queuedRunProcessed = await 
checkAndProcessQueuedRun(data.userId, plainRun.browserId); - - // Only destroy the browser if no queued run was found - if (!queuedRunProcessed) { - await destroyRemoteBrowser(plainRun.browserId, data.userId); - logger.log('info', `No queued runs found for browser ${plainRun.browserId}, browser destroyed`); - } + }; + + serverIo.of(browserId).emit('run-completed', completionData); + serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', completionData); + + await destroyRemoteBrowser(browserId, data.userId); + logger.log('info', `Browser ${browserId} destroyed after successful run ${data.runId}`); return { success: true }; } catch (executionError: any) { logger.log('error', `Run execution failed for run ${data.runId}: ${executionError.message}`); - const currentRun = await Run.findOne({ where: { runId: data.runId } }); - if (currentRun && (currentRun.status !== 'aborted' && currentRun.status !== 'aborting')) { - await run.update({ + let partialDataExtracted = false; + let partialData: any = null; + let partialUpdateData: any = { + status: 'failed', + finishedAt: new Date().toLocaleString(), + log: `Failed: ${executionError.message}`, + }; + + try { + if (browser && browser.interpreter) { + const hasSchemaData = (browser.interpreter.serializableDataByType?.scrapeSchema ?? []).length > 0; + const hasListData = (browser.interpreter.serializableDataByType?.scrapeList ?? []).length > 0; + const hasBinaryData = (browser.interpreter.binaryData ?? 
[]).length > 0; + + if (hasSchemaData || hasListData || hasBinaryData) { + logger.log('info', `Extracting partial data from failed run ${data.runId}`); + + partialData = await extractAndProcessScrapedData(browser, run); + + partialUpdateData.serializableOutput = { + scrapeSchema: Object.values(partialData.categorizedOutput.scrapeSchema), + scrapeList: Object.values(partialData.categorizedOutput.scrapeList), + }; + partialUpdateData.binaryOutput = partialData.uploadedBinaryOutput; + + partialDataExtracted = true; + logger.log('info', `Partial data extracted for failed run ${data.runId}: ${partialData.totalDataPointsExtracted} data points`); + + await triggerIntegrationUpdates(plainRun.runId, plainRun.robotMetaId); + } + } + } catch (partialDataError: any) { + logger.log('warn', `Failed to extract partial data for run ${data.runId}: ${partialDataError.message}`); + } + + await run.update(partialUpdateData); + + try { + const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true }); + + const failureData = { + runId: data.runId, + robotMetaId: plainRun.robotMetaId, + robotName: recording ? 
recording.recording_meta.name : 'Unknown Robot', status: 'failed', finishedAt: new Date().toLocaleString(), - log: `Failed: ${executionError.message}`, - }); - - // Capture failure metrics - capture( - 'maxun-oss-run-created-manual', - { - runId: data.runId, - user_id: data.userId, - created_at: new Date().toISOString(), - status: 'failed', - error_message: executionError.message, - } - ); - } else { - logger.log('info', `Run ${data.runId} was aborted, not updating status to failed`); + hasPartialData: partialDataExtracted + }; + + serverIo.of(browserId).emit('run-completed', failureData); + serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', failureData); + } catch (emitError: any) { + logger.log('warn', `Failed to emit failure event: ${emitError.message}`); } - - // Check for queued runs before destroying the browser - const queuedRunProcessed = await checkAndProcessQueuedRun(data.userId, plainRun.browserId); - - // Only destroy the browser if no queued run was found - if (!queuedRunProcessed) { - try { - await destroyRemoteBrowser(plainRun.browserId, data.userId); - logger.log('info', `No queued runs found for browser ${plainRun.browserId}, browser destroyed`); - } catch (cleanupError: any) { - logger.log('warn', `Failed to clean up browser for failed run ${data.runId}: ${cleanupError.message}`); + + const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true }); + + const failedWebhookPayload = { + robot_id: plainRun.robotMetaId, + run_id: data.runId, + robot_name: recording ? recording.recording_meta.name : 'Unknown Robot', + status: 'failed', + started_at: plainRun.startedAt, + finished_at: new Date().toLocaleString(), + error: { + message: executionError.message, + stack: executionError.stack, + type: 'ExecutionError', + }, + partial_data_extracted: partialDataExtracted, + extracted_data: partialDataExtracted ? 
{ + captured_texts: Object.values(partialUpdateData.serializableOutput?.scrapeSchema || []).flat() || [], + captured_lists: partialUpdateData.serializableOutput?.scrapeList || {}, + total_data_points_extracted: partialData?.totalDataPointsExtracted || 0, + captured_texts_count: partialData?.totalSchemaItemsExtracted || 0, + captured_lists_count: partialData?.totalListItemsExtracted || 0, + screenshots_count: partialData?.extractedScreenshotsCount || 0 + } : null, + metadata: { + browser_id: plainRun.browserId, + user_id: data.userId, } + }; + + try { + await sendWebhook(plainRun.robotMetaId, 'run_failed', failedWebhookPayload); + logger.log('info', `Failure webhooks sent successfully for run ${data.runId}`); + } catch (webhookError: any) { + logger.log('error', `Failed to send failure webhooks for run ${data.runId}: ${webhookError.message}`); } - - return { success: false }; + + try { + const failureSocketData = { + runId: data.runId, + robotMetaId: run.robotMetaId, + robotName: recording ? 
recording.recording_meta.name : 'Unknown Robot', + status: 'failed', + finishedAt: new Date().toLocaleString() + }; + + serverIo.of(run.browserId).emit('run-completed', failureSocketData); + serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', failureSocketData); + } catch (socketError: any) { + logger.log('warn', `Failed to emit failure event in main catch: ${socketError.message}`); + } + + capture('maxun-oss-run-created-manual', { + runId: data.runId, + user_id: data.userId, + created_at: new Date().toISOString(), + status: 'failed', + error_message: executionError.message, + partial_data_extracted: partialDataExtracted, + totalRowsExtracted: partialData?.totalSchemaItemsExtracted + partialData?.totalListItemsExtracted + partialData?.extractedScreenshotsCount || 0, + }); + + await destroyRemoteBrowser(browserId, data.userId); + logger.log('info', `Browser ${browserId} destroyed after failed run`); + + return { success: false, partialDataExtracted }; } } catch (error: unknown) { const errorMessage = error instanceof Error ? error.message : String(error); logger.log('error', `Failed to process run execution job: ${errorMessage}`); + + try { + const run = await Run.findOne({ where: { runId: data.runId }}); + + if (run) { + await run.update({ + status: 'failed', + finishedAt: new Date().toLocaleString(), + log: `Failed: ${errorMessage}`, + }); + + const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true }); + + const failedWebhookPayload = { + robot_id: run.robotMetaId, + run_id: data.runId, + robot_name: recording ? 
recording.recording_meta.name : 'Unknown Robot', + status: 'failed', + started_at: run.startedAt, + finished_at: new Date().toLocaleString(), + error: { + message: errorMessage, + }, + metadata: { + browser_id: run.browserId, + user_id: data.userId, + } + }; + + try { + await sendWebhook(run.robotMetaId, 'run_failed', failedWebhookPayload); + logger.log('info', `Failure webhooks sent successfully for run ${data.runId}`); + } catch (webhookError: any) { + logger.log('error', `Failed to send failure webhooks for run ${data.runId}: ${webhookError.message}`); + } + + try { + const failureSocketData = { + runId: data.runId, + robotMetaId: run.robotMetaId, + robotName: recording ? recording.recording_meta.name : 'Unknown Robot', + status: 'failed', + finishedAt: new Date().toLocaleString() + }; + + serverIo.of(run.browserId).emit('run-completed', failureSocketData); + serverIo.of('/queued-run').to(`user-${data.userId}`).emit('run-completed', failureSocketData); + } catch (socketError: any) { + logger.log('warn', `Failed to emit failure event in main catch: ${socketError.message}`); + } + } + } catch (updateError: any) { + logger.log('error', `Failed to update run status: ${updateError.message}`); + } + return { success: false }; } } async function abortRun(runId: string, userId: string): Promise { try { - const run = await Run.findOne({ - where: { - runId: runId, - runByUserId: userId - } - }); + const run = await Run.findOne({ where: { runId: runId } }); if (!run) { logger.log('warn', `Run ${runId} not found or does not belong to user ${userId}`); @@ -477,32 +681,9 @@ async function abortRun(runId: string, userId: string): Promise { } let currentLog = 'Run aborted by user'; - let categorizedOutput = { - scrapeSchema: {}, - scrapeList: {}, - }; - let binaryOutput: Record = {}; - - try { - if (browser.interpreter) { - if (browser.interpreter.debugMessages) { - currentLog = browser.interpreter.debugMessages.join('\n') || currentLog; - } - - if 
(browser.interpreter.serializableDataByType) { - categorizedOutput = { - scrapeSchema: collectDataByType(browser.interpreter.serializableDataByType.scrapeSchema || []), - scrapeList: collectDataByType(browser.interpreter.serializableDataByType.scrapeList || []), - }; - } - - if (browser.interpreter.binaryData) { - binaryOutput = collectBinaryData(browser.interpreter.binaryData); - } - } - } catch (interpreterError) { - logger.log('warn', `Error collecting data from interpreter: ${interpreterError}`); - } + const extractedData = await extractAndProcessScrapedData(browser, run); + + console.log(`Total Data Points Extracted in aborted run: ${extractedData.totalDataPointsExtracted}`); await run.update({ status: 'aborted', @@ -510,12 +691,16 @@ async function abortRun(runId: string, userId: string): Promise { browserId: plainRun.browserId, log: currentLog, serializableOutput: { - scrapeSchema: Object.values(categorizedOutput.scrapeSchema), - scrapeList: Object.values(categorizedOutput.scrapeList), + scrapeSchema: Object.values(extractedData.categorizedOutput.scrapeSchema), + scrapeList: Object.values(extractedData.categorizedOutput.scrapeList), }, - binaryOutput, + binaryOutput: extractedData.uploadedBinaryOutput, }); + if (extractedData.totalDataPointsExtracted > 0) { + await triggerIntegrationUpdates(runId, plainRun.robotMetaId); + } + try { serverIo.of(plainRun.browserId).emit('run-aborted', { runId, @@ -527,22 +712,13 @@ async function abortRun(runId: string, userId: string): Promise { logger.log('warn', `Failed to emit run-aborted event: ${socketError}`); } - let queuedRunProcessed = false; try { - queuedRunProcessed = await checkAndProcessQueuedRun(userId, plainRun.browserId); - } catch (queueError) { - logger.log('warn', `Error checking queued runs: ${queueError}`); - } - - if (!queuedRunProcessed) { - try { - await new Promise(resolve => setTimeout(resolve, 500)); - - await destroyRemoteBrowser(plainRun.browserId, userId); - logger.log('info', `Browser 
${plainRun.browserId} destroyed successfully after abort`); - } catch (cleanupError) { - logger.log('warn', `Failed to clean up browser for aborted run ${runId}: ${cleanupError}`); - } + await new Promise(resolve => setTimeout(resolve, 500)); + + await destroyRemoteBrowser(plainRun.browserId, userId); + logger.log('info', `Browser ${plainRun.browserId} destroyed successfully after abort`); + } catch (cleanupError) { + logger.log('warn', `Failed to clean up browser for aborted run ${runId}: ${cleanupError}`); } return true; @@ -553,30 +729,6 @@ async function abortRun(runId: string, userId: string): Promise { } } -/** - * Helper function to collect data from arrays into indexed objects - * @param dataArray Array of data to be transformed into an object with indexed keys - * @returns Object with indexed keys - */ -function collectDataByType(dataArray: any[]): Record { - return dataArray.reduce((result: Record, item, index) => { - result[`item-${index}`] = item; - return result; - }, {}); -} - -/** - * Helper function to collect binary data (like screenshots) - * @param binaryDataArray Array of binary data objects to be transformed - * @returns Object with indexed keys - */ -function collectBinaryData(binaryDataArray: { mimetype: string, data: string, type?: string }[]): Record { - return binaryDataArray.reduce((result: Record, item, index) => { - result[`item-${index}`] = item; - return result; - }, {}); -} - async function registerRunExecutionWorker() { try { const registeredUserQueues = new Map(); diff --git a/server/src/routes/auth.ts b/server/src/routes/auth.ts index 1ce415b1..34933466 100644 --- a/server/src/routes/auth.ts +++ b/server/src/routes/auth.ts @@ -33,6 +33,14 @@ router.post("/register", async (req, res) => { }); } + const emailRegex = /^[^\s@]+@[^\s@]+\.[^\s@]+$/; + if (!emailRegex.test(email)) { + return res.status(400).json({ + error: "VALIDATION_ERROR", + code: "register.validation.invalid_email_format" + }); + } + if (!password || password.length 
< 6) { return res.status(400).json({ error: "VALIDATION_ERROR", @@ -74,16 +82,16 @@ router.post("/register", async (req, res) => { res.cookie("token", token, { httpOnly: true, }); - + capture("maxun-oss-user-registered", { email: user.email, userId: user.id, registeredAt: new Date().toISOString(), }); - + console.log(`User registered`); res.json(user); - + } catch (error: any) { console.log(`Could not register user - ${error}`); return res.status(500).json({ @@ -150,23 +158,23 @@ router.post("/login", async (req, res) => { }); router.get("/logout", async (req, res) => { - try { - res.clearCookie("token"); - return res.status(200).json({ - ok: true, - message: "Logged out successfully", - code: "success" - }); - } catch (error) { - console.error('Logout error:', error); - return res.status(500).json({ - ok: false, - message: "Error during logout", - code: "server", - error: process.env.NODE_ENV === 'development' ? error : undefined - }); - } + try { + res.clearCookie("token"); + return res.status(200).json({ + ok: true, + message: "Logged out successfully", + code: "success" + }); + } catch (error) { + console.error('Logout error:', error); + return res.status(500).json({ + ok: false, + message: "Error during logout", + code: "server", + error: process.env.NODE_ENV === 'development' ? 
error : undefined + }); } +} ); router.get( @@ -678,7 +686,7 @@ router.get("/airtable", requireSignIn, (req: Request, res) => { router.get("/airtable/callback", requireSignIn, async (req: Request, res) => { const authenticatedReq = req as AuthenticatedRequest; const baseUrl = process.env.PUBLIC_URL || "http://localhost:5173"; - + try { const { code, state, error } = authenticatedReq.query; @@ -694,7 +702,7 @@ router.get("/airtable/callback", requireSignIn, async (req: Request, res) => { // Verify session data if (!authenticatedReq.session?.code_verifier || authenticatedReq.session.robotId !== state.toString()) { - return res.status(400).json({ + return res.status(400).json({ message: "Session expired - please restart the OAuth flow" }); } @@ -708,7 +716,7 @@ router.get("/airtable/callback", requireSignIn, async (req: Request, res) => { body: new URLSearchParams({ grant_type: "authorization_code", code: code.toString(), - client_id: process.env.AIRTABLE_CLIENT_ID!, + client_id: process.env.AIRTABLE_CLIENT_ID!, redirect_uri: process.env.AIRTABLE_REDIRECT_URI!, code_verifier: authenticatedReq.session.code_verifier }), @@ -811,7 +819,7 @@ router.get("/airtable/bases", requireSignIn, async (req: Request, res) => { // Update robot with selected base router.post("/airtable/update", requireSignIn, async (req: Request, res) => { const authenticatedReq = req as AuthenticatedRequest; - const { baseId, robotId , baseName, tableName, tableId} = req.body; + const { baseId, robotId, baseName, tableName, tableId } = req.body; if (!baseId || !robotId) { return res.status(400).json({ message: "Base ID and Robot ID are required" }); diff --git a/server/src/routes/index.ts b/server/src/routes/index.ts index bc616273..3d8a3644 100644 --- a/server/src/routes/index.ts +++ b/server/src/routes/index.ts @@ -4,6 +4,7 @@ import { router as storage } from './storage'; import { router as auth } from './auth'; import { router as integration } from './integration'; import { router as proxy } from 
'./proxy'; +import { router as webhook } from './webhook'; export { record, @@ -11,5 +12,6 @@ export { storage, auth, integration, - proxy + proxy, + webhook }; diff --git a/server/src/routes/record.ts b/server/src/routes/record.ts index 374f837a..8a589811 100644 --- a/server/src/routes/record.ts +++ b/server/src/routes/record.ts @@ -11,6 +11,7 @@ import { getRemoteBrowserCurrentTabs, getActiveBrowserIdByState, destroyRemoteBrowser, + canCreateBrowserInState, } from '../browser-management/controller'; import { chromium } from 'playwright-extra'; import stealthPlugin from 'puppeteer-extra-plugin-stealth'; @@ -181,6 +182,18 @@ router.get('/active', requireSignIn, (req: AuthenticatedRequest, res) => { return res.send(id); }); +/** + * GET endpoint for checking if the user can create a new remote browser. + */ +router.get('/can-create/:state', requireSignIn, (req: AuthenticatedRequest, res) => { + if (!req.user) { + return res.status(401).send('User not authenticated'); + } + const state = req.params.state as "recording" | "run"; + const canCreate = canCreateBrowserInState(req.user.id, state); + return res.json({ canCreate }); +}); + /** * GET endpoint for getting the current url of the active remote browser. 
*/ diff --git a/server/src/routes/storage.ts b/server/src/routes/storage.ts index 0942600c..35491f8c 100644 --- a/server/src/routes/storage.ts +++ b/server/src/routes/storage.ts @@ -1,10 +1,10 @@ import { Router } from 'express'; import logger from "../logger"; -import { createRemoteBrowserForRun, getActiveBrowserIdByState } from "../browser-management/controller"; +import { createRemoteBrowserForRun, destroyRemoteBrowser, getActiveBrowserIdByState } from "../browser-management/controller"; import { chromium } from 'playwright-extra'; import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import { browserPool } from "../server"; -import { uuid } from "uuidv4"; +import { v4 as uuid } from "uuid"; import moment from 'moment-timezone'; import cron from 'node-cron'; import { getDecryptedProxyConfig } from './proxy'; @@ -23,7 +23,7 @@ chromium.use(stealthPlugin()); export const router = Router(); export const processWorkflowActions = async (workflow: any[], checkLimit: boolean = false): Promise => { - const processedWorkflow = JSON.parse(JSON.stringify(workflow)); + const processedWorkflow = JSON.parse(JSON.stringify(workflow)); processedWorkflow.forEach((pair: any) => { pair.what.forEach((action: any) => { @@ -108,52 +108,52 @@ router.get('/recordings/:id', requireSignIn, async (req, res) => { router.get(('/recordings/:id/runs'), requireSignIn, async (req, res) => { try { const runs = await Run.findAll({ - where: { - robotMetaId: req.params.id - }, - raw: true + where: { + robotMetaId: req.params.id + }, + raw: true }); const formattedRuns = runs.map(formatRunResponse); const response = { - statusCode: 200, - messageCode: "success", - runs: { + statusCode: 200, + messageCode: "success", + runs: { totalCount: formattedRuns.length, items: formattedRuns, - }, + }, }; res.status(200).json(response); -} catch (error) { + } catch (error) { console.error("Error fetching runs:", error); res.status(500).json({ - statusCode: 500, - messageCode: "error", - message: "Failed 
to retrieve runs", + statusCode: 500, + messageCode: "error", + message: "Failed to retrieve runs", }); -} + } }) function formatRunResponse(run: any) { const formattedRun = { - id: run.id, - status: run.status, - name: run.name, - robotId: run.robotMetaId, // Renaming robotMetaId to robotId - startedAt: run.startedAt, - finishedAt: run.finishedAt, - runId: run.runId, - runByUserId: run.runByUserId, - runByScheduleId: run.runByScheduleId, - runByAPI: run.runByAPI, - data: {}, - screenshot: null, + id: run.id, + status: run.status, + name: run.name, + robotId: run.robotMetaId, // Renaming robotMetaId to robotId + startedAt: run.startedAt, + finishedAt: run.finishedAt, + runId: run.runId, + runByUserId: run.runByUserId, + runByScheduleId: run.runByScheduleId, + runByAPI: run.runByAPI, + data: {}, + screenshot: null, }; if (run.serializableOutput && run.serializableOutput['item-0']) { - formattedRun.data = run.serializableOutput['item-0']; + formattedRun.data = run.serializableOutput['item-0']; } else if (run.binaryOutput && run.binaryOutput['item-0']) { - formattedRun.screenshot = run.binaryOutput['item-0']; + formattedRun.screenshot = run.binaryOutput['item-0']; } return formattedRun; @@ -170,81 +170,81 @@ interface Credentials { function handleWorkflowActions(workflow: any[], credentials: Credentials) { return workflow.map(step => { - if (!step.what) return step; + if (!step.what) return step; - const newWhat: any[] = []; - const processedSelectors = new Set(); - - for (let i = 0; i < step.what.length; i++) { - const action = step.what[i]; - - if (!action?.action || !action?.args?.[0]) { - newWhat.push(action); - continue; - } + const newWhat: any[] = []; + const processedSelectors = new Set(); - const selector = action.args[0]; - const credential = credentials[selector]; + for (let i = 0; i < step.what.length; i++) { + const action = step.what[i]; - if (!credential) { - newWhat.push(action); - continue; - } - - if (action.action === 'click') { - 
newWhat.push(action); - - if (!processedSelectors.has(selector) && - i + 1 < step.what.length && - (step.what[i + 1].action === 'type' || step.what[i + 1].action === 'press')) { - - newWhat.push({ - action: 'type', - args: [selector, encrypt(credential.value), credential.type] - }); - - newWhat.push({ - action: 'waitForLoadState', - args: ['networkidle'] - }); - - processedSelectors.add(selector); - - while (i + 1 < step.what.length && - (step.what[i + 1].action === 'type' || - step.what[i + 1].action === 'press' || - step.what[i + 1].action === 'waitForLoadState')) { - i++; - } - } - } else if ((action.action === 'type' || action.action === 'press') && - !processedSelectors.has(selector)) { - newWhat.push({ - action: 'type', - args: [selector, encrypt(credential.value), credential.type] - }); - - newWhat.push({ - action: 'waitForLoadState', - args: ['networkidle'] - }); - - processedSelectors.add(selector); - - // Skip subsequent type/press/waitForLoadState actions for this selector - while (i + 1 < step.what.length && - (step.what[i + 1].action === 'type' || - step.what[i + 1].action === 'press' || - step.what[i + 1].action === 'waitForLoadState')) { - i++; - } - } + if (!action?.action || !action?.args?.[0]) { + newWhat.push(action); + continue; } - return { - ...step, - what: newWhat - }; + const selector = action.args[0]; + const credential = credentials[selector]; + + if (!credential) { + newWhat.push(action); + continue; + } + + if (action.action === 'click') { + newWhat.push(action); + + if (!processedSelectors.has(selector) && + i + 1 < step.what.length && + (step.what[i + 1].action === 'type' || step.what[i + 1].action === 'press')) { + + newWhat.push({ + action: 'type', + args: [selector, encrypt(credential.value), credential.type] + }); + + newWhat.push({ + action: 'waitForLoadState', + args: ['networkidle'] + }); + + processedSelectors.add(selector); + + while (i + 1 < step.what.length && + (step.what[i + 1].action === 'type' || + step.what[i + 
1].action === 'press' || + step.what[i + 1].action === 'waitForLoadState')) { + i++; + } + } + } else if ((action.action === 'type' || action.action === 'press') && + !processedSelectors.has(selector)) { + newWhat.push({ + action: 'type', + args: [selector, encrypt(credential.value), credential.type] + }); + + newWhat.push({ + action: 'waitForLoadState', + args: ['networkidle'] + }); + + processedSelectors.add(selector); + + // Skip subsequent type/press/waitForLoadState actions for this selector + while (i + 1 < step.what.length && + (step.what[i + 1].action === 'type' || + step.what[i + 1].action === 'press' || + step.what[i + 1].action === 'waitForLoadState')) { + i++; + } + } + } + + return { + ...step, + what: newWhat + }; }); } @@ -275,7 +275,7 @@ router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, r if (targetUrl) { const updatedWorkflow = [...robot.recording.workflow]; - + for (let i = updatedWorkflow.length - 1; i >= 0; i--) { const step = updatedWorkflow[i]; for (let j = 0; j < step.what.length; j++) { @@ -286,7 +286,7 @@ router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, r if (step.where?.url && step.where.url !== "about:blank") { step.where.url = targetUrl; } - + robot.set('recording', { ...robot.recording, workflow: updatedWorkflow }); robot.changed('recording', true); i = -1; @@ -307,16 +307,16 @@ router.put('/recordings/:id', requireSignIn, async (req: AuthenticatedRequest, r if (limits && Array.isArray(limits) && limits.length > 0) { for (const limitInfo of limits) { const { pairIndex, actionIndex, argIndex, limit } = limitInfo; - + const pair = workflow[pairIndex]; if (!pair || !pair.what) continue; - + const action = pair.what[actionIndex]; if (!action || !action.args) continue; - + const arg = action.args[argIndex]; if (!arg || typeof arg !== 'object') continue; - + (arg as { limit: number }).limit = limit; } } @@ -384,7 +384,7 @@ router.post('/recordings/:id/duplicate', requireSignIn, async 
(req: Authenticate step.what.forEach((action) => { if (action.action === "goto" && action.args?.length) { - action.args[0] = targetUrl; + action.args[0] = targetUrl; } }); @@ -394,22 +394,22 @@ router.post('/recordings/:id/duplicate', requireSignIn, async (req: Authenticate const currentTimestamp = new Date().toLocaleString(); const newRobot = await Robot.create({ - id: uuid(), - userId: originalRobot.userId, + id: uuid(), + userId: originalRobot.userId, recording_meta: { ...originalRobot.recording_meta, id: uuid(), name: `${originalRobot.recording_meta.name} (${lastWord})`, - createdAt: currentTimestamp, - updatedAt: currentTimestamp, - }, - recording: { ...originalRobot.recording, workflow }, - google_sheet_email: null, + createdAt: currentTimestamp, + updatedAt: currentTimestamp, + }, + recording: { ...originalRobot.recording, workflow }, + google_sheet_email: null, google_sheet_name: null, google_sheet_id: null, google_access_token: null, google_refresh_token: null, - schedule: null, + schedule: null, }); logger.log('info', `Robot with ID ${id} duplicated successfully as ${newRobot.id}.`); @@ -517,98 +517,124 @@ router.put('/runs/:id', requireSignIn, async (req: AuthenticatedRequest, res) => return res.status(401).send({ error: 'Unauthorized' }); } - const proxyConfig = await getDecryptedProxyConfig(req.user.id); - let proxyOptions: any = {}; - - if (proxyConfig.proxy_url) { - proxyOptions = { - server: proxyConfig.proxy_url, - ...(proxyConfig.proxy_username && proxyConfig.proxy_password && { - username: proxyConfig.proxy_username, - password: proxyConfig.proxy_password, - }), - }; - } - - console.log(`Proxy config for run: ${JSON.stringify(proxyOptions)}`); - // Generate runId first const runId = uuid(); - // Check if user has reached browser limit - const userBrowserIds = browserPool.getAllBrowserIdsForUser(req.user.id); - const canCreateBrowser = userBrowserIds.length < 2; - - if (canCreateBrowser) { - // User has available browser slots, create it directly 
- const id = createRemoteBrowserForRun(req.user.id); + const canCreateBrowser = await browserPool.hasAvailableBrowserSlots(req.user.id, "run"); - const run = await Run.create({ - status: 'running', + if (canCreateBrowser) { + let browserId: string; + + try { + browserId = await createRemoteBrowserForRun(req.user.id); + + if (!browserId || browserId.trim() === '') { + throw new Error('Failed to generate valid browser ID'); + } + + logger.log('info', `Created browser ${browserId} for run ${runId}`); + + } catch (browserError: any) { + logger.log('error', `Failed to create browser: ${browserError.message}`); + return res.status(500).send({ error: 'Failed to create browser instance' }); + } + + try { + await Run.create({ + status: 'running', + name: recording.recording_meta.name, + robotId: recording.id, + robotMetaId: recording.recording_meta.id, + startedAt: new Date().toLocaleString(), + finishedAt: '', + browserId: browserId, + interpreterSettings: req.body, + log: '', + runId, + runByUserId: req.user.id, + serializableOutput: {}, + binaryOutput: {}, + }); + + logger.log('info', `Created run ${runId} with browser ${browserId}`); + + } catch (dbError: any) { + logger.log('error', `Database error creating run: ${dbError.message}`); + + try { + await destroyRemoteBrowser(browserId, req.user.id); + } catch (cleanupError: any) { + logger.log('warn', `Failed to cleanup browser after run creation failure: ${cleanupError.message}`); + } + + return res.status(500).send({ error: 'Failed to create run record' }); + } + + try { + const userQueueName = `execute-run-user-${req.user.id}`; + await pgBoss.createQueue(userQueueName); + + const jobId = await pgBoss.send(userQueueName, { + userId: req.user.id, + runId: runId, + browserId: browserId, + }); + + logger.log('info', `Queued run execution job with ID: ${jobId} for run: ${runId}`); + } catch (queueError: any) { + logger.log('error', `Failed to queue run execution: ${queueError.message}`); + + try { + await Run.update({ + 
status: 'failed', + finishedAt: new Date().toLocaleString(), + log: 'Failed to queue execution job' + }, { where: { runId: runId } }); + + await destroyRemoteBrowser(browserId, req.user.id); + } catch (cleanupError: any) { + logger.log('warn', `Failed to cleanup after queue error: ${cleanupError.message}`); + } + + return res.status(503).send({ error: 'Unable to queue run, please try again later' }); + } + + return res.send({ + browserId: browserId, + runId: runId, + robotMetaId: recording.recording_meta.id, + queued: false + }); + } else { + const browserId = uuid(); + + await Run.create({ + status: 'queued', name: recording.recording_meta.name, robotId: recording.id, robotMetaId: recording.recording_meta.id, startedAt: new Date().toLocaleString(), finishedAt: '', - browserId: id, + browserId, interpreterSettings: req.body, - log: '', + log: 'Run queued - waiting for available browser slot', runId, runByUserId: req.user.id, serializableOutput: {}, binaryOutput: {}, }); - - const plainRun = run.toJSON(); - + return res.send({ - browserId: id, - runId: plainRun.runId, + browserId: browserId, + runId: runId, robotMetaId: recording.recording_meta.id, - queued: false + queued: true }); - } else { - const browserId = getActiveBrowserIdByState(req.user.id, "run") - - if (browserId) { - // User has reached the browser limit, queue the run - try { - // Create the run record with 'queued' status - await Run.create({ - status: 'queued', - name: recording.recording_meta.name, - robotId: recording.id, - robotMetaId: recording.recording_meta.id, - startedAt: new Date().toLocaleString(), - finishedAt: '', - browserId: browserId, // Random will be updated later - interpreterSettings: req.body, - log: 'Run queued - waiting for available browser slot', - runId, - runByUserId: req.user.id, - serializableOutput: {}, - binaryOutput: {}, - }); - - return res.send({ - browserId: browserId, - runId: runId, - robotMetaId: recording.recording_meta.id, - queued: true, - }); - } catch 
(queueError: any) { - logger.log('error', `Failed to queue run job: ${queueError.message}`); - return res.status(503).send({ error: 'Unable to queue run, please try again later' }); - } - } else { - logger.log('info', "Browser id does not exist"); - return res.send(''); - } - } + } } catch (e) { const { message } = e as Error; - logger.log('info', `Error while creating a run with robot id: ${req.params.id} - ${message}`); - return res.send(''); + logger.log('error', `Error while creating a run with robot id: ${req.params.id} - ${message}`); + return res.status(500).send({ error: 'Internal server error' }); } }); @@ -664,17 +690,17 @@ router.post('/runs/run/:id', requireSignIn, async (req: AuthenticatedRequest, re // Queue the execution job await pgBoss.createQueue(userQueueName); - + const jobId = await pgBoss.send(userQueueName, { userId: req.user.id, runId: req.params.id, browserId: plainRun.browserId }); - + logger.log('info', `Queued run execution job with ID: ${jobId} for run: ${req.params.id}`); } catch (queueError: any) { logger.log('error', `Failed to queue run execution`); - + } } catch (e) { const { message } = e as Error; @@ -891,31 +917,128 @@ router.post('/runs/abort/:id', requireSignIn, async (req: AuthenticatedRequest, try { if (!req.user) { return res.status(401).send({ error: 'Unauthorized' }); } - const run = await Run.findOne({ where: { - runId: req.params.id, - runByUserId: req.user.id, - } }); + const run = await Run.findOne({ where: { runId: req.params.id } }); if (!run) { - return res.status(404).send(false); + return res.status(404).send({ error: 'Run not found' }); } - - const userQueueName = `abort-run-user-${req.user.id}`; - await pgBoss.createQueue(userQueueName); - - await pgBoss.send(userQueueName, { - userId: req.user.id, - runId: req.params.id - }); - + + if (!['running', 'queued'].includes(run.status)) { + return res.status(400).send({ + error: `Cannot abort run with status: ${run.status}` + }); + } + + const isQueued = run.status 
=== 'queued'; + await run.update({ status: 'aborting' }); + + if (isQueued) { + await run.update({ + status: 'aborted', + finishedAt: new Date().toLocaleString(), + log: 'Run aborted while queued' + }); + + return res.send({ + success: true, + message: 'Queued run aborted', + isQueued: true + }); + } + + const userQueueName = `abort-run-user-${req.user.id}`; + await pgBoss.createQueue(userQueueName); + + const jobId = await pgBoss.send(userQueueName, { + userId: req.user.id, + runId: req.params.id + }); + + logger.log('info', `Abort signal sent for run ${req.params.id}, job ID: ${jobId}`); + + return res.send({ + success: true, + message: 'Abort signal sent', + jobId, + isQueued: false + }); - return res.send(true); } catch (e) { const { message } = e as Error; - logger.log('info', `Error while aborting run with id: ${req.params.id} - ${message}`); - return res.send(false); + logger.log('error', `Error aborting run ${req.params.id}: ${message}`); + return res.status(500).send({ error: 'Failed to abort run' }); } }); + +async function processQueuedRuns() { + try { + const queuedRun = await Run.findOne({ + where: { status: 'queued' }, + order: [['startedAt', 'ASC']] + }); + + if (!queuedRun) return; + + const userId = queuedRun.runByUserId; + + const canCreateBrowser = await browserPool.hasAvailableBrowserSlots(userId, "run"); + + if (canCreateBrowser) { + logger.log('info', `Processing queued run ${queuedRun.runId} for user ${userId}`); + + const recording = await Robot.findOne({ + where: { + 'recording_meta.id': queuedRun.robotMetaId + }, + raw: true + }); + + if (!recording) { + await queuedRun.update({ + status: 'failed', + finishedAt: new Date().toLocaleString(), + log: 'Recording not found' + }); + return; + } + + try { + const newBrowserId = await createRemoteBrowserForRun(userId); + + logger.log('info', `Created and initialized browser ${newBrowserId} for queued run ${queuedRun.runId}`); + + await queuedRun.update({ + status: 'running', + browserId: 
newBrowserId, + log: 'Browser created and ready for execution' + }); + + const userQueueName = `execute-run-user-${userId}`; + await pgBoss.createQueue(userQueueName); + + const jobId = await pgBoss.send(userQueueName, { + userId: userId, + runId: queuedRun.runId, + browserId: newBrowserId, + }); + + logger.log('info', `Queued execution for run ${queuedRun.runId} with ready browser ${newBrowserId}, job ID: ${jobId}`); + + } catch (browserError: any) { + logger.log('error', `Failed to create browser for queued run: ${browserError.message}`); + await queuedRun.update({ + status: 'failed', + finishedAt: new Date().toLocaleString(), + log: `Failed to create browser: ${browserError.message}` + }); + } + } + } catch (error: any) { + logger.log('error', `Error processing queued runs: ${error.message}`); + } +} + +export { processQueuedRuns }; diff --git a/server/src/routes/webhook.ts b/server/src/routes/webhook.ts new file mode 100644 index 00000000..bba8ec8f --- /dev/null +++ b/server/src/routes/webhook.ts @@ -0,0 +1,493 @@ +import { Router, Request, Response } from 'express'; +import Robot from '../models/Robot'; +import { requireSignIn } from '../middlewares/auth'; +import axios from 'axios'; +import { v4 as uuid } from "uuid"; + +export const router = Router(); + +interface AuthenticatedRequest extends Request { + user?: { id: string }; +} + +interface WebhookConfig { + id: string; + url: string; + events: string[]; + active: boolean; + createdAt: string; + updatedAt: string; + lastCalledAt?: string | null; + retryAttempts?: number; + retryDelay?: number; + timeout?: number; +} + +const updateWebhookLastCalled = async (robotId: string, webhookId: string): Promise => { + try { + const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); + if (!robot || !robot.webhooks) { + return; + } + + const updatedWebhooks = robot.webhooks.map((w: WebhookConfig) => { + if (w.id === webhookId) { + return { + ...w, + lastCalledAt: new Date().toISOString() + }; + 
} + return w; + }); + + await robot.update({ webhooks: updatedWebhooks }); + } catch (error) { + console.error('Error updating webhook lastCalledAt:', error); + } +}; + +// Add new webhook +router.post('/add', requireSignIn, async (req: Request, res: Response) => { + const { webhook, robotId } = req.body; + const authenticatedReq = req as AuthenticatedRequest; + + try { + if (!authenticatedReq.user) { + return res.status(401).json({ ok: false, error: 'Unauthorized' }); + } + + if (!webhook || !robotId) { + return res.status(400).json({ ok: false, error: 'Webhook configuration and robot ID are required' }); + } + + if (!webhook.url) { + return res.status(400).json({ ok: false, error: 'Webhook URL is required' }); + } + + // Validate URL format + try { + new URL(webhook.url); + } catch (error) { + return res.status(400).json({ ok: false, error: 'Invalid webhook URL format' }); + } + + const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); + + if (!robot) { + return res.status(404).json({ ok: false, error: 'Robot not found' }); + } + + const currentWebhooks = robot.webhooks || []; + + const existingWebhook = currentWebhooks.find((w: WebhookConfig) => w.url === webhook.url); + if (existingWebhook) { + return res.status(400).json({ ok: false, error: 'Webhook with this url already exists' }); + } + + const newWebhook: WebhookConfig = { + ...webhook, + id: webhook.id || uuid(), + createdAt: new Date().toISOString(), + updatedAt: new Date().toISOString(), + lastCalledAt: null, + retryAttempts: webhook.retryAttempts || 3, + retryDelay: webhook.retryDelay || 5, + timeout: webhook.timeout || 30, + }; + + const updatedWebhooks = [...currentWebhooks, newWebhook]; + + await robot.update({ webhooks: updatedWebhooks }); + + res.status(200).json({ + ok: true, + message: 'Webhook added successfully', + webhook: newWebhook + }); + } catch (error: any) { + console.log(`Could not add webhook - ${error}`); + res.status(500).json({ ok: false, error: 'Could not 
add webhook configuration' }); + } +}); + +// Update existing webhook +router.post('/update', requireSignIn, async (req: Request, res: Response) => { + const { webhook, robotId } = req.body; + const authenticatedReq = req as AuthenticatedRequest; + + try { + if (!authenticatedReq.user) { + return res.status(401).json({ ok: false, error: 'Unauthorized' }); + } + + if (!webhook || !robotId || !webhook.id) { + return res.status(400).json({ ok: false, error: 'Webhook configuration, webhook ID, and robot ID are required' }); + } + + // Validate URL format if provided + if (webhook.url) { + try { + new URL(webhook.url); + } catch (error) { + return res.status(400).json({ ok: false, error: 'Invalid webhook URL format' }); + } + } + + const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); + + if (!robot) { + return res.status(404).json({ ok: false, error: 'Robot not found' }); + } + + const currentWebhooks = robot.webhooks || []; + const webhookIndex = currentWebhooks.findIndex((w: WebhookConfig) => w.id === webhook.id); + + if (webhookIndex === -1) { + return res.status(404).json({ ok: false, error: 'Webhook not found' }); + } + + // Check for duplicate URLs (excluding current webhook) + const duplicateUrl = currentWebhooks.find((w: WebhookConfig, index: number) => + w.url === webhook.url && index !== webhookIndex + ); + if (duplicateUrl) { + return res.status(400).json({ ok: false, error: 'Webhook with this URL already exists' }); + } + + const updatedWebhook: WebhookConfig = { + ...currentWebhooks[webhookIndex], + ...webhook, + updatedAt: new Date().toISOString(), + lastCalledAt: currentWebhooks[webhookIndex].lastCalledAt + }; + + const updatedWebhooks = [...currentWebhooks]; + updatedWebhooks[webhookIndex] = updatedWebhook; + + await robot.update({ webhooks: updatedWebhooks }); + + res.status(200).json({ + ok: true, + message: 'Webhook updated successfully', + webhook: updatedWebhook + }); + } catch (error: any) { + console.log(`Could not 
update webhook - ${error}`); + res.status(500).json({ ok: false, error: 'Could not update webhook configuration' }); + } +}); + +// Remove webhook +router.post('/remove', requireSignIn, async (req: Request, res: Response) => { + const { webhookId, robotId } = req.body; + const authenticatedReq = req as AuthenticatedRequest; + + try { + if (!authenticatedReq.user) { + return res.status(401).json({ ok: false, error: 'Unauthorized' }); + } + + if (!webhookId || !robotId) { + return res.status(400).json({ ok: false, error: 'Webhook ID and robot ID are required' }); + } + + const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); + + if (!robot) { + return res.status(404).json({ ok: false, error: 'Robot not found' }); + } + + const currentWebhooks = robot.webhooks || []; + const webhookExists = currentWebhooks.find((w: WebhookConfig) => w.id === webhookId); + + if (!webhookExists) { + return res.status(404).json({ ok: false, error: 'Webhook not found' }); + } + + const updatedWebhooks = currentWebhooks.filter((w: WebhookConfig) => w.id !== webhookId); + + await robot.update({ webhooks: updatedWebhooks }); + + res.status(200).json({ + ok: true, + message: 'Webhook removed successfully' + }); + } catch (error: any) { + console.log(`Could not remove webhook - ${error}`); + res.status(500).json({ ok: false, error: 'Could not remove webhook configuration' }); + } +}); + +// Get all webhooks for a robot +router.get('/list/:robotId', requireSignIn, async (req: Request, res: Response) => { + const { robotId } = req.params; + const authenticatedReq = req as AuthenticatedRequest; + + try { + if (!authenticatedReq.user) { + return res.status(401).json({ ok: false, error: 'Unauthorized' }); + } + + const robot = await Robot.findOne({ + where: { 'recording_meta.id': robotId }, + attributes: ['webhooks'] + }); + + if (!robot) { + return res.status(404).json({ ok: false, error: 'Robot not found' }); + } + + const webhooks = robot.webhooks || []; + + 
res.status(200).json({ + ok: true, + webhooks: webhooks + }); + } catch (error: any) { + console.log(`Could not retrieve webhooks - ${error}`); + res.status(500).json({ ok: false, error: 'Could not retrieve webhook configurations' }); + } +}); + +// Test webhook endpoint +router.post('/test', requireSignIn, async (req: Request, res: Response) => { + const { webhook, robotId } = req.body; + const authenticatedReq = req as AuthenticatedRequest; + + try { + if (!authenticatedReq.user) { + return res.status(401).json({ ok: false, error: 'Unauthorized' }); + } + + if (!webhook || !robotId) { + return res.status(400).json({ ok: false, error: 'Webhook configuration and robot ID are required' }); + } + + const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); + + if (!robot) { + return res.status(404).json({ ok: false, error: 'Robot not found' }); + } + + // Create test payload + const testPayload = { + event_type: "webhook_test", + timestamp: new Date().toISOString(), + webhook_id: webhook.id, + data: { + robot_id: robotId, + run_id: "110c4dae-c39b-4b30-a932-eff1022e4bb0", + robot_name: robot.recording_meta?.name || "E-commerce Product Scraper", + status: "test", + started_at: new Date(Date.now() - 45000).toISOString(), + finished_at: new Date().toISOString(), + extracted_data: { + captured_texts: [ + { + "Product Name": "MacBook Pro 16-inch M3 Max", + "Price": "$3,999.00", + "Rating": "4.8/5 stars", + "Availability": "In Stock - Ships within 2-3 business days", + "SKU": "MBPM3-16-1TB-SLV", + "Description": "The most powerful MacBook Pro ever is here. With the blazing-fast M3 Max chip, pro-level performance has never been more portable." 
+ } + ], + captured_lists: { + "list_1": [ + { + "Rank": "1", + "Product": "MacBook Air M2", + "Category": "Laptops", + "Units Sold": "2,847", + "Revenue": "$2,847,000" + }, + { + "Rank": "2", + "Product": "iPhone 15", + "Category": "Smartphones", + "Units Sold": "1,923", + "Revenue": "$1,923,000" + }, + { + "Rank": "3", + "Product": "iPad Pro 12.9", + "Category": "Tablets", + "Units Sold": "1,456", + "Revenue": "$1,456,000" + } + ], + "list_0": [ + { + "Customer": "Sarah M.", + "Rating": "5 stars", + "Review": "Absolutely love my new MacBook! The battery life is incredible and the performance is outstanding.", + "Date": "2024-12-15", + "Verified Purchase": "Yes" + }, + { + "Customer": "John D.", + "Rating": "4 stars", + "Review": "Great phone overall, but wish the battery lasted a bit longer with heavy usage.", + "Date": "2024-12-14", + "Verified Purchase": "Yes" + }, + { + "Customer": "Emily R.", + "Rating": "5 stars", + "Review": "The camera quality is phenomenal! Perfect for my photography business.", + "Date": "2024-12-13", + "Verified Purchase": "Yes" + } + ], + }, + total_rows: 11, + captured_texts_count: 5, + captured_lists_count: 6, + screenshots_count: 5 + }, + metadata: { + test_mode: true, + browser_id: "d27ace57-75cb-441c-8589-8ba34e52f7d1", + user_id: 108, + } + } + }; + + await updateWebhookLastCalled(robotId, webhook.id); + + const response = await axios.post(webhook.url, testPayload, { + timeout: (webhook.timeout || 30) * 1000, + validateStatus: (status) => status < 500 + }); + + const success = response.status >= 200 && response.status < 300; + + res.status(200).json({ + ok: true, + message: success ? 
'Test webhook sent successfully' : 'Webhook endpoint responded with non-success status', + details: { + status: response.status, + statusText: response.statusText, + success: success + } + }); + } catch (error: any) { + console.log(`Could not test webhook - ${error}`); + + try { + await updateWebhookLastCalled(robotId, webhook.id); + } catch (updateError) { + console.error('Failed to update lastCalledAt after webhook error:', updateError); + } + + let errorMessage = 'Could not send test webhook'; + if (error.code === 'ECONNREFUSED') { + errorMessage = 'Connection refused - webhook URL is not accessible'; + } else if (error.code === 'ETIMEDOUT') { + errorMessage = 'Request timeout - webhook endpoint did not respond in time'; + } else if (error.response) { + errorMessage = `Webhook endpoint responded with error: ${error.response.status} ${error.response.statusText}`; + } + + res.status(500).json({ + ok: false, + error: errorMessage, + details: { + code: error.code, + message: error.message + } + }); + } +}); + +// Send webhook +export const sendWebhook = async (robotId: string, eventType: string, data: any): Promise => { + try { + const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); + if (!robot || !robot.webhooks) { + return; + } + + const activeWebhooks = robot.webhooks.filter((w: WebhookConfig) => + w.active && w.events.includes(eventType) + ); + + if (activeWebhooks.length === 0) { + return; + } + + const webhookPromises = activeWebhooks.map(async (webhook: WebhookConfig) => { + const payload = { + event_type: eventType, + timestamp: new Date().toISOString(), + webhook_id: webhook.id, + data: data + }; + + return sendWebhookWithRetry(robotId, webhook, payload); + }); + + await Promise.allSettled(webhookPromises); + } catch (error) { + console.error('Error sending webhooks:', error); + } +}; + +// Helper function to send webhook with retry logic +const sendWebhookWithRetry = async (robotId: string, webhook: WebhookConfig, payload: any, 
attempt: number = 1): Promise => { + const maxRetries = webhook.retryAttempts || 3; + const retryDelay = webhook.retryDelay || 5; + const timeout = webhook.timeout || 30; + + try { + await updateWebhookLastCalled(robotId, webhook.id); + + const response = await axios.post(webhook.url, payload, { + timeout: timeout * 1000, + validateStatus: (status) => status >= 200 && status < 300 + }); + + console.log(`Webhook sent successfully to ${webhook.url}: ${response.status}`); + } catch (error: any) { + console.error(`Webhook failed for ${webhook.url} (attempt ${attempt}):`, error.message); + + if (attempt < maxRetries) { + const delay = retryDelay * Math.pow(2, attempt - 1); + console.log(`Retrying webhook ${webhook.url} in ${delay} seconds...`); + + setTimeout(async () => { + await sendWebhookWithRetry(robotId, webhook, payload, attempt + 1); + }, delay * 1000); + } else { + console.error(`Webhook ${webhook.url} failed after ${maxRetries} attempts`); + } + } +}; + +// Clear all webhooks for a robot +router.delete('/clear/:robotId', requireSignIn, async (req: Request, res: Response) => { + const { robotId } = req.params; + const authenticatedReq = req as AuthenticatedRequest; + + try { + if (!authenticatedReq.user) { + return res.status(401).json({ ok: false, error: 'Unauthorized' }); + } + + const robot = await Robot.findOne({ where: { 'recording_meta.id': robotId } }); + + if (!robot) { + return res.status(404).json({ ok: false, error: 'Robot not found' }); + } + + await robot.update({ webhooks: [] }); + + res.status(200).json({ + ok: true, + message: 'All webhooks cleared successfully' + }); + } catch (error: any) { + console.log(`Could not clear webhooks - ${error}`); + res.status(500).json({ ok: false, error: 'Could not clear webhook configurations' }); + } +}); \ No newline at end of file diff --git a/server/src/schedule-worker.ts b/server/src/schedule-worker.ts index 91c3c224..c75770e4 100644 --- a/server/src/schedule-worker.ts +++ b/server/src/schedule-worker.ts 
@@ -6,6 +6,7 @@ import logger from './logger'; import Robot from './models/Robot'; import { handleRunRecording } from './workflow-management/scheduler'; import { computeNextRun } from './utils/schedule'; +import { v4 as uuid } from "uuid"; if (!process.env.DB_USER || !process.env.DB_PASSWORD || !process.env.DB_HOST || !process.env.DB_PORT || !process.env.DB_NAME) { throw new Error('One or more required environment variables are missing.'); @@ -32,7 +33,7 @@ interface ScheduledWorkflowData { */ export async function scheduleWorkflow(id: string, userId: string, cronExpression: string, timezone: string): Promise { try { - const runId = require('uuidv4').uuid(); + const runId = uuid(); const queueName = `scheduled-workflow-${id}`; diff --git a/server/src/server.ts b/server/src/server.ts index 11b25a37..0a107b88 100644 --- a/server/src/server.ts +++ b/server/src/server.ts @@ -4,7 +4,7 @@ import http from 'http'; import cors from 'cors'; import dotenv from 'dotenv'; dotenv.config(); -import { record, workflow, storage, auth, integration, proxy } from './routes'; +import { record, workflow, storage, auth, integration, proxy, webhook } from './routes'; import { BrowserPool } from "./browser-management/classes/BrowserPool"; import logger from './logger'; import { connectDB, syncDB } from './storage/db' @@ -20,6 +20,7 @@ import connectPgSimple from 'connect-pg-simple'; import pg from 'pg'; import session from 'express-session'; import Run from './models/Run'; +import { processQueuedRuns } from './routes/storage'; const app = express(); app.use(cors({ @@ -83,11 +84,9 @@ export const io = new Server(server); */ export const browserPool = new BrowserPool(); -// app.use(bodyParser.json({ limit: '10mb' })) -// app.use(bodyParser.urlencoded({ extended: true, limit: '10mb', parameterLimit: 9000 })); -// parse cookies - "cookie" is true in csrfProtection app.use(cookieParser()) +app.use('/webhook', webhook); app.use('/record', record); app.use('/workflow', workflow); 
app.use('/storage', storage); @@ -98,9 +97,9 @@ app.use('/api-docs', swaggerUi.serve, swaggerUi.setup(swaggerSpec)); readdirSync(path.join(__dirname, 'api')).forEach((r) => { const route = require(path.join(__dirname, 'api', r)); - const router = route.default || route; // Use .default if available, fallback to route + const router = route.default || route; if (typeof router === 'function') { - app.use('/api', router); // Use the default export or named router + app.use('/api', router); } else { console.error(`Error: ${r} does not export a valid router`); } @@ -150,7 +149,6 @@ app.get('/', function (req, res) { return res.send('Maxun server started 🚀'); }); -// Add CORS headers app.use((req, res, next) => { res.header('Access-Control-Allow-Origin', process.env.PUBLIC_URL || 'http://localhost:5173'); res.header('Access-Control-Allow-Methods', 'GET,PUT,POST,DELETE,OPTIONS'); @@ -178,14 +176,19 @@ io.of('/queued-run').on('connection', (socket) => { } }); +setInterval(() => { + processQueuedRuns(); +}, 5000); + + server.listen(SERVER_PORT, '0.0.0.0', async () => { try { await connectDB(); await syncDB(); - logger.log('info', `Server listening on port ${SERVER_PORT}`); + logger.log('info', `Server listening on port ${SERVER_PORT}`); } catch (error: any) { logger.log('error', `Failed to connect to the database: ${error.message}`); - process.exit(1); // Exit the process if DB connection fails + process.exit(1); } }); @@ -219,4 +222,4 @@ process.on('SIGINT', async () => { if (recordingWorkerProcess) recordingWorkerProcess.kill(); } process.exit(); -}); +}); \ No newline at end of file diff --git a/server/src/workflow-management/classes/Generator.ts b/server/src/workflow-management/classes/Generator.ts index 1be328aa..eb0e33a7 100644 --- a/server/src/workflow-management/classes/Generator.ts +++ b/server/src/workflow-management/classes/Generator.ts @@ -15,7 +15,7 @@ import { import { CustomActions } from "../../../../src/shared/types"; import Robot from "../../models/Robot"; 
import { getBestSelectorForAction } from "../utils"; -import { uuid } from "uuidv4"; +import { v4 as uuid } from "uuid"; import { capture } from "../../utils/analytics" import { decrypt, encrypt } from "../../utils/auth"; @@ -82,6 +82,7 @@ export class WorkflowGenerator { this.poolId = poolId; this.registerEventHandlers(socket); this.initializeSocketListeners(); + this.initializeDOMListeners(); } /** @@ -92,6 +93,8 @@ export class WorkflowGenerator { workflow: [], }; + private isDOMMode: boolean = false; + /** * Metadata of the currently recorded workflow. * @private @@ -134,6 +137,18 @@ export class WorkflowGenerator { }) } + private initializeDOMListeners() { + this.socket.on('dom-mode-enabled', () => { + this.isDOMMode = true; + logger.log('debug', 'Generator: DOM mode enabled'); + }); + + this.socket.on('screenshot-mode-enabled', () => { + this.isDOMMode = false; + logger.log('debug', 'Generator: Screenshot mode enabled'); + }); + } + /** * Registers the event handlers for all generator-related events on the socket. * @param socket The socket used to communicate with the client. 
@@ -159,9 +174,11 @@ export class WorkflowGenerator { switch (actionType) { case 'customAction': // pair.where.selectors = [this.generatedData.lastUsedSelector]; - pair.where.selectors = pair.where.selectors.filter( - (selector: string) => selector !== this.generatedData.lastUsedSelector - ); + if (pair.where.selectors) { + pair.where.selectors = pair.where.selectors.filter( + (selector: string) => selector !== this.generatedData.lastUsedSelector + ); + } break; default: break; } @@ -348,6 +365,96 @@ export class WorkflowGenerator { await this.addPairToWorkflowAndNotifyClient(pair, page); }; + // Handles click events on the DOM, generating a pair for the click action + public onDOMClickAction = async (page: Page, data: { + selector: string, + url: string, + userId: string, + elementInfo?: any, + coordinates?: { x: number, y: number } + }) => { + const { selector, url, elementInfo, coordinates } = data; + + const pair: WhereWhatPair = { + where: { + url: this.getBestUrl(url), + selectors: [selector] + }, + what: [{ + action: 'click', + args: [selector], + }], + }; + + // Handle special input elements with cursor positioning + if (elementInfo && coordinates && + (elementInfo.tagName === 'INPUT' || elementInfo.tagName === 'TEXTAREA')) { + pair.what[0] = { + action: 'click', + args: [selector, { position: coordinates }, { cursorIndex: 0 }], + }; + } + + this.generatedData.lastUsedSelector = selector; + this.generatedData.lastAction = 'click'; + + await this.addPairToWorkflowAndNotifyClient(pair, page); + }; + + // Handles keyboard actions on the DOM, generating a pair for the key press action + public onDOMKeyboardAction = async (page: Page, data: { + selector: string, + key: string, + url: string, + userId: string, + inputType?: string + }) => { + const { selector, key, url, inputType } = data; + + const pair: WhereWhatPair = { + where: { + url: this.getBestUrl(url), + selectors: [selector] + }, + what: [{ + action: 'press', + args: [selector, encrypt(key), inputType 
|| 'text'], + }], + }; + + this.generatedData.lastUsedSelector = selector; + this.generatedData.lastAction = 'press'; + + await this.addPairToWorkflowAndNotifyClient(pair, page); + }; + + // Handles navigation events on the DOM, generating a pair for the navigation action + public onDOMNavigation = async (page: Page, data: { + url: string, + currentUrl: string, + userId: string + }) => { + const { url, currentUrl } = data; + + const pair: WhereWhatPair = { + where: { url: this.getBestUrl(currentUrl) }, + what: [{ + action: 'goto', + args: [url], + }], + }; + + this.generatedData.lastUsedSelector = ''; + await this.addPairToWorkflowAndNotifyClient(pair, page); + }; + + // Handles workflow pair events on the DOM + public onDOMWorkflowPair = async (page: Page, data: { pair: WhereWhatPair, userId: string }) => { + const { pair } = data; + + await this.addPairToWorkflowAndNotifyClient(pair, page); + }; + /** * Generates a pair for the click event. * @param coordinates The coordinates of the click event. @@ -708,6 +815,7 @@ export class WorkflowGenerator { this.socket = socket; this.registerEventHandlers(socket); this.initializeSocketListeners(); + this.initializeDOMListeners(); }; /** @@ -890,6 +998,7 @@ export class WorkflowGenerator { rect, selector: displaySelector, elementInfo, + isDOMMode: this.isDOMMode, // Include shadow DOM specific information shadowInfo: elementInfo?.isShadowRoot ? 
{ mode: elementInfo.shadowRootMode, diff --git a/server/src/workflow-management/classes/Interpreter.ts b/server/src/workflow-management/classes/Interpreter.ts index 14877600..f249f26e 100644 --- a/server/src/workflow-management/classes/Interpreter.ts +++ b/server/src/workflow-management/classes/Interpreter.ts @@ -107,6 +107,11 @@ export class WorkflowInterpreter { */ public binaryData: { mimetype: string, data: string }[] = []; + /** + * Track current scrapeList index + */ + private currentScrapeListIndex: number = 0; + /** * An array of id's of the pairs from the workflow that are about to be paused. * As "breakpoints". @@ -288,6 +293,7 @@ export class WorkflowInterpreter { scrapeList: [], }; this.binaryData = []; + this.currentScrapeListIndex = 0; } /** @@ -322,6 +328,9 @@ export class WorkflowInterpreter { }, setActionType: (type: string) => { this.currentActionType = type; + }, + incrementScrapeListIndex: () => { + this.currentScrapeListIndex++; } }, serializableCallback: (data: any) => { @@ -334,7 +343,7 @@ export class WorkflowInterpreter { this.serializableDataByType.scrapeSchema.push([data]); } } else if (this.currentActionType === 'scrapeList') { - this.serializableDataByType.scrapeList.push(data); + this.serializableDataByType.scrapeList[this.currentScrapeListIndex] = data; } this.socket.emit('serializableCallback', data); @@ -372,25 +381,19 @@ export class WorkflowInterpreter { log: this.debugMessages, result: status, scrapeSchemaOutput: Object.keys(mergedScrapeSchema).length > 0 - ? { "schema-merged": [mergedScrapeSchema] } + ? 
{ "schema_merged": [mergedScrapeSchema] } : this.serializableDataByType.scrapeSchema.reduce((reducedObject, item, index) => { - return { - [`schema-${index}`]: item, - ...reducedObject, - } - }, {}), + reducedObject[`schema_${index}`] = item; + return reducedObject; + }, {} as Record), scrapeListOutput: this.serializableDataByType.scrapeList.reduce((reducedObject, item, index) => { - return { - [`list-${index}`]: item, - ...reducedObject, - } - }, {}), + reducedObject[`list_${index}`] = item; + return reducedObject; + }, {} as Record), binaryOutput: this.binaryData.reduce((reducedObject, item, index) => { - return { - [`item-${index}`]: item, - ...reducedObject, - } - }, {}) + reducedObject[`item_${index}`] = item; + return reducedObject; + }, {} as Record) } logger.log('debug', `Interpretation finished`); diff --git a/server/src/workflow-management/integrations/airtable.ts b/server/src/workflow-management/integrations/airtable.ts index 401bc11d..5f72c836 100644 --- a/server/src/workflow-management/integrations/airtable.ts +++ b/server/src/workflow-management/integrations/airtable.ts @@ -44,65 +44,100 @@ async function refreshAirtableToken(refreshToken: string) { } } + function mergeRelatedData(serializableOutput: SerializableOutput, binaryOutput: Record) { - const mergedRecords: Record[] = []; + const allRecords: Record[] = []; - const maxLength = Math.max( - ...[ - ...(serializableOutput.scrapeSchema ?? []).map(arr => arr?.length ?? 0), - ...(serializableOutput.scrapeList ?? []).map(arr => arr?.length ?? 
0), - 0 - ] - ); - - for (let i = 0; i < maxLength; i++) { - mergedRecords.push({}); - } + const schemaData: Array<{key: string, value: any}> = []; + const listData: any[] = []; + const screenshotData: Array<{key: string, url: string}> = []; + // Collect schema data if (serializableOutput.scrapeSchema) { for (const schemaArray of serializableOutput.scrapeSchema) { if (!Array.isArray(schemaArray)) continue; - - for (let i = 0; i < schemaArray.length; i++) { - if (i >= mergedRecords.length) break; - mergedRecords[i] = { ...mergedRecords[i], ...schemaArray[i] }; + for (const schemaItem of schemaArray) { + Object.entries(schemaItem).forEach(([key, value]) => { + if (key && key.trim() !== '' && value !== null && value !== undefined && value !== '') { + schemaData.push({key, value}); + } + }); } } } + // Collect list data if (serializableOutput.scrapeList) { for (const listArray of serializableOutput.scrapeList) { if (!Array.isArray(listArray)) continue; - - for (let i = 0; i < listArray.length; i++) { - if (i >= mergedRecords.length) break; - mergedRecords[i] = { ...mergedRecords[i], ...listArray[i] }; - } - } - } - - if (binaryOutput && Object.keys(binaryOutput).length > 0) { - for (let i = 0; i < mergedRecords.length; i++) { - const screenshotKey = `item-${i}`; - if (binaryOutput[screenshotKey]) { - mergedRecords[i].Screenshot = binaryOutput[screenshotKey]; - mergedRecords[i].Key = screenshotKey; - } - } - - for (const [key, url] of Object.entries(binaryOutput)) { - if (mergedRecords.some(record => record.Key === key)) { - continue; - } - - mergedRecords.push({ - "Key": key, - "Screenshot": url + listArray.forEach(listItem => { + const hasContent = Object.values(listItem).some(value => + value !== null && value !== undefined && value !== '' + ); + if (hasContent) { + listData.push(listItem); + } }); } } - return mergedRecords; + // Collect screenshot data + if (binaryOutput && Object.keys(binaryOutput).length > 0) { + Object.entries(binaryOutput).forEach(([key, url]) 
=> { + if (key && key.trim() !== '' && url && url.trim() !== '') { + screenshotData.push({key, url}); + } + }); + } + + // Mix all data types together to create consecutive records + const maxLength = Math.max(schemaData.length, listData.length, screenshotData.length); + + for (let i = 0; i < maxLength; i++) { + const record: Record = {}; + + if (i < schemaData.length) { + record.Label = schemaData[i].key; + record.Value = schemaData[i].value; + } + + if (i < listData.length) { + Object.entries(listData[i]).forEach(([key, value]) => { + if (value !== null && value !== undefined && value !== '') { + record[key] = value; + } + }); + } + + if (i < screenshotData.length) { + record.Key = screenshotData[i].key; + record.Screenshot = screenshotData[i].url; + } + + if (Object.keys(record).length > 0) { + allRecords.push(record); + } + } + + for (let i = maxLength; i < schemaData.length; i++) { + allRecords.push({ + Label: schemaData[i].key, + Value: schemaData[i].value + }); + } + + for (let i = maxLength; i < listData.length; i++) { + allRecords.push(listData[i]); + } + + for (let i = maxLength; i < screenshotData.length; i++) { + allRecords.push({ + Key: screenshotData[i].key, + Screenshot: screenshotData[i].url + }); + } + + return allRecords; } export async function updateAirtable(robotId: string, runId: string) { @@ -210,11 +245,13 @@ export async function writeDataToAirtable( const airtable = new Airtable({ apiKey: accessToken }); const base = airtable.base(baseId); + await deleteEmptyRecords(base, tableName); + const processedData = data.map(item => { const cleanedItem: Record = {}; for (const [key, value] of Object.entries(item)) { - if (value === null || value === undefined) { + if (value === null || value === undefined || value === '') { cleanedItem[key] = ''; } else if (typeof value === 'object' && !Array.isArray(value)) { cleanedItem[key] = JSON.stringify(value); @@ -224,113 +261,55 @@ export async function writeDataToAirtable( } return cleanedItem; + 
}).filter(record => { + return Object.values(record).some(value => value !== null && value !== undefined && value !== ''); }); - const existingFields = await getExistingFields(base, tableName); - console.log(`Found ${existingFields.length} existing fields in Airtable: ${existingFields.join(', ')}`); + if (processedData.length === 0) { + console.log('No valid data to write after filtering. Skipping.'); + return; + } - const dataFields = [...new Set(processedData.flatMap(row => Object.keys(row)))]; + const dataFields = [...new Set(processedData.flatMap(row => Object.keys(row)))]; console.log(`Found ${dataFields.length} fields in data: ${dataFields.join(', ')}`); + const existingFields = await getExistingFields(base, tableName); const missingFields = dataFields.filter(field => !existingFields.includes(field)); - const hasNewColumns = missingFields.length > 0; - console.log(`Found ${missingFields.length} new fields: ${missingFields.join(', ')}`); - - for (const field of missingFields) { - const sampleRow = processedData.find(row => field in row); - if (sampleRow) { - const sampleValue = sampleRow[field]; - try { - await createAirtableField(baseId, tableName, field, sampleValue, accessToken, tableId); - console.log(`Successfully created field: ${field}`); - - await new Promise(resolve => setTimeout(resolve, 200)); - } catch (fieldError: any) { - console.warn(`Warning: Could not create field "${field}": ${fieldError.message}`); - } - } - } - let existingRecords: Array<{ id: string, fields: Record }> = []; - - if (hasNewColumns) { - existingRecords = await fetchAllRecords(base, tableName); - console.log(`Found ${existingRecords.length} existing records in Airtable`); - } - - if (hasNewColumns && existingRecords.length > 0) { - const recordsToUpdate = []; - const recordsToCreate = []; + if (missingFields.length > 0) { + console.log(`Creating ${missingFields.length} new fields: ${missingFields.join(', ')}`); - const newColumnData = processedData.map(record => { - const 
newColumnsOnly: Record = {}; - missingFields.forEach(field => { - if (field in record) { - newColumnsOnly[field] = record[field]; - } - }); - return newColumnsOnly; - }); - - for (let i = 0; i < Math.min(existingRecords.length, newColumnData.length); i++) { - if (Object.keys(newColumnData[i]).length > 0) { - recordsToUpdate.push({ - id: existingRecords[i].id, - fields: newColumnData[i] - }); - } - } - - const existingColumnsBeingUpdated = dataFields.filter(field => - existingFields.includes(field) && !missingFields.includes(field) - ); - - if (existingColumnsBeingUpdated.length > 0) { - recordsToCreate.push(...processedData.map(record => ({ fields: record }))); - console.log(`Will append ${recordsToCreate.length} new records with all data`); - } else { - if (processedData.length > existingRecords.length) { - const additionalRecords = processedData.slice(existingRecords.length); - recordsToCreate.push(...additionalRecords.map(record => ({ fields: record }))); - console.log(`Will append ${recordsToCreate.length} additional records`); - } - } - - if (recordsToUpdate.length > 0) { - console.log(`Updating ${recordsToUpdate.length} existing records with new columns`); - const BATCH_SIZE = 10; - for (let i = 0; i < recordsToUpdate.length; i += BATCH_SIZE) { - const batch = recordsToUpdate.slice(i, i + BATCH_SIZE); - console.log(`Updating batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(recordsToUpdate.length/BATCH_SIZE)}`); - + for (const field of missingFields) { + const sampleRow = processedData.find(row => field in row && row[field] !== ''); + if (sampleRow) { + const sampleValue = sampleRow[field]; try { - await retryableAirtableUpdate(base, tableName, batch); - } catch (batchError: any) { - console.error(`Error updating batch: ${batchError.message}`); - throw batchError; + await createAirtableField(baseId, tableName, field, sampleValue, accessToken, tableId); + console.log(`Successfully created field: ${field}`); + await new Promise(resolve => setTimeout(resolve, 
200)); + } catch (fieldError: any) { + console.warn(`Warning: Could not create field "${field}": ${fieldError.message}`); } - - await new Promise(resolve => setTimeout(resolve, 500)); } } - } else { - console.log(`Appending all ${processedData.length} records to Airtable`); - const recordsToCreate = processedData.map(record => ({ fields: record })); + } + + console.log(`Appending all ${processedData.length} records to Airtable`); + const recordsToCreate = processedData.map(record => ({ fields: record })); + + const BATCH_SIZE = 10; + for (let i = 0; i < recordsToCreate.length; i += BATCH_SIZE) { + const batch = recordsToCreate.slice(i, i + BATCH_SIZE); + console.log(`Creating batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(recordsToCreate.length/BATCH_SIZE)}`); - const BATCH_SIZE = 10; - for (let i = 0; i < recordsToCreate.length; i += BATCH_SIZE) { - const batch = recordsToCreate.slice(i, i + BATCH_SIZE); - console.log(`Creating batch ${Math.floor(i/BATCH_SIZE) + 1} of ${Math.ceil(recordsToCreate.length/BATCH_SIZE)}`); - - try { - await retryableAirtableCreate(base, tableName, batch); - } catch (batchError: any) { - console.error(`Error creating batch: ${batchError.message}`); - throw batchError; - } - - await new Promise(resolve => setTimeout(resolve, 500)); + try { + await retryableAirtableCreate(base, tableName, batch); + } catch (batchError: any) { + console.error(`Error creating batch: ${batchError.message}`); + throw batchError; } + + await new Promise(resolve => setTimeout(resolve, 500)); } await deleteEmptyRecords(base, tableName); @@ -343,20 +322,6 @@ export async function writeDataToAirtable( } } -async function fetchAllRecords(base: Airtable.Base, tableName: string): Promise }>> { - try { - console.log(`Fetching all records from ${tableName}...`); - const records = await base(tableName).select().all(); - return records.map(record => ({ - id: record.id, - fields: record.fields - })); - } catch (error: any) { - console.warn(`Warning: Could not fetch 
all records: ${error.message}`); - return []; - } -} - async function deleteEmptyRecords(base: Airtable.Base, tableName: string): Promise { console.log('Checking for empty records to clear...'); @@ -407,23 +372,6 @@ async function retryableAirtableCreate( } } -async function retryableAirtableUpdate( - base: Airtable.Base, - tableName: string, - batch: any[], - retries = MAX_RETRIES -): Promise { - try { - await base(tableName).update(batch); - } catch (error) { - if (retries > 0) { - await new Promise(resolve => setTimeout(resolve, BASE_API_DELAY)); - return retryableAirtableUpdate(base, tableName, batch, retries - 1); - } - throw error; - } -} - // Helper functions async function getExistingFields(base: Airtable.Base, tableName: string): Promise { try { diff --git a/server/src/workflow-management/integrations/gsheet.ts b/server/src/workflow-management/integrations/gsheet.ts index 402ace5d..2a29bdcc 100644 --- a/server/src/workflow-management/integrations/gsheet.ts +++ b/server/src/workflow-management/integrations/gsheet.ts @@ -114,7 +114,16 @@ async function processOutputType( await ensureSheetExists(spreadsheetId, sheetName, robotConfig); - await writeDataToSheet(robotId, spreadsheetId, data, sheetName, robotConfig); + let formattedData = data; + if (outputType === 'Text' && data.length > 0) { + const schemaItem = data[0]; + formattedData = Object.entries(schemaItem).map(([key, value]) => ({ + Label: key, + Value: value + })); + } + + await writeDataToSheet(robotId, spreadsheetId, formattedData, sheetName, robotConfig); console.log(`Data written to ${sheetName} sheet for ${outputType} data`); } } diff --git a/server/src/workflow-management/scheduler/index.ts b/server/src/workflow-management/scheduler/index.ts index 1559e63c..b40e55f2 100644 --- a/server/src/workflow-management/scheduler/index.ts +++ b/server/src/workflow-management/scheduler/index.ts @@ -1,4 +1,4 @@ -import { uuid } from "uuidv4"; +import { v4 as uuid } from "uuid"; import { chromium } from 
'playwright-extra'; import stealthPlugin from 'puppeteer-extra-plugin-stealth'; import { io, Socket } from "socket.io-client"; @@ -13,6 +13,8 @@ import { BinaryOutputService } from "../../storage/mino"; import { capture } from "../../utils/analytics"; import { WorkflowFile } from "maxun-core"; import { Page } from "playwright"; +import { sendWebhook } from "../../routes/webhook"; +import { airtableUpdateTasks, processAirtableUpdates } from "../integrations/airtable"; chromium.use(stealthPlugin()); async function createWorkflowAndStoreMetadata(id: string, userId: string) { @@ -152,26 +154,34 @@ async function executeRun(id: string, userId: string) { binaryOutput: uploadedBinaryOutput, }); - let totalRowsExtracted = 0; + // Track extraction metrics + let totalSchemaItemsExtracted = 0; + let totalListItemsExtracted = 0; let extractedScreenshotsCount = 0; - let extractedItemsCount = 0; - - if (run.dataValues.binaryOutput && run.dataValues.binaryOutput["item-0"]) { - extractedScreenshotsCount = 1; + + if (categorizedOutput.scrapeSchema) { + Object.values(categorizedOutput.scrapeSchema).forEach((schemaResult: any) => { + if (Array.isArray(schemaResult)) { + totalSchemaItemsExtracted += schemaResult.length; + } else if (schemaResult && typeof schemaResult === 'object') { + totalSchemaItemsExtracted += 1; + } + }); } - - if (run.dataValues.serializableOutput && run.dataValues.serializableOutput["item-0"]) { - const itemsArray = run.dataValues.serializableOutput["item-0"]; - extractedItemsCount = itemsArray.length; - - totalRowsExtracted = itemsArray.reduce((total, item) => { - return total + Object.keys(item).length; - }, 0); + + if (categorizedOutput.scrapeList) { + Object.values(categorizedOutput.scrapeList).forEach((listResult: any) => { + if (Array.isArray(listResult)) { + totalListItemsExtracted += listResult.length; + } + }); } - - console.log(`Extracted Items Count: ${extractedItemsCount}`); - console.log(`Extracted Screenshots Count: ${extractedScreenshotsCount}`); 
- console.log(`Total Rows Extracted: ${totalRowsExtracted}`); + + if (uploadedBinaryOutput) { + extractedScreenshotsCount = Object.keys(uploadedBinaryOutput).length; + } + + const totalRowsExtracted = totalSchemaItemsExtracted + totalListItemsExtracted; capture( 'maxun-oss-run-created-scheduled', @@ -180,18 +190,60 @@ async function executeRun(id: string, userId: string) { created_at: new Date().toISOString(), status: 'success', totalRowsExtracted, - extractedItemsCount, + schemaItemsExtracted: totalSchemaItemsExtracted, + listItemsExtracted: totalListItemsExtracted, extractedScreenshotsCount, } ); - googleSheetUpdateTasks[id] = { - robotId: plainRun.robotMetaId, - runId: id, - status: 'pending', - retries: 5, + const webhookPayload = { + robot_id: plainRun.robotMetaId, + run_id: plainRun.runId, + robot_name: recording.recording_meta.name, + status: 'success', + started_at: plainRun.startedAt, + finished_at: new Date().toLocaleString(), + extracted_data: { + captured_texts: Object.values(categorizedOutput.scrapeSchema).flat() || [], + captured_lists: categorizedOutput.scrapeList, + total_rows: totalRowsExtracted, + captured_texts_count: totalSchemaItemsExtracted, + captured_lists_count: totalListItemsExtracted, + screenshots_count: extractedScreenshotsCount + }, + metadata: { + browser_id: plainRun.browserId, + user_id: userId + } }; - processGoogleSheetUpdates(); + + try { + await sendWebhook(plainRun.robotMetaId, 'run_completed', webhookPayload); + logger.log('info', `Webhooks sent successfully for completed run ${plainRun.runId}`); + } catch (webhookError: any) { + logger.log('error', `Failed to send webhooks for run ${plainRun.runId}: ${webhookError.message}`); + } + + try { + googleSheetUpdateTasks[plainRun.runId] = { + robotId: plainRun.robotMetaId, + runId: plainRun.runId, + status: 'pending', + retries: 5, + }; + + airtableUpdateTasks[plainRun.runId] = { + robotId: plainRun.robotMetaId, + runId: plainRun.runId, + status: 'pending', + retries: 5, + }; + + 
processAirtableUpdates(); + processGoogleSheetUpdates(); + } catch (err: any) { + logger.log('error', `Failed to update Google Sheet for run: ${plainRun.runId}: ${err.message}`); + } return true; } catch (error: any) { logger.log('info', `Error while running a robot with id: ${id} - ${error.message}`); @@ -202,6 +254,34 @@ async function executeRun(id: string, userId: string) { status: 'failed', finishedAt: new Date().toLocaleString(), }); + + const recording = await Robot.findOne({ where: { 'recording_meta.id': run.robotMetaId }, raw: true }); + + // Trigger webhooks for run failure + const failedWebhookPayload = { + robot_id: run.robotMetaId, + run_id: run.runId, + robot_name: recording ? recording.recording_meta.name : 'Unknown Robot', + status: 'failed', + started_at: run.startedAt, + finished_at: new Date().toLocaleString(), + error: { + message: error.message, + stack: error.stack, + type: error.name || 'ExecutionError' + }, + metadata: { + browser_id: run.browserId, + user_id: userId, + } + }; + + try { + await sendWebhook(run.robotMetaId, 'run_failed', failedWebhookPayload); + logger.log('info', `Failure webhooks sent successfully for run ${run.runId}`); + } catch (webhookError: any) { + logger.log('error', `Failed to send failure webhooks for run ${run.runId}: ${webhookError.message}`); + } } capture( 'maxun-oss-run-created-scheduled', diff --git a/server/tsconfig.mcp.json b/server/tsconfig.mcp.json new file mode 100644 index 00000000..d799017b --- /dev/null +++ b/server/tsconfig.mcp.json @@ -0,0 +1,23 @@ +{ + "compilerOptions": { + "target": "ES2022", + "module": "Node16", + "moduleResolution": "Node16", + "outDir": "../dist", + "rootDir": "./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "declaration": true, + "declarationMap": true, + "sourceMap": true + }, + "include": [ + "src/mcp-worker.ts" + ], + "exclude": [ + "node_modules", + "dist" + ] +} \ No newline at end of file diff 
--git a/src/api/recording.ts b/src/api/recording.ts index 6b816001..ce07753a 100644 --- a/src/api/recording.ts +++ b/src/api/recording.ts @@ -35,6 +35,19 @@ export const getActiveBrowserId = async(): Promise => { } }; +export const canCreateBrowserInState = async(state: "recording" | "run"): Promise => { + try { + const response = await axios.get(`${apiUrl}/record/can-create/${state}`, { withCredentials: true }); + if (response.status === 200) { + return response.data.canCreate; + } else { + return false; + } + } catch(error: any) { + return false; + } +}; + export const interpretCurrentRecording = async(): Promise => { try { const response = await axios.get(`${apiUrl}/record/interpret`); diff --git a/src/api/storage.ts b/src/api/storage.ts index 295f340c..d3fa3eb8 100644 --- a/src/api/storage.ts +++ b/src/api/storage.ts @@ -154,6 +154,27 @@ export const editRecordingFromStorage = async (browserId: string, id: string): P } }; +export interface CreateRunResponseWithQueue extends CreateRunResponse { + queued?: boolean; +} + +export const createAndRunRecording = async (id: string, settings: RunSettings): Promise => { + try { + const response = await axios.put( + `${apiUrl}/storage/runs/${id}`, + { ...settings, withCredentials: true } + ); + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Couldn't create and run recording ${id}`); + } + } catch (error: any) { + console.log(error); + return { browserId: '', runId: '', robotMetaId: '', queued: false }; + } +} + export const createRunForStoredRecording = async (id: string, settings: RunSettings): Promise => { try { const response = await axios.put( @@ -184,20 +205,24 @@ export const interpretStoredRecording = async (id: string): Promise => } } -export const notifyAboutAbort = async (id: string): Promise => { +export const notifyAboutAbort = async (id: string): Promise<{ success: boolean; isQueued?: boolean }> => { try { - const response = await 
axios.post(`${apiUrl}/storage/runs/abort/${id}`); + const response = await axios.post(`${apiUrl}/storage/runs/abort/${id}`, { withCredentials: true }); if (response.status === 200) { - return response.data; + return { + success: response.data.success, + isQueued: response.data.isQueued + }; } else { throw new Error(`Couldn't abort a running recording with id ${id}`); } } catch (error: any) { console.log(error); - return false; + return { success: false }; } } + export const scheduleStoredRecording = async (id: string, settings: ScheduleSettings): Promise => { try { const response = await axios.put( diff --git a/src/api/webhook.ts b/src/api/webhook.ts new file mode 100644 index 00000000..ad5c75dc --- /dev/null +++ b/src/api/webhook.ts @@ -0,0 +1,149 @@ +import { default as axios } from "axios"; +import { apiUrl } from "../apiConfig"; + +export interface WebhookConfig { + id: string; + url: string; + events: string[]; + active: boolean; + createdAt?: string; + updatedAt?: string; + lastCalledAt?: string | null; + retryAttempts?: number; + retryDelay?: number; + timeout?: number; +} + +export interface WebhookResponse { + ok: boolean; + message?: string; + webhook?: WebhookConfig; + webhooks?: WebhookConfig[]; + error?: string; + details?: any; +} + +export const addWebhook = async (webhook: WebhookConfig, robotId: string): Promise => { + try { + const response = await axios.post(`${apiUrl}/webhook/add`, { + webhook, + robotId + }, { withCredentials: true }); + + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Failed to add webhook. 
Status code: ${response.status}`); + } + } catch (error: any) { + console.error('Error adding webhook:', error.message || error); + return { + ok: false, + error: error.response?.data?.message || error.message || 'Failed to add webhook' + }; + } +}; + +export const updateWebhook = async (webhook: WebhookConfig, robotId: string): Promise => { + try { + const response = await axios.post(`${apiUrl}/webhook/update`, { + webhook, + robotId + }, { withCredentials: true }); + + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Failed to update webhook. Status code: ${response.status}`); + } + } catch (error: any) { + console.error('Error updating webhook:', error.message || error); + return { + ok: false, + error: error.response?.data?.message || error.message || 'Failed to update webhook' + }; + } +}; + +export const removeWebhook = async (webhookId: string, robotId: string): Promise => { + try { + const response = await axios.post(`${apiUrl}/webhook/remove`, { + webhookId, + robotId + }, { withCredentials: true }); + + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Failed to remove webhook. Status code: ${response.status}`); + } + } catch (error: any) { + console.error('Error removing webhook:', error.message || error); + return { + ok: false, + error: error.response?.data?.message || error.message || 'Failed to remove webhook' + }; + } +}; + +export const getWebhooks = async (robotId: string): Promise => { + try { + const response = await axios.get(`${apiUrl}/webhook/list/${robotId}`, { + withCredentials: true + }); + + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Failed to fetch webhooks. 
Status code: ${response.status}`); + } + } catch (error: any) { + console.error('Error fetching webhooks:', error.message || error); + return { + ok: false, + error: error.response?.data?.message || error.message || 'Failed to fetch webhooks', + webhooks: [] + }; + } +}; + +export const testWebhook = async (webhook: WebhookConfig, robotId: string): Promise => { + try { + const response = await axios.post(`${apiUrl}/webhook/test`, { + webhook, + robotId + }, { withCredentials: true }); + + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Failed to test webhook. Status code: ${response.status}`); + } + } catch (error: any) { + console.error('Error testing webhook:', error.message || error); + return { + ok: false, + error: error.response?.data?.message || error.message || 'Failed to test webhook' + }; + } +}; + +export const clearAllWebhooks = async (robotId: string): Promise => { + try { + const response = await axios.delete(`${apiUrl}/webhook/clear/${robotId}`, { + withCredentials: true + }); + + if (response.status === 200) { + return response.data; + } else { + throw new Error(`Failed to clear webhooks. 
Status code: ${response.status}`); + } + } catch (error: any) { + console.error('Error clearing webhooks:', error.message || error); + return { + ok: false, + error: error.response?.data?.message || error.message || 'Failed to clear webhooks' + }; + } +}; \ No newline at end of file diff --git a/src/components/action/action-settings/scrape.tsx b/src/components/action/action-settings/Scrape.tsx similarity index 100% rename from src/components/action/action-settings/scrape.tsx rename to src/components/action/action-settings/Scrape.tsx diff --git a/src/components/action/action-settings/scrapeSchema.tsx b/src/components/action/action-settings/ScrapeSchema.tsx similarity index 100% rename from src/components/action/action-settings/scrapeSchema.tsx rename to src/components/action/action-settings/ScrapeSchema.tsx diff --git a/src/components/action/action-settings/screenshot.tsx b/src/components/action/action-settings/Screenshot.tsx similarity index 100% rename from src/components/action/action-settings/screenshot.tsx rename to src/components/action/action-settings/Screenshot.tsx diff --git a/src/components/action/action-settings/scroll.tsx b/src/components/action/action-settings/Scroll.tsx similarity index 100% rename from src/components/action/action-settings/scroll.tsx rename to src/components/action/action-settings/Scroll.tsx diff --git a/src/components/action/action-settings/index.ts b/src/components/action/action-settings/index.ts index 58e3f3c4..09e571d1 100644 --- a/src/components/action/action-settings/index.ts +++ b/src/components/action/action-settings/index.ts @@ -1,7 +1,7 @@ -import { ScrollSettings } from './scroll'; -import { ScreenshotSettings } from "./screenshot"; -import { ScrapeSettings } from "./scrape"; -import { ScrapeSchemaSettings } from "./scrapeSchema"; +import { ScrollSettings } from './Scroll'; +import { ScreenshotSettings } from "./Screenshot"; +import { ScrapeSettings } from "./Scrape"; +import { ScrapeSchemaSettings } from "./ScrapeSchema"; 
export { ScrollSettings, diff --git a/src/components/api/ApiKey.tsx b/src/components/api/ApiKey.tsx index d3b3ed9f..9feb9551 100644 --- a/src/components/api/ApiKey.tsx +++ b/src/components/api/ApiKey.tsx @@ -108,7 +108,21 @@ const ApiKeyManager = () => { return ( - + + Start by creating an API key below. Then, + + test your API + + or read the + API documentation + for setup instructions. + + {t('apikey.title')} {apiKey ? ( diff --git a/src/components/browser/BrowserContent.tsx b/src/components/browser/BrowserContent.tsx index 14b9385e..46a8886d 100644 --- a/src/components/browser/BrowserContent.tsx +++ b/src/components/browser/BrowserContent.tsx @@ -130,7 +130,7 @@ export const BrowserContent = () => { } }) .catch((error) => { - console.log("Fetching current url failed"); + console.log(`Fetching current url failed: ${error}`); }); }, []); diff --git a/src/components/browser/BrowserNavBar.tsx b/src/components/browser/BrowserNavBar.tsx index a9a1a8d0..a06b7b4e 100644 --- a/src/components/browser/BrowserNavBar.tsx +++ b/src/components/browser/BrowserNavBar.tsx @@ -3,7 +3,7 @@ import styled from 'styled-components'; import ReplayIcon from '@mui/icons-material/Replay'; import ArrowBackIcon from '@mui/icons-material/ArrowBack'; import ArrowForwardIcon from '@mui/icons-material/ArrowForward'; -import { NavBarButton } from '../ui/buttons/buttons'; +import { NavBarButton } from '../ui/buttons/Buttons'; import { UrlForm } from './UrlForm'; import { useCallback, useEffect } from "react"; import { useSocketStore } from "../../context/socket"; @@ -63,7 +63,7 @@ const BrowserNavBar: FC = ({ handleUrlChanged(response); } }).catch((error) => { - console.log("Fetching current url failed"); + console.log(`Fetching current url failed: ${error}`); }) }, []); diff --git a/src/components/browser/BrowserWindow.tsx b/src/components/browser/BrowserWindow.tsx index 83750f38..5bb84101 100644 --- a/src/components/browser/BrowserWindow.tsx +++ b/src/components/browser/BrowserWindow.tsx @@ 
-1,7 +1,7 @@ -import React, { useCallback, useContext, useEffect, useState } from 'react'; +import React, { useCallback, useContext, useEffect, useRef, useState } from 'react'; import { useSocketStore } from '../../context/socket'; import { Button } from '@mui/material'; -import Canvas from "../recorder/canvas"; +import Canvas from "../recorder/Canvas"; import { Highlighter } from "../recorder/Highlighter"; import { GenericModal } from '../ui/GenericModal'; import { useActionContext } from '../../context/browserActions'; @@ -11,6 +11,12 @@ import { useTranslation } from 'react-i18next'; import { AuthContext } from '../../context/auth'; import { coordinateMapper } from '../../helpers/coordinateMapper'; import { useBrowserDimensionsStore } from '../../context/browserDimensions'; +import { clientSelectorGenerator, ElementFingerprint } from "../../helpers/clientSelectorGenerator"; +import DatePicker from "../pickers/DatePicker"; +import Dropdown from "../pickers/Dropdown"; +import TimePicker from "../pickers/TimePicker"; +import DateTimeLocalPicker from "../pickers/DateTimeLocalPicker"; +import { DOMBrowserRenderer } from '../recorder/DOMBrowserRenderer'; interface ElementInfo { tagName: string; @@ -23,6 +29,7 @@ interface ElementInfo { attributes?: Record; innerHTML?: string; outerHTML?: string; + isDOMMode?: boolean; } interface AttributeOption { @@ -41,6 +48,73 @@ interface ViewportInfo { height: number; } +interface RRWebSnapshot { + type: number; + childNodes?: RRWebSnapshot[]; + tagName?: string; + attributes?: Record; + textContent: string; + id: number; + [key: string]: any; +} + +interface ProcessedSnapshot { + snapshot: RRWebSnapshot; + resources: { + stylesheets: Array<{ + href: string; + content: string; + media?: string; + }>; + images: Array<{ + src: string; + dataUrl: string; + alt?: string; + }>; + fonts: Array<{ + url: string; + dataUrl: string; + format?: string; + }>; + scripts: Array<{ + src: string; + content: string; + type?: string; + }>; + 
media: Array<{ + src: string; + dataUrl: string; + type: string; + }>; + }; + baseUrl: string; + viewport: { width: number; height: number }; + timestamp: number; + processingStats: { + totalReplacements: number; + discoveredResources: { + images: number; + stylesheets: number; + scripts: number; + fonts: number; + media: number; + }; + cachedResources: { + stylesheets: number; + images: number; + fonts: number; + scripts: number; + media: number; + }; + totalCacheSize: number; + }; +} + +interface RRWebDOMCastData { + snapshotData: ProcessedSnapshot; + userId: string; + timestamp: number; +} const getAttributeOptions = (tagName: string, elementInfo: ElementInfo | null): AttributeOption[] => { if (!elementInfo) return []; @@ -73,42 +147,1074 @@ export const BrowserWindow = () => { const { browserWidth, browserHeight } = useBrowserDimensionsStore(); const [canvasRef, setCanvasReference] = useState | undefined>(undefined); const [screenShot, setScreenShot] = useState(""); - const [highlighterData, setHighlighterData] = useState<{ rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[] } | null>(null); + const [highlighterData, setHighlighterData] = useState<{ + rect: DOMRect; + selector: string; + elementInfo: ElementInfo | null; + isShadow?: boolean; + childSelectors?: string[]; + groupElements?: Array<{ element: HTMLElement; rect: DOMRect }>; + similarElements?: { + elements: HTMLElement[]; + rects: DOMRect[]; + }; + } | null>(null); const [showAttributeModal, setShowAttributeModal] = useState(false); const [attributeOptions, setAttributeOptions] = useState([]); const [selectedElement, setSelectedElement] = useState<{ selector: string, info: ElementInfo | null } | null>(null); const [currentListId, setCurrentListId] = useState(null); const [viewportInfo, setViewportInfo] = useState({ width: browserWidth, height: browserHeight }); - + const [isLoading, setIsLoading] = useState(false); + const [cachedChildSelectors, 
setCachedChildSelectors] = useState([]); + const [processingGroupCoordinates, setProcessingGroupCoordinates] = useState>([]); const [listSelector, setListSelector] = useState(null); const [fields, setFields] = useState>({}); const [paginationSelector, setPaginationSelector] = useState(''); + const highlighterUpdateRef = useRef(0); + const [isCachingChildSelectors, setIsCachingChildSelectors] = useState(false); + const [cachedListSelector, setCachedListSelector] = useState( + null + ); + const [pendingNotification, setPendingNotification] = useState<{ + type: "error" | "warning" | "info" | "success"; + message: string; + count?: number; + } | null>(null); + const { socket } = useSocketStore(); - const { notify } = useGlobalInfoStore(); + const { notify, currentTextActionId, currentListActionId, updateDOMMode, isDOMMode, currentSnapshot } = useGlobalInfoStore(); const { getText, getList, paginationMode, paginationType, limitMode, captureStage } = useActionContext(); - const { addTextStep, addListStep, updateListStepData } = useBrowserSteps(); + const { addTextStep, addListStep } = useBrowserSteps(); + + const [currentGroupInfo, setCurrentGroupInfo] = useState<{ + isGroupElement: boolean; + groupSize: number; + groupElements: HTMLElement[]; + } | null>(null); const { state } = useContext(AuthContext); const { user } = state; + const [datePickerInfo, setDatePickerInfo] = useState<{ + coordinates: { x: number; y: number }; + selector: string; + } | null>(null); + + const [dropdownInfo, setDropdownInfo] = useState<{ + coordinates: { x: number; y: number }; + selector: string; + options: Array<{ + value: string; + text: string; + disabled: boolean; + selected: boolean; + }>; + } | null>(null); + + const [timePickerInfo, setTimePickerInfo] = useState<{ + coordinates: { x: number; y: number }; + selector: string; + } | null>(null); + + const [dateTimeLocalInfo, setDateTimeLocalInfo] = useState<{ + coordinates: { x: number; y: number }; + selector: string; + } | null>(null); 
+ const dimensions = { width: browserWidth, height: browserHeight }; + const handleShowDatePicker = useCallback( + (info: { coordinates: { x: number; y: number }; selector: string }) => { + setDatePickerInfo(info); + }, + [] + ); + + const handleShowDropdown = useCallback( + (info: { + coordinates: { x: number; y: number }; + selector: string; + options: Array<{ + value: string; + text: string; + disabled: boolean; + selected: boolean; + }>; + }) => { + setDropdownInfo(info); + }, + [] + ); + + const handleShowTimePicker = useCallback( + (info: { coordinates: { x: number; y: number }; selector: string }) => { + setTimePickerInfo(info); + }, + [] + ); + + const handleShowDateTimePicker = useCallback( + (info: { coordinates: { x: number; y: number }; selector: string }) => { + setDateTimeLocalInfo(info); + }, + [] + ); + + const rrwebSnapshotHandler = useCallback( + (data: RRWebDOMCastData) => { + if (!data.userId || data.userId === user?.id) { + if (data.snapshotData && data.snapshotData.snapshot) { + updateDOMMode(true, data.snapshotData); + socket?.emit("dom-mode-enabled"); + setIsLoading(false); + } else { + setIsLoading(false); + } + } + }, + [user?.id, socket, updateDOMMode] + ); + + const domModeHandler = useCallback( + (data: any) => { + if (!data.userId || data.userId === user?.id) { + updateDOMMode(true); + socket?.emit("dom-mode-enabled"); + setIsLoading(false); + } + }, + [user?.id, socket, updateDOMMode] + ); + + const domModeErrorHandler = useCallback( + (data: any) => { + if (!data.userId || data.userId === user?.id) { + updateDOMMode(false); + setIsLoading(false); + } + }, + [user?.id, updateDOMMode] + ); + + useEffect(() => { + if (isDOMMode) { + clientSelectorGenerator.setGetList(getList); + clientSelectorGenerator.setListSelector(listSelector || ""); + clientSelectorGenerator.setPaginationMode(paginationMode); + } + }, [isDOMMode, getList, listSelector, paginationMode]); + + const createFieldsFromChildSelectors = useCallback( + (childSelectors: 
string[], listSelector: string) => { + if (!childSelectors.length || !currentSnapshot) return {}; + + const iframeElement = document.querySelector( + "#dom-browser-iframe" + ) as HTMLIFrameElement; + + if (!iframeElement?.contentDocument) return {}; + + const candidateFields: Array<{ + id: number; + field: TextStep; + element: HTMLElement; + isLeaf: boolean; + depth: number; + position: { x: number; y: number }; + }> = []; + + const uniqueChildSelectors = [...new Set(childSelectors)]; + + // Filter child selectors that occur in at least 2 out of first 10 list elements + const validateChildSelectors = (selectors: string[]): string[] => { + try { + // Get first 10 list elements + const listElements = evaluateXPathAllWithShadowSupport( + iframeElement.contentDocument!, + listSelector, + listSelector.includes(">>") || listSelector.startsWith("//") + ).slice(0, 10); + + if (listElements.length < 2) { + return selectors; + } + + const validSelectors: string[] = []; + + for (const selector of selectors) { + // First, try to access the element directly + try { + const testElement = iframeElement.contentDocument!.evaluate( + selector, + iframeElement.contentDocument!, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null + ).singleNodeValue; + + // If we can't access the element, it's likely in shadow DOM - include it + if (!testElement) { + console.log(`Including potentially shadow DOM selector: ${selector}`); + validSelectors.push(selector); + continue; + } + } catch (accessError) { + // If there's an error accessing, assume shadow DOM and include it + console.log(`Including selector due to access error: ${selector}`); + validSelectors.push(selector); + continue; + } + + let occurrenceCount = 0; + + // Get all elements that match this child selector + const childElements = evaluateXPathAllWithShadowSupport( + iframeElement.contentDocument!, + selector, + selector.includes(">>") || selector.startsWith("//") + ); + + // Check how many of these child elements are contained 
within our list elements + for (const childElement of childElements) { + for (const listElement of listElements) { + if (listElement.contains(childElement)) { + occurrenceCount++; + break; + } + } + } + + // Only include selectors that occur in at least 2 list elements + if (occurrenceCount >= 2) { + validSelectors.push(selector); + } + } + + return validSelectors; + } catch (error) { + console.warn("Failed to validate child selectors:", error); + return selectors; + } + }; + + // Enhanced XPath evaluation for multiple elements + const evaluateXPathAllWithShadowSupport = ( + document: Document, + xpath: string, + isShadow: boolean = false + ): Element[] => { + try { + // First try regular XPath evaluation + const result = document.evaluate( + xpath, + document, + null, + XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, + null + ); + + const elements: Element[] = []; + for (let i = 0; i < result.snapshotLength; i++) { + const node = result.snapshotItem(i); + if (node && node.nodeType === Node.ELEMENT_NODE) { + elements.push(node as Element); + } + } + + if (!isShadow || elements.length > 0) { + return elements; + } + + // If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal + // This is a simplified version - for multiple elements, we'll primarily rely on regular XPath + return elements; + } catch (err) { + console.error("XPath evaluation failed:", xpath, err); + return []; + } + }; + + const validatedChildSelectors = validateChildSelectors(uniqueChildSelectors); + + const isElementVisible = (element: HTMLElement): boolean => { + try { + const rect = element.getBoundingClientRect(); + return rect.width > 0 && rect.height > 0; + } catch (error) { + return false; + } + }; + + const isValidData = (data: string): boolean => { + if (!data || data.trim().length === 0) return false; + + const trimmed = data.trim(); + + // Filter out single letters + if (trimmed.length === 1) { + return false; + } + + // Filter out pure symbols/punctuation + if 
(trimmed.length < 3 && /^[^\w\s]+$/.test(trimmed)) { + return false; + } + + // Filter out whitespace and punctuation only + if (/^[\s\p{P}\p{S}]*$/u.test(trimmed)) return false; + + return trimmed.length > 0; + }; + + // Enhanced shadow DOM-aware element evaluation + const evaluateXPathWithShadowSupport = ( + document: Document, + xpath: string, + isShadow: boolean = false + ): Element | null => { + try { + // First try regular XPath evaluation + const result = document.evaluate( + xpath, + document, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null + ).singleNodeValue as Element | null; + + if (!isShadow || result) { + return result; + } + + // If shadow DOM is indicated and regular XPath fails, use shadow DOM traversal + let cleanPath = xpath; + let isIndexed = false; + + const indexedMatch = xpath.match(/^\((.*?)\)\[(\d+)\](.*)$/); + if (indexedMatch) { + cleanPath = indexedMatch[1] + indexedMatch[3]; + isIndexed = true; + } + + const pathParts = cleanPath + .replace(/^\/\//, "") + .split("/") + .map((p) => p.trim()) + .filter((p) => p.length > 0); + + let currentContexts: (Document | Element | ShadowRoot)[] = [document]; + + for (let i = 0; i < pathParts.length; i++) { + const part = pathParts[i]; + const nextContexts: (Element | ShadowRoot)[] = []; + + for (const ctx of currentContexts) { + const positionalMatch = part.match(/^([^[]+)\[(\d+)\]$/); + let partWithoutPosition = part; + let requestedPosition: number | null = null; + + if (positionalMatch) { + partWithoutPosition = positionalMatch[1]; + requestedPosition = parseInt(positionalMatch[2]); + } + + const matched = queryInsideContext(ctx, partWithoutPosition); + + let elementsToAdd = matched; + if (requestedPosition !== null) { + const index = requestedPosition - 1; + if (index >= 0 && index < matched.length) { + elementsToAdd = [matched[index]]; + } else { + elementsToAdd = []; + } + } + + elementsToAdd.forEach((el) => { + nextContexts.push(el); + if (el.shadowRoot) { + 
nextContexts.push(el.shadowRoot); + } + }); + } + + if (nextContexts.length === 0) { + return null; + } + + currentContexts = nextContexts; + } + + if (currentContexts.length > 0) { + if (isIndexed && indexedMatch) { + const requestedIndex = parseInt(indexedMatch[2]) - 1; + if (requestedIndex >= 0 && requestedIndex < currentContexts.length) { + return currentContexts[requestedIndex] as Element; + } else { + return null; + } + } + + return currentContexts[0] as Element; + } + + return null; + } catch (err) { + console.error("XPath evaluation failed:", xpath, err); + return null; + } + }; + + const queryInsideContext = ( + context: Document | Element | ShadowRoot, + part: string + ): Element[] => { + try { + const { tagName, conditions } = parseXPathPart(part); + + const candidateElements = Array.from(context.querySelectorAll(tagName)); + if (candidateElements.length === 0) { + return []; + } + + const matchingElements = candidateElements.filter((el) => { + return elementMatchesConditions(el, conditions); + }); + + return matchingElements; + } catch (err) { + console.error("Error in queryInsideContext:", err); + return []; + } + }; + + const parseXPathPart = ( + part: string + ): { tagName: string; conditions: string[] } => { + const tagMatch = part.match(/^([a-zA-Z0-9-]+)/); + const tagName = tagMatch ? tagMatch[1] : "*"; + + const conditionMatches = part.match(/\[([^\]]+)\]/g); + const conditions = conditionMatches + ? 
conditionMatches.map((c) => c.slice(1, -1)) + : []; + + return { tagName, conditions }; + }; + + const elementMatchesConditions = ( + element: Element, + conditions: string[] + ): boolean => { + for (const condition of conditions) { + if (!elementMatchesCondition(element, condition)) { + return false; + } + } + return true; + }; + + const elementMatchesCondition = ( + element: Element, + condition: string + ): boolean => { + condition = condition.trim(); + + if (/^\d+$/.test(condition)) { + return true; + } + + // Handle @attribute="value" + const attrMatch = condition.match(/^@([^=]+)=["']([^"']+)["']$/); + if (attrMatch) { + const [, attr, value] = attrMatch; + const elementValue = element.getAttribute(attr); + return elementValue === value; + } + + // Handle contains(@class, 'value') + const classContainsMatch = condition.match( + /^contains\(@class,\s*["']([^"']+)["']\)$/ + ); + if (classContainsMatch) { + const className = classContainsMatch[1]; + return element.classList.contains(className); + } + + // Handle contains(@attribute, 'value') + const attrContainsMatch = condition.match( + /^contains\(@([^,]+),\s*["']([^"']+)["']\)$/ + ); + if (attrContainsMatch) { + const [, attr, value] = attrContainsMatch; + const elementValue = element.getAttribute(attr) || ""; + return elementValue.includes(value); + } + + // Handle text()="value" + const textMatch = condition.match(/^text\(\)=["']([^"']+)["']$/); + if (textMatch) { + const expectedText = textMatch[1]; + const elementText = element.textContent?.trim() || ""; + return elementText === expectedText; + } + + // Handle contains(text(), 'value') + const textContainsMatch = condition.match( + /^contains\(text\(\),\s*["']([^"']+)["']\)$/ + ); + if (textContainsMatch) { + const expectedText = textContainsMatch[1]; + const elementText = element.textContent?.trim() || ""; + return elementText.includes(expectedText); + } + + // Handle count(*)=0 (element has no children) + if (condition === "count(*)=0") { + return 
element.children.length === 0; + } + + // Handle other count conditions + const countMatch = condition.match(/^count\(\*\)=(\d+)$/); + if (countMatch) { + const expectedCount = parseInt(countMatch[1]); + return element.children.length === expectedCount; + } + + return true; + }; + + // Enhanced value extraction with shadow DOM support + const extractValueWithShadowSupport = ( + element: Element, + attribute: string + ): string | null => { + if (!element) return null; + + const baseURL = + element.ownerDocument?.location?.href || window.location.origin; + + // Check shadow DOM content first + if (element.shadowRoot) { + const shadowContent = element.shadowRoot.textContent; + if (shadowContent?.trim()) { + return shadowContent.trim(); + } + } + + if (attribute === "innerText") { + let textContent = + (element as HTMLElement).innerText?.trim() || + (element as HTMLElement).textContent?.trim(); + + if (!textContent) { + const dataAttributes = [ + "data-600", + "data-text", + "data-label", + "data-value", + "data-content", + ]; + for (const attr of dataAttributes) { + const dataValue = element.getAttribute(attr); + if (dataValue && dataValue.trim()) { + textContent = dataValue.trim(); + break; + } + } + } + + return textContent || null; + } else if (attribute === "innerHTML") { + return element.innerHTML?.trim() || null; + } else if (attribute === "href") { + let anchorElement = element; + + if (element.tagName !== "A") { + anchorElement = + element.closest("a") || + element.parentElement?.closest("a") || + element; + } + + const hrefValue = anchorElement.getAttribute("href"); + if (!hrefValue || hrefValue.trim() === "") { + return null; + } + + try { + return new URL(hrefValue, baseURL).href; + } catch (e) { + console.warn("Error creating URL from", hrefValue, e); + return hrefValue; + } + } else if (attribute === "src") { + const attrValue = element.getAttribute(attribute); + const dataAttr = attrValue || element.getAttribute("data-" + attribute); + + if (!dataAttr || 
dataAttr.trim() === "") { + const style = window.getComputedStyle(element as HTMLElement); + const bgImage = style.backgroundImage; + if (bgImage && bgImage !== "none") { + const matches = bgImage.match(/url\(['"]?([^'")]+)['"]?\)/); + return matches ? new URL(matches[1], baseURL).href : null; + } + return null; + } + + try { + return new URL(dataAttr, baseURL).href; + } catch (e) { + console.warn("Error creating URL from", dataAttr, e); + return dataAttr; + } + } + return element.getAttribute(attribute); + }; + + // Simple deepest child finder - limit depth to prevent hanging + const findDeepestChild = (element: HTMLElement): HTMLElement => { + let deepest = element; + let maxDepth = 0; + + const traverse = (el: HTMLElement, depth: number) => { + if (depth > 3) return; + + const text = el.textContent?.trim() || ""; + if (isValidData(text) && depth > maxDepth) { + maxDepth = depth; + deepest = el; + } + + const children = Array.from(el.children).slice(0, 3); + children.forEach((child) => { + if (child instanceof HTMLElement) { + traverse(child, depth + 1); + } + }); + }; + + traverse(element, 0); + return deepest; + }; + + validatedChildSelectors.forEach((childSelector, index) => { + try { + // Detect if this selector should use shadow DOM traversal + const isShadowSelector = childSelector.includes('>>') || + childSelector.startsWith('//') && + (listSelector.includes('>>') || currentSnapshot?.snapshot); + + const element = evaluateXPathWithShadowSupport( + iframeElement.contentDocument!, + childSelector, + isShadowSelector + ) as HTMLElement; + + if (element && isElementVisible(element)) { + const rect = element.getBoundingClientRect(); + const position = { x: rect.left, y: rect.top }; + + const tagName = element.tagName.toLowerCase(); + const isShadow = element.getRootNode() instanceof ShadowRoot; + + if (tagName === "a") { + const anchor = element as HTMLAnchorElement; + const href = extractValueWithShadowSupport(anchor, "href"); + const text = 
extractValueWithShadowSupport(anchor, "innerText"); + + if ( + href && + href.trim() !== "" && + href !== window.location.href && + !href.startsWith("javascript:") && + !href.startsWith("#") + ) { + const fieldIdHref = Date.now() + index * 1000; + + candidateFields.push({ + id: fieldIdHref, + element: element, + isLeaf: true, + depth: 0, + position: position, + field: { + id: fieldIdHref, + type: "text", + label: `Label ${index * 2 + 1}`, + data: href, + selectorObj: { + selector: childSelector, + tag: element.tagName, + isShadow: isShadow, + attribute: "href", + }, + }, + }); + } + + const fieldIdText = Date.now() + index * 1000 + 1; + + if (text && isValidData(text)) { + candidateFields.push({ + id: fieldIdText, + element: element, + isLeaf: true, + depth: 0, + position: position, + field: { + id: fieldIdText, + type: "text", + label: `Label ${index * 2 + 2}`, + data: text, + selectorObj: { + selector: childSelector, + tag: element.tagName, + isShadow: isShadow, + attribute: "innerText", + }, + }, + }); + } + } else if (tagName === "img") { + const img = element as HTMLImageElement; + const src = extractValueWithShadowSupport(img, "src"); + const alt = extractValueWithShadowSupport(img, "alt"); + + if (src && !src.startsWith("data:") && src.length > 10) { + const fieldId = Date.now() + index * 1000; + + candidateFields.push({ + id: fieldId, + element: element, + isLeaf: true, + depth: 0, + position: position, + field: { + id: fieldId, + type: "text", + label: `Label ${index + 1}`, + data: src, + selectorObj: { + selector: childSelector, + tag: element.tagName, + isShadow: isShadow, + attribute: "src", + }, + }, + }); + } + + if (alt && isValidData(alt)) { + const fieldId = Date.now() + index * 1000 + 1; + + candidateFields.push({ + id: fieldId, + element: element, + isLeaf: true, + depth: 0, + position: position, + field: { + id: fieldId, + type: "text", + label: `Label ${index + 2}`, + data: alt, + selectorObj: { + selector: childSelector, + tag: 
element.tagName, + isShadow: isShadow, + attribute: "alt", + }, + }, + }); + } + } else { + const deepestElement = findDeepestChild(element); + const data = extractValueWithShadowSupport(deepestElement, "innerText"); + + if (data && isValidData(data)) { + const isLeaf = isLeafElement(deepestElement); + const depth = getElementDepthFromList( + deepestElement, + listSelector, + iframeElement.contentDocument! + ); + + const fieldId = Date.now() + index; + + candidateFields.push({ + id: fieldId, + element: deepestElement, + isLeaf: isLeaf, + depth: depth, + position: position, + field: { + id: fieldId, + type: "text", + label: `Label ${index + 1}`, + data: data, + selectorObj: { + selector: childSelector, + tag: deepestElement.tagName, + isShadow: deepestElement.getRootNode() instanceof ShadowRoot, + attribute: "innerText", + }, + }, + }); + } + } + } + } catch (error) { + console.warn( + `Failed to process child selector ${childSelector}:`, + error + ); + } + }); + + candidateFields.sort((a, b) => { + const yDiff = a.position.y - b.position.y; + + if (Math.abs(yDiff) <= 5) { + return a.position.x - b.position.x; + } + + return yDiff; + }); + + const filteredCandidates = removeParentChildDuplicates(candidateFields); + + const finalFields = removeDuplicateContent(filteredCandidates); + return finalFields; + }, + [currentSnapshot] + ); + + const isLeafElement = (element: HTMLElement): boolean => { + const children = Array.from(element.children) as HTMLElement[]; + + if (children.length === 0) return true; + + const hasContentfulChildren = children.some((child) => { + const text = child.textContent?.trim() || ""; + return text.length > 0 && text !== element.textContent?.trim(); + }); + + return !hasContentfulChildren; + }; + + const getElementDepthFromList = ( + element: HTMLElement, + listSelector: string, + document: Document + ): number => { + try { + const listResult = document.evaluate( + listSelector, + document, + null, + XPathResult.FIRST_ORDERED_NODE_TYPE, + null 
+ ); + + const listElement = listResult.singleNodeValue as HTMLElement; + if (!listElement) return 0; + + let depth = 0; + let current = element; + + while (current && current !== listElement && current.parentElement) { + depth++; + current = current.parentElement; + if (depth > 20) break; + } + + return current === listElement ? depth : 0; + } catch (error) { + return 0; + } + }; + + const removeParentChildDuplicates = ( + candidates: Array<{ + id: number; + field: TextStep; + element: HTMLElement; + isLeaf: boolean; + depth: number; + position: { x: number; y: number }; + }> + ): Array<{ + id: number; + field: TextStep; + element: HTMLElement; + isLeaf: boolean; + depth: number; + position: { x: number; y: number }; + }> => { + const filtered: Array<{ + id: number; + field: TextStep; + element: HTMLElement; + isLeaf: boolean; + depth: number; + position: { x: number; y: number }; + }> = []; + + for (const candidate of candidates) { + let shouldInclude = true; + + for (const existing of filtered) { + if (candidate.element.contains(existing.element)) { + shouldInclude = false; + break; + } else if (existing.element.contains(candidate.element)) { + const existingIndex = filtered.indexOf(existing); + filtered.splice(existingIndex, 1); + break; + } + } + + if (candidate.element.tagName.toLowerCase() === "a") { + shouldInclude = true; + } + + if (shouldInclude) { + filtered.push(candidate); + } + } + + return filtered; + }; + + const removeDuplicateContent = ( + candidates: Array<{ + id: number; + field: TextStep; + element: HTMLElement; + isLeaf: boolean; + depth: number; + position: { x: number; y: number }; + }> + ): Record => { + const finalFields: Record = {}; + const seenContent = new Set(); + let labelCounter = 1; + + for (const candidate of candidates) { + const content = candidate.field.data.trim().toLowerCase(); + + if (!seenContent.has(content)) { + seenContent.add(content); + finalFields[candidate.id] = { + ...candidate.field, + label: `Label 
${labelCounter++}`, + }; + } + } + + return finalFields; + }; + + useEffect(() => { + if (isDOMMode && listSelector) { + socket?.emit("setGetList", { getList: true }); + socket?.emit("listSelector", { selector: listSelector }); + + clientSelectorGenerator.setListSelector(listSelector); + + if (currentSnapshot && cachedListSelector !== listSelector) { + setCachedChildSelectors([]); + setIsCachingChildSelectors(true); + setCachedListSelector(listSelector); + + const iframeElement = document.querySelector( + "#dom-browser-iframe" + ) as HTMLIFrameElement; + + if (iframeElement?.contentDocument) { + setTimeout(() => { + try { + const childSelectors = + clientSelectorGenerator.getChildSelectors( + iframeElement.contentDocument as Document, + listSelector + ); + + clientSelectorGenerator.precomputeChildSelectorMappings( + childSelectors, + iframeElement.contentDocument as Document + ); + + setCachedChildSelectors(childSelectors); + + const autoFields = createFieldsFromChildSelectors( + childSelectors, + listSelector + ); + + if (Object.keys(autoFields).length > 0) { + setFields(autoFields); + + addListStep( + listSelector, + autoFields, + currentListId || Date.now(), + currentListActionId || `list-${crypto.randomUUID()}`, + { type: "", selector: paginationSelector }, + undefined, + false + ); + } + } catch (error) { + console.error("Error during child selector caching:", error); + } finally { + setIsCachingChildSelectors(false); + + if (pendingNotification) { + notify(pendingNotification.type, pendingNotification.message); + setPendingNotification(null); + } + } + }, 100); + } else { + setIsCachingChildSelectors(false); + } + } + } + }, [ + isDOMMode, + listSelector, + socket, + getList, + currentSnapshot, + cachedListSelector, + pendingNotification, + notify, + ]); + + useEffect(() => { + if (!listSelector) { + setCachedListSelector(null); + } + }, [listSelector]); + useEffect(() => { coordinateMapper.updateDimensions(dimensions.width, dimensions.height, 
viewportInfo.width, viewportInfo.height); }, [viewportInfo, dimensions.width, dimensions.height]); useEffect(() => { if (listSelector) { - window.sessionStorage.setItem('recordingListSelector', listSelector); + sessionStorage.setItem('recordingListSelector', listSelector); } }, [listSelector]); useEffect(() => { - const storedListSelector = window.sessionStorage.getItem('recordingListSelector'); + const storedListSelector = sessionStorage.getItem('recordingListSelector'); // Only restore state if it exists in sessionStorage if (storedListSelector && !listSelector) { @@ -135,6 +1241,7 @@ export const BrowserWindow = () => { setListSelector(null); setFields({}); setCurrentListId(null); + setCachedChildSelectors([]); }, []); useEffect(() => { @@ -160,18 +1267,195 @@ export const BrowserWindow = () => { useEffect(() => { if (socket) { socket.on("screencast", screencastHandler); + socket.on("domcast", rrwebSnapshotHandler); + socket.on("dom-mode-enabled", domModeHandler); + socket.on("dom-mode-error", domModeErrorHandler); } - if (canvasRef?.current) { - drawImage(screenShot, canvasRef.current); - } else { - console.log('Canvas is not initialized'); - } - return () => { - socket?.off("screencast", screencastHandler); - } - }, [screenShot, canvasRef, socket, screencastHandler]); - const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[] }) => { + if (canvasRef?.current && !isDOMMode && screenShot) { + drawImage(screenShot, canvasRef.current); + } + + return () => { + if (socket) { + socket.off("screencast", screencastHandler); + socket.off("domcast", rrwebSnapshotHandler); + socket.off("dom-mode-enabled", domModeHandler); + socket.off("dom-mode-error", domModeErrorHandler); + } + }; + }, [ + socket, + screenShot, + canvasRef, + isDOMMode, + screencastHandler, + rrwebSnapshotHandler, + domModeHandler, + domModeErrorHandler, + ]); + + const domHighlighterHandler = useCallback( + (data: { + 
rect: DOMRect; + selector: string; + elementInfo: ElementInfo | null; + childSelectors?: string[]; + isShadow?: boolean; + groupInfo?: { + isGroupElement: boolean; + groupSize: number; + groupElements: HTMLElement[]; + groupFingerprint: ElementFingerprint; + }; + similarElements?: { + elements: HTMLElement[]; + rects: DOMRect[]; + }; + isDOMMode?: boolean; + }) => { + if (!getText && !getList) { + setHighlighterData(null); + return; + } + + if (!isDOMMode || !currentSnapshot) { + return; + } + + let iframeElement = document.querySelector( + "#dom-browser-iframe" + ) as HTMLIFrameElement; + + if (!iframeElement) { + iframeElement = document.querySelector( + "#browser-window iframe" + ) as HTMLIFrameElement; + } + + if (!iframeElement) { + console.error("Could not find iframe element for DOM highlighting"); + return; + } + + const iframeRect = iframeElement.getBoundingClientRect(); + const IFRAME_BODY_PADDING = 16; + + let mappedSimilarElements; + if (data.similarElements) { + mappedSimilarElements = { + elements: data.similarElements.elements, + rects: data.similarElements.rects.map( + (rect) => + new DOMRect( + rect.x + iframeRect.left - IFRAME_BODY_PADDING, + rect.y + iframeRect.top - IFRAME_BODY_PADDING, + rect.width, + rect.height + ) + ), + }; + } + + if (data.groupInfo) { + setCurrentGroupInfo(data.groupInfo); + } else { + setCurrentGroupInfo(null); + } + + const absoluteRect = new DOMRect( + data.rect.x + iframeRect.left - IFRAME_BODY_PADDING, + data.rect.y + iframeRect.top - IFRAME_BODY_PADDING, + data.rect.width, + data.rect.height + ); + + const mappedData = { + ...data, + rect: absoluteRect, + childSelectors: data.childSelectors || cachedChildSelectors, + similarElements: mappedSimilarElements, + }; + + if (getList === true) { + if (!listSelector && data.groupInfo?.isGroupElement) { + const updatedGroupElements = data.groupInfo.groupElements.map( + (element) => { + const elementRect = element.getBoundingClientRect(); + return { + element, + rect: new 
DOMRect( + elementRect.x + iframeRect.left - IFRAME_BODY_PADDING, + elementRect.y + iframeRect.top - IFRAME_BODY_PADDING, + elementRect.width, + elementRect.height + ), + }; + } + ); + + const mappedData = { + ...data, + rect: absoluteRect, + groupElements: updatedGroupElements, + childSelectors: data.childSelectors || cachedChildSelectors, + }; + + setHighlighterData(mappedData); + } else if (listSelector) { + const hasChildSelectors = + Array.isArray(mappedData.childSelectors) && + mappedData.childSelectors.length > 0; + + if (limitMode) { + setHighlighterData(null); + } else if (paginationMode) { + if ( + paginationType !== "" && + !["none", "scrollDown", "scrollUp"].includes(paginationType) + ) { + setHighlighterData(mappedData); + } else { + setHighlighterData(null); + } + } else if (hasChildSelectors) { + setHighlighterData(mappedData); + } else { + setHighlighterData(null); + } + } else { + setHighlighterData(mappedData); + } + } else { + setHighlighterData(mappedData); + } + }, + [ + isDOMMode, + currentSnapshot, + getText, + getList, + socket, + listSelector, + paginationMode, + paginationType, + limitMode, + cachedChildSelectors, + ] + ); + + const highlighterHandler = useCallback((data: { rect: DOMRect, selector: string, elementInfo: ElementInfo | null, childSelectors?: string[], isDOMMode?: boolean; }) => { + if (isDOMMode || data.isDOMMode) { + domHighlighterHandler(data); + return; + } + + const now = performance.now(); + if (now - highlighterUpdateRef.current < 16) { + return; + } + highlighterUpdateRef.current = now; + // Map the incoming DOMRect from browser coordinates to canvas coordinates const mappedRect = new DOMRect( data.rect.x, @@ -253,37 +1537,21 @@ export const BrowserWindow = () => { }, [getList, socket, listSelector, paginationMode, paginationType, limitMode]); useEffect(() => { + document.addEventListener("mousemove", onMouseMove, false); if (socket) { - socket.on('listDataExtracted', (response) => { - const { currentListId, data } = 
response; - - updateListStepData(currentListId, data); - }); - } - - return () => { - socket?.off('listDataExtracted'); - }; - }, [socket]); - - useEffect(() => { - document.addEventListener('mousemove', onMouseMove, false); - if (socket) { - socket.off("highlighter", highlighterHandler); - - socket.on("highlighter", highlighterHandler); - } - return () => { - document.removeEventListener('mousemove', onMouseMove); - if (socket) { socket.off("highlighter", highlighterHandler); - } + socket.on("highlighter", highlighterHandler); + } + return () => { + document.removeEventListener("mousemove", onMouseMove); + if (socket) { + socket.off("highlighter", highlighterHandler); + } }; - }, [socket, highlighterHandler, onMouseMove, getList, listSelector]); + }, [socket, highlighterHandler, getList, listSelector]); useEffect(() => { if (socket && listSelector) { - console.log('Syncing list selector with server:', listSelector); socket.emit('setGetList', { getList: true }); socket.emit('listSelector', { selector: listSelector }); } @@ -296,137 +1564,378 @@ export const BrowserWindow = () => { } }, [captureStage, listSelector, socket]); - const handleClick = (e: React.MouseEvent) => { - if (highlighterData && canvasRef?.current) { - const canvasRect = canvasRef.current.getBoundingClientRect(); - const clickX = e.clientX - canvasRect.left; - const clickY = e.clientY - canvasRect.top; + const handleDOMElementSelection = useCallback( + (highlighterData: { + rect: DOMRect; + selector: string; + isShadow?: boolean; + elementInfo: ElementInfo | null; + childSelectors?: string[]; + groupInfo?: { + isGroupElement: boolean; + groupSize: number; + groupElements: HTMLElement[]; + }; + }) => { + setShowAttributeModal(false); + setSelectedElement(null); + setAttributeOptions([]); - const highlightRect = highlighterData.rect; - - const mappedRect = coordinateMapper.mapBrowserRectToCanvas(highlightRect); - if ( - clickX >= mappedRect.left && - clickX <= mappedRect.right && - clickY >= 
mappedRect.top && - clickY <= mappedRect.bottom - ) { - - const options = getAttributeOptions(highlighterData.elementInfo?.tagName || '', highlighterData.elementInfo); - - if (getText === true) { - if (options.length === 1) { - // Directly use the available attribute if only one option is present - const attribute = options[0].value; - const data = attribute === 'href' ? highlighterData.elementInfo?.url || '' : - attribute === 'src' ? highlighterData.elementInfo?.imageUrl || '' : - highlighterData.elementInfo?.innerText || ''; - - addTextStep('', data, { - selector: highlighterData.selector, - tag: highlighterData.elementInfo?.tagName, - shadow: highlighterData.elementInfo?.isShadowRoot, - attribute - }); - } else { - // Show the modal if there are multiple options - setAttributeOptions(options); - setSelectedElement({ - selector: highlighterData.selector, - info: highlighterData.elementInfo, - }); - setShowAttributeModal(true); - } - } - - if (paginationMode && getList) { - // Only allow selection in pagination mode if type is not empty, 'scrollDown', or 'scrollUp' - if (paginationType !== '' && paginationType !== 'scrollDown' && paginationType !== 'scrollUp' && paginationType !== 'none') { - setPaginationSelector(highlighterData.selector); - notify(`info`, t('browser_window.attribute_modal.notifications.pagination_select_success')); - addListStep(listSelector!, fields, currentListId || 0, { type: paginationType, selector: highlighterData.selector }); - socket?.emit('setPaginationMode', { pagination: false }); - } - return; - } - - if (getList === true && !listSelector) { - let cleanedSelector = highlighterData.selector; - if (cleanedSelector.includes('nth-child')) { - cleanedSelector = cleanedSelector.replace(/:nth-child\(\d+\)/g, ''); - } - - setListSelector(cleanedSelector); - notify(`info`, t('browser_window.attribute_modal.notifications.list_select_success')); - setCurrentListId(Date.now()); - setFields({}); - } else if (getList === true && listSelector && 
currentListId) { - const attribute = options[0].value; - const data = attribute === 'href' ? highlighterData.elementInfo?.url || '' : - attribute === 'src' ? highlighterData.elementInfo?.imageUrl || '' : - highlighterData.elementInfo?.innerText || ''; - // Add fields to the list - if (options.length === 1) { - const attribute = options[0].value; - let currentSelector = highlighterData.selector; - - if (currentSelector.includes('>')) { - const [firstPart, ...restParts] = currentSelector.split('>').map(p => p.trim()); - const listSelectorRightPart = listSelector.split('>').pop()?.trim().replace(/:nth-child\(\d+\)/g, ''); - - if (firstPart.includes('nth-child') && - firstPart.replace(/:nth-child\(\d+\)/g, '') === listSelectorRightPart) { - currentSelector = `${firstPart.replace(/:nth-child\(\d+\)/g, '')} > ${restParts.join(' > ')}`; - } - } - - const newField: TextStep = { - id: Date.now(), - type: 'text', - label: `Label ${Object.keys(fields).length + 1}`, - data: data, - selectorObj: { - selector: currentSelector, - tag: highlighterData.elementInfo?.tagName, - shadow: highlighterData.elementInfo?.isShadowRoot, - attribute - } - }; - - const updatedFields = { - ...fields, - [newField.id]: newField - }; - - setFields(updatedFields); - - if (listSelector) { - socket?.emit('extractListData', { - listSelector, - fields: updatedFields, - currentListId, - pagination: { type: '', selector: paginationSelector } - }); - - addListStep( - listSelector, - updatedFields, - currentListId, - { type: '', selector: paginationSelector } - ); - } - - } else { - setAttributeOptions(options); - setSelectedElement({ - selector: highlighterData.selector, - info: highlighterData.elementInfo - }); - setShowAttributeModal(true); - } - } - } + if (paginationMode && getList) { + if ( + paginationType !== "" && + paginationType !== "scrollDown" && + paginationType !== "scrollUp" && + paginationType !== "none" + ) { + setPaginationSelector(highlighterData.selector); + notify( + `info`, + t( + 
"browser_window.attribute_modal.notifications.pagination_select_success" + ) + ); + addListStep( + listSelector!, + fields, + currentListId || 0, + currentListActionId || `list-${crypto.randomUUID()}`, + { + type: paginationType, + selector: highlighterData.selector, + isShadow: highlighterData.isShadow + }, + undefined, + highlighterData.isShadow + ); + socket?.emit("setPaginationMode", { pagination: false }); + } + return; } + + if ( + getList === true && + !listSelector && + highlighterData.groupInfo?.isGroupElement + ) { + if (highlighterData?.groupInfo.groupElements) { + setProcessingGroupCoordinates( + highlighterData.groupInfo.groupElements.map((element) => ({ + element, + rect: element.getBoundingClientRect(), + })) + ); + } + + let cleanedSelector = highlighterData.selector; + + setListSelector(cleanedSelector); + notify( + `info`, + t( + "browser_window.attribute_modal.notifications.list_select_success", + { + count: highlighterData.groupInfo.groupSize, + } + ) || + `Selected group with ${highlighterData.groupInfo.groupSize} similar elements` + ); + setCurrentListId(Date.now()); + setFields({}); + + socket?.emit("setGetList", { getList: true }); + socket?.emit("listSelector", { selector: cleanedSelector }); + + return; + } + + if (getList === true && listSelector && currentListId) { + const options = getAttributeOptions( + highlighterData.elementInfo?.tagName || "", + highlighterData.elementInfo + ); + + if (options.length === 1) { + const attribute = options[0].value; + let currentSelector = highlighterData.selector; + + const data = + attribute === "href" + ? highlighterData.elementInfo?.url || "" + : attribute === "src" + ? 
highlighterData.elementInfo?.imageUrl || "" + : highlighterData.elementInfo?.innerText || ""; + + const newField: TextStep = { + id: Date.now(), + type: "text", + label: `Label ${Object.keys(fields).length + 1}`, + data: data, + selectorObj: { + selector: currentSelector, + tag: highlighterData.elementInfo?.tagName, + isShadow: highlighterData.isShadow || highlighterData.elementInfo?.isShadowRoot, + attribute, + }, + }; + + const updatedFields = { + ...fields, + [newField.id]: newField, + }; + + setFields(updatedFields); + + if (listSelector) { + addListStep( + listSelector, + updatedFields, + currentListId, + currentListActionId || `list-${crypto.randomUUID()}`, + { type: "", selector: paginationSelector }, + undefined, + highlighterData.isShadow + ); + } + } else { + setAttributeOptions(options); + setSelectedElement({ + selector: highlighterData.selector, + info: highlighterData.elementInfo, + }); + setShowAttributeModal(true); + } + return; + } + + if (getText === true) { + const options = getAttributeOptions( + highlighterData.elementInfo?.tagName || "", + highlighterData.elementInfo + ); + + if (options.length === 1) { + const attribute = options[0].value; + const data = + attribute === "href" + ? highlighterData.elementInfo?.url || "" + : attribute === "src" + ? 
highlighterData.elementInfo?.imageUrl || "" + : highlighterData.elementInfo?.innerText || ""; + + addTextStep( + "", + data, + { + selector: highlighterData.selector, + tag: highlighterData.elementInfo?.tagName, + isShadow: highlighterData.isShadow || highlighterData.elementInfo?.isShadowRoot, + attribute, + }, + currentTextActionId || `text-${crypto.randomUUID()}` + ); + } else { + setAttributeOptions(options); + setSelectedElement({ + selector: highlighterData.selector, + info: highlighterData.elementInfo, + }); + setShowAttributeModal(true); + } + } + }, + [ + getText, + getList, + listSelector, + paginationMode, + paginationType, + limitMode, + fields, + currentListId, + currentTextActionId, + currentListActionId, + addTextStep, + addListStep, + notify, + socket, + t, + paginationSelector, + ] + ); + + + const handleClick = (e: React.MouseEvent) => { + if (highlighterData) { + let shouldProcessClick = false; + + if (!isDOMMode && canvasRef?.current) { + const canvasRect = canvasRef.current.getBoundingClientRect(); + const clickX = e.clientX - canvasRect.left; + const clickY = e.clientY - canvasRect.top; + const highlightRect = highlighterData.rect; + const mappedRect = + coordinateMapper.mapBrowserRectToCanvas(highlightRect); + + shouldProcessClick = + clickX >= mappedRect.left && + clickX <= mappedRect.right && + clickY >= mappedRect.top && + clickY <= mappedRect.bottom; + } else { + shouldProcessClick = true; + } + + if (shouldProcessClick) { + const options = getAttributeOptions( + highlighterData.elementInfo?.tagName || "", + highlighterData.elementInfo + ); + + if (getText === true) { + if (options.length === 1) { + const attribute = options[0].value; + const data = + attribute === "href" + ? highlighterData.elementInfo?.url || "" + : attribute === "src" + ? 
highlighterData.elementInfo?.imageUrl || "" + : highlighterData.elementInfo?.innerText || ""; + + addTextStep( + "", + data, + { + selector: highlighterData.selector, + tag: highlighterData.elementInfo?.tagName, + isShadow: highlighterData.isShadow || highlighterData.elementInfo?.isShadowRoot, + attribute, + }, + currentTextActionId || `text-${crypto.randomUUID()}` + ); + } else { + setAttributeOptions(options); + setSelectedElement({ + selector: highlighterData.selector, + info: highlighterData.elementInfo, + }); + setShowAttributeModal(true); + } + } + + if (paginationMode && getList) { + if ( + paginationType !== "" && + paginationType !== "scrollDown" && + paginationType !== "scrollUp" && + paginationType !== "none" + ) { + setPaginationSelector(highlighterData.selector); + notify( + `info`, + t( + "browser_window.attribute_modal.notifications.pagination_select_success" + ) + ); + addListStep( + listSelector!, + fields, + currentListId || 0, + currentListActionId || `list-${crypto.randomUUID()}`, + { type: paginationType, selector: highlighterData.selector, isShadow: highlighterData.isShadow }, + undefined, + highlighterData.isShadow + ); + socket?.emit("setPaginationMode", { pagination: false }); + } + return; + } + + if (getList === true && !listSelector) { + let cleanedSelector = highlighterData.selector; + if ( + cleanedSelector.includes("[") && + cleanedSelector.match(/\[\d+\]/) + ) { + cleanedSelector = cleanedSelector.replace(/\[\d+\]/g, ""); + } + + setListSelector(cleanedSelector); + notify( + `info`, + t( + "browser_window.attribute_modal.notifications.list_select_success" + ) + ); + setCurrentListId(Date.now()); + setFields({}); + } else if (getList === true && listSelector && currentListId) { + const attribute = options[0].value; + const data = + attribute === "href" + ? highlighterData.elementInfo?.url || "" + : attribute === "src" + ? 
highlighterData.elementInfo?.imageUrl || "" + : highlighterData.elementInfo?.innerText || ""; + + if (options.length === 1) { + let currentSelector = highlighterData.selector; + + if (currentSelector.includes("/")) { + const xpathParts = currentSelector + .split("/") + .filter((part) => part); + const cleanedParts = xpathParts.map((part) => { + return part.replace(/\[\d+\]/g, ""); + }); + + if (cleanedParts.length > 0) { + currentSelector = "//" + cleanedParts.join("/"); + } + } + + const newField: TextStep = { + id: Date.now(), + type: "text", + label: `Label ${Object.keys(fields).length + 1}`, + data: data, + selectorObj: { + selector: currentSelector, + tag: highlighterData.elementInfo?.tagName, + isShadow: highlighterData.isShadow || highlighterData.elementInfo?.isShadowRoot, + attribute, + }, + }; + + const updatedFields = { + ...fields, + [newField.id]: newField, + }; + + setFields(updatedFields); + + if (listSelector) { + addListStep( + listSelector, + updatedFields, + currentListId, + currentListActionId || `list-${crypto.randomUUID()}`, + { type: "", selector: paginationSelector, isShadow: highlighterData.isShadow }, + undefined, + highlighterData.isShadow + ); + } + } else { + setAttributeOptions(options); + setSelectedElement({ + selector: highlighterData.selector, + info: highlighterData.elementInfo, + }); + setShowAttributeModal(true); + } + } + } + } }; const handleAttributeSelection = (attribute: string) => { @@ -447,9 +1956,9 @@ export const BrowserWindow = () => { addTextStep('', data, { selector: selectedElement.selector, tag: selectedElement.info?.tagName, - shadow: selectedElement.info?.isShadowRoot, + isShadow: highlighterData?.isShadow || selectedElement.info?.isShadowRoot, attribute: attribute - }); + }, currentTextActionId || `text-${crypto.randomUUID()}`); } if (getList === true && listSelector && currentListId) { const newField: TextStep = { @@ -460,7 +1969,7 @@ export const BrowserWindow = () => { selectorObj: { selector: 
selectedElement.selector, tag: selectedElement.info?.tagName, - shadow: selectedElement.info?.isShadowRoot, + isShadow: highlighterData?.isShadow || highlighterData?.elementInfo?.isShadowRoot, attribute: attribute } }; @@ -473,24 +1982,23 @@ export const BrowserWindow = () => { setFields(updatedFields); if (listSelector) { - socket?.emit('extractListData', { - listSelector, - fields: updatedFields, - currentListId, - pagination: { type: '', selector: paginationSelector } - }); - addListStep( listSelector, updatedFields, currentListId, - { type: '', selector: paginationSelector } + currentListActionId || `list-${crypto.randomUUID()}`, + { type: "", selector: paginationSelector, isShadow: highlighterData?.isShadow }, + undefined, + highlighterData?.isShadow ); } } } } + setShowAttributeModal(false); + setSelectedElement(null); + setAttributeOptions([]); }; const resetPaginationSelector = useCallback(() => { @@ -504,84 +2012,494 @@ export const BrowserWindow = () => { }, [paginationMode, resetPaginationSelector]); return ( -
- { - getText === true || getList === true ? ( - { }} - canBeClosed={false} - modalStyle={modalStyle} +
+ {/* Attribute selection modal */} + {(getText === true || getList === true) && ( + { + setShowAttributeModal(false); + setSelectedElement(null); + setAttributeOptions([]); + }} + canBeClosed={true} + modalStyle={modalStyle} + > +
+

Select Attribute

+
+ {attributeOptions.map((option) => ( + - ))} -
-
-
- ) : null - } -
- {((getText === true || getList === true) && !showAttributeModal && highlighterData?.rect != null && highlighterData?.rect.top != null) && canvasRef?.current ? - - : null} - + + ))} +
+
+
+ )} + + {datePickerInfo && ( + setDatePickerInfo(null)} + /> + )} + {dropdownInfo && ( + setDropdownInfo(null)} + /> + )} + {timePickerInfo && ( + setTimePickerInfo(null)} + /> + )} + {dateTimeLocalInfo && ( + setDateTimeLocalInfo(null)} + /> + )} + + {/* Main content area */} +
+ {/* Add CSS for the spinner animation */} + + + {(getText === true || getList === true) && + !showAttributeModal && + highlighterData?.rect != null && ( + <> + {!isDOMMode && canvasRef?.current && ( + + )} + + {isDOMMode && highlighterData && ( + <> + {/* Individual element highlight (for non-group or hovered element) */} + {((getText && !listSelector) || + (getList && paginationMode && paginationType !== "" && + !["none", "scrollDown", "scrollUp"].includes(paginationType))) && ( +
+ )} + + {/* Group elements highlighting with real-time coordinates */} + {getList && + !listSelector && + currentGroupInfo?.isGroupElement && + highlighterData.groupElements && + highlighterData.groupElements.map( + (groupElement, index) => ( + + {/* Highlight box */} +
+ +
+ List item {index + 1} +
+ + ) + )} + + {getList && + listSelector && + !paginationMode && + !limitMode && + highlighterData?.similarElements && + highlighterData.similarElements.rects.map( + (rect, index) => ( + + {/* Highlight box for similar element */} +
+ + {/* Label for similar element */} +
+ Item {index + 1} +
+ + ) + )} + + )} + + )} + + {isDOMMode ? ( +
+ {currentSnapshot ? ( + { + domHighlighterHandler(data); + }} + isCachingChildSelectors={isCachingChildSelectors} + onElementSelect={handleDOMElementSelection} + onShowDatePicker={handleShowDatePicker} + onShowDropdown={handleShowDropdown} + onShowTimePicker={handleShowTimePicker} + onShowDateTimePicker={handleShowDateTimePicker} /> + ) : ( +
+
+
+ Loading website... +
+ +
+ )} + + {/* Loading overlay positioned specifically over DOM content */} + {isCachingChildSelectors && ( + <> + {/* Background overlay */} +
+ + {/* Use processing coordinates captured before listSelector was set */} + {processingGroupCoordinates.map((groupElement, index) => ( + + {/* Original highlight box */} +
+ + {/* Label */} +
+ List item {index + 1} +
+ + {/* Scanning animation */} +
+
+
+ + + + ))} + + {/* Fallback loader */} + {processingGroupCoordinates.length === 0 && ( +
+
+
+ )} + + )}
+ ) : ( + /* Screenshot mode canvas */ + + )}
+
); }; const drawImage = (image: string, canvas: HTMLCanvasElement): void => { - const ctx = canvas.getContext('2d'); + if (!ctx) return; const img = new Image(); - - img.src = image; img.onload = () => { - URL.revokeObjectURL(img.src); - ctx?.drawImage(img, 0, 0, canvas.width, canvas.height); + requestAnimationFrame(() => { + ctx.drawImage(img, 0, 0, canvas.width, canvas.height); + }); + if (image.startsWith('blob:')) { + URL.revokeObjectURL(image); + } }; - + img.onerror = () => { + console.warn('Failed to load image'); + }; + img.src = image; }; const modalStyle = { diff --git a/src/components/browser/UrlForm.tsx b/src/components/browser/UrlForm.tsx index fe97c664..227b9cb8 100644 --- a/src/components/browser/UrlForm.tsx +++ b/src/components/browser/UrlForm.tsx @@ -1,8 +1,8 @@ import React, { useState, useEffect, useCallback, useRef } from 'react'; import type { SyntheticEvent } from 'react'; import KeyboardArrowRightIcon from '@mui/icons-material/KeyboardArrowRight'; -import { NavBarForm, NavBarInput } from "../ui/form"; -import { UrlFormButton } from "../ui/buttons/buttons"; +import { NavBarForm, NavBarInput } from "../ui/Form"; +import { UrlFormButton } from "../ui/buttons/Buttons"; import { useSocketStore } from '../../context/socket'; import { Socket } from "socket.io-client"; @@ -40,7 +40,7 @@ export const UrlForm = ({ lastSubmittedRef.current = url; // Update the last submitted URL } catch (e) { //alert(`ERROR: ${url} is not a valid url!`); - console.log(e) + console.log(`Failed to submit form:`,e) } }, [setCurrentAddress]); diff --git a/src/components/dashboard/MainMenu.tsx b/src/components/dashboard/MainMenu.tsx index 0c51dbab..3dcf34d8 100644 --- a/src/components/dashboard/MainMenu.tsx +++ b/src/components/dashboard/MainMenu.tsx @@ -1,13 +1,12 @@ -import React from 'react'; +import React, { useState } from 'react'; import Tabs from '@mui/material/Tabs'; import Tab from '@mui/material/Tab'; import Box from '@mui/material/Box'; import { useNavigate } from 
'react-router-dom'; -import { Paper, Button, useTheme } from "@mui/material"; -import { AutoAwesome, FormatListBulleted, VpnKey, Usb, CloudQueue, Code, } from "@mui/icons-material"; -import { apiUrl } from "../../apiConfig"; +import { Paper, Button, useTheme, Modal, Typography, Stack, TextField, InputAdornment, IconButton } from "@mui/material"; // Added TextField, InputAdornment, IconButton +import { AutoAwesome, FormatListBulleted, VpnKey, Usb, CloudQueue, Description, Favorite, ContentCopy } from "@mui/icons-material"; // Added ContentCopy import { useTranslation } from 'react-i18next'; -import i18n from '../../i18n'; +import { useGlobalInfoStore } from "../../context/globalInfo"; interface MainMenuProps { value: string; @@ -18,105 +17,144 @@ export const MainMenu = ({ value = 'robots', handleChangeContent }: MainMenuProp const theme = useTheme(); const { t } = useTranslation(); const navigate = useNavigate(); + const { notify } = useGlobalInfoStore(); + + const [cloudModalOpen, setCloudModalOpen] = useState(false); + const [sponsorModalOpen, setSponsorModalOpen] = useState(false); + + const ossDiscountCode = "MAXUNOSS8"; const handleChange = (event: React.SyntheticEvent, newValue: string) => { navigate(`/${newValue}`); handleChangeContent(newValue); }; - // Define colors based on theme mode + const copyDiscountCode = () => { + navigator.clipboard.writeText(ossDiscountCode).then(() => { + notify("success", "Discount code copied to clipboard!"); + }).catch(err => { + console.error('Failed to copy text: ', err); + notify("error", "Failed to copy discount code."); + }); + }; + const defaultcolor = theme.palette.mode === 'light' ? 'black' : 'white'; const buttonStyles = { justifyContent: 'flex-start', textAlign: 'left', - fontSize: 'medium', - padding: '6px 16px 6px 22px', + fontSize: '17px', + padding: '20px 16px 20px 22px', minHeight: '48px', minWidth: '100%', display: 'flex', alignItems: 'center', textTransform: 'none', color: theme.palette.mode === 'light' ? 
'#6C6C6C' : 'inherit', + '&:hover': { + backgroundColor: theme.palette.mode === 'light' ? '#f5f5f5' : 'inherit', + }, }; - return ( - - - - + + + + } iconPosition="start" sx={{ justifyContent: 'flex-start', textAlign: 'left', fontSize: 'medium' }} /> + } iconPosition="start" sx={{ justifyContent: 'flex-start', textAlign: 'left', fontSize: 'medium' }} /> + } iconPosition="start" sx={{ justifyContent: 'flex-start', textAlign: 'left', fontSize: 'medium' }} /> + } iconPosition="start" sx={{ justifyContent: 'flex-start', textAlign: 'left', fontSize: 'medium' }} /> + +
+ + + + + +
+
+ + setCloudModalOpen(false)}> + + + Join Maxun Cloud + + + Unlock reliable web data extraction. Maxun Cloud ensures you bypass blocks and scale with ease. + + + As a thank-you to open source users, enjoy 8% off your subscription! + + + Use the discount code + + + + + + + ), }} - value="robots" - label={t('mainmenu.recordings')} - icon={} - iconPosition="start" + sx={{ mb: 2, fontSize: 13 }} /> - } - iconPosition="start" - /> - } - iconPosition="start" - /> - } - iconPosition="start" - /> -
-
- - - -
-
+ + + setSponsorModalOpen(false)}> + + + Support Maxun Open Source + + + Maxun is built by a small, full-time team. Your donations directly contribute to making it better. +
+
+ Thank you for your support! 💙 +
+ + + + +
+
+ ); -}; \ No newline at end of file +}; diff --git a/src/components/dashboard/NavBar.tsx b/src/components/dashboard/NavBar.tsx index 591b7624..2b2bd3cc 100644 --- a/src/components/dashboard/NavBar.tsx +++ b/src/components/dashboard/NavBar.tsx @@ -164,7 +164,7 @@ export const NavBar: React.FC = ({ sx={{ color: darkMode ? '#ffffff' : '#0000008A', '&:hover': { - color: '#ff00c3' + background: 'inherit' } }} > @@ -267,7 +267,7 @@ export const NavBar: React.FC = ({ top: "50%", left: "50%", transform: "translate(-50%, -50%)", - width: 500, + width: 700, bgcolor: "background.paper", boxShadow: 24, p: 4, @@ -291,8 +291,7 @@ export const NavBar: React.FC = ({ @@ -335,10 +334,15 @@ export const NavBar: React.FC = ({
docker-compose down
+
+ # Remove existing backend and frontend images +
+ docker rmi getmaxun/maxun-frontend:latest getmaxun/maxun-backend:latest +

# pull latest docker images
- docker-compose pull + docker-compose pull backend frontend

# start maxun @@ -365,6 +369,9 @@ export const NavBar: React.FC = ({ borderRadius: '5px', padding: '8px', marginRight: '10px', + '&:hover': { + background: 'inherit' + } }}> {user.email} @@ -390,11 +397,6 @@ export const NavBar: React.FC = ({ {t('navbar.menu_items.language')}
- { - window.open('https://docs.maxun.dev', '_blank'); - }}> - Docs - { window.open('https://github.com/getmaxun/maxun', '_blank'); }}> @@ -468,6 +470,14 @@ export const NavBar: React.FC = ({ > Deutsch + { + changeLanguage("tr"); + handleMenuClose(); + }} + > + Türkçe + { window.open('https://docs.maxun.dev/development/i18n', '_blank'); @@ -564,6 +574,14 @@ export const NavBar: React.FC = ({ > Deutsch + { + changeLanguage("tr"); + handleMenuClose(); + }} + > + Türkçe + { window.open('https://docs.maxun.dev/development/i18n', '_blank'); @@ -600,4 +618,4 @@ const NavBarRight = styled.div` align-items: center; justify-content: flex-end; margin-left: auto; -`; \ No newline at end of file +`; diff --git a/src/components/integration/IntegrationSettings.tsx b/src/components/integration/IntegrationSettings.tsx index 5ca71840..6d0b892c 100644 --- a/src/components/integration/IntegrationSettings.tsx +++ b/src/components/integration/IntegrationSettings.tsx @@ -8,22 +8,41 @@ import { AlertTitle, Button, TextField, + IconButton, + Box, + Chip, + Card, + CardContent, + CardActions, + Switch, + FormControlLabel, + Table, + TableBody, + TableCell, + TableContainer, + TableHead, + TableRow, + Paper, } from "@mui/material"; +import { Add as AddIcon, Delete as DeleteIcon, Edit as EditIcon, Science as ScienceIcon } from "@mui/icons-material"; import axios from "axios"; import { useGlobalInfoStore } from "../../context/globalInfo"; import { getStoredRecording } from "../../api/storage"; import { apiUrl } from "../../apiConfig.js"; +import { v4 as uuid } from "uuid"; import Cookies from "js-cookie"; import { useTranslation } from "react-i18next"; import { useNavigate } from "react-router-dom"; +import { addWebhook, updateWebhook, removeWebhook, getWebhooks, testWebhook,WebhookConfig } from "../../api/webhook"; + interface IntegrationProps { isOpen: boolean; handleStart: (data: IntegrationSettings) => void; handleClose: () => void; - preSelectedIntegrationType?: "googleSheets" | 
"airtable" | null; + preSelectedIntegrationType?: "googleSheets" | "airtable" | "webhook" | null; } export interface IntegrationSettings { @@ -33,8 +52,9 @@ export interface IntegrationSettings { airtableBaseName?: string; airtableTableName?: string, airtableTableId?: string, + webhooks?: WebhookConfig[]; data: string; - integrationType: "googleSheets" | "airtable"; + integrationType: "googleSheets" | "airtable" | "webhook"; } const getCookie = (name: string): string | null => { @@ -64,6 +84,7 @@ export const IntegrationSettingsModal = ({ airtableBaseName: "", airtableTableName: "", airtableTableId: "", + webhooks: [], data: "", integrationType: preSelectedIntegrationType || "googleSheets", }); @@ -74,6 +95,16 @@ export const IntegrationSettingsModal = ({ const [loading, setLoading] = useState(false); const [error, setError] = useState(null); + const [showWebhookForm, setShowWebhookForm] = useState(false); + const [editingWebhook, setEditingWebhook] = useState(null); + const [newWebhook, setNewWebhook] = useState({ + id: "", + url: "", + events: ["run_completed"], + active: true, + }); + const [urlError, setUrlError] = useState(null); + const { recordingId, notify, @@ -84,7 +115,7 @@ export const IntegrationSettingsModal = ({ const navigate = useNavigate(); const [selectedIntegrationType, setSelectedIntegrationType] = useState< - "googleSheets" | "airtable" | null + "googleSheets" | "airtable" | "webhook" | null >(preSelectedIntegrationType); const authenticateWithGoogle = () => { @@ -96,6 +127,230 @@ export const IntegrationSettingsModal = ({ window.location.href = `${apiUrl}/auth/airtable?robotId=${recordingId}`; }; + const validateWebhookData = (url: string, events: string[], excludeId?: string) => { + if (!url) { + setUrlError("Please provide webhook URL"); + return false; + } + + try { + new URL(url); + } catch { + setUrlError("Please provide a valid URL"); + return false; + } + + const existingWebhook = settings.webhooks?.find( + (webhook) => webhook.url === 
url && webhook.id !== excludeId + ); + + if (existingWebhook) { + setUrlError("This webhook URL is already in use"); + return false; + } + + if (!events || events.length === 0) { + setUrlError("Please select at least one event"); + return false; + } + + setUrlError(null); + return true; + }; + + const fetchWebhooks = async () => { + try { + setLoading(true); + if (!recordingId) return; + + const response = await getWebhooks(recordingId); + + if (response.ok && response.webhooks) { + setSettings(prev => ({ + ...prev, + webhooks: response.webhooks + })); + } else { + notify("error", response.error || "Failed to fetch webhooks"); + } + setLoading(false); + } catch (error: any) { + setLoading(false); + console.error("Error fetching webhooks:", error); + notify("error", "Failed to fetch webhooks"); + } + }; + + const addWebhookSetting = async () => { + if (!validateWebhookData(newWebhook.url, newWebhook.events)) { + if (!newWebhook.url) { + notify("error", "Please provide webhook URL"); + } else if (!newWebhook.events || newWebhook.events.length === 0) { + notify("error", "Please select at least one event"); + } + return; + } + + if (!recordingId) return; + + try { + setLoading(true); + const webhookWithId = { + ...newWebhook, + id: uuid(), + }; + + const response = await addWebhook(webhookWithId, recordingId); + + if (response.ok) { + const updatedWebhooks = [...(settings.webhooks || []), webhookWithId]; + setSettings({ ...settings, webhooks: updatedWebhooks }); + + resetWebhookForm(); + await refreshRecordingData(); + notify("success", "Webhook added successfully"); + } else { + notify("error", response.error || "Failed to add webhook"); + } + setLoading(false); + } catch (error: any) { + setLoading(false); + console.log("Error adding webhook:", error); + notify("error", "Failed to add webhook"); + } + }; + + const updateWebhookSetting = async () => { + if (!editingWebhook || !recordingId) return; + + if (!validateWebhookData(newWebhook.url, newWebhook.events, 
editingWebhook)) { + if (!newWebhook.url) { + notify("error", "Please provide webhook URL"); + } else if (!newWebhook.events || newWebhook.events.length === 0) { + notify("error", "Please select at least one event"); + } + return; + } + + try { + setLoading(true); + const response = await updateWebhook(newWebhook, recordingId); + + if (response.ok) { + const updatedWebhooks = (settings.webhooks || []).map(w => + w.id === editingWebhook ? newWebhook : w + ); + setSettings({ ...settings, webhooks: updatedWebhooks }); + + resetWebhookForm(); + await refreshRecordingData(); + notify("success", "Webhook updated successfully"); + } else { + notify("error", response.error || "Failed to update webhook"); + } + setLoading(false); + } catch (error: any) { + setLoading(false); + console.error("Error updating webhook:", error); + notify("error", "Failed to update webhook"); + } + }; + + const removeWebhookSetting = async (webhookId: string) => { + if (!recordingId) return; + + try { + setLoading(true); + const response = await removeWebhook(webhookId, recordingId); + + if (response.ok) { + const updatedWebhooks = (settings.webhooks || []).filter(w => w.id !== webhookId); + setSettings({ ...settings, webhooks: updatedWebhooks }); + + await refreshRecordingData(); + notify("success", "Webhook removed successfully"); + } else { + notify("error", response.error || "Failed to remove webhook"); + } + setLoading(false); + } catch (error: any) { + setLoading(false); + console.error("Error removing webhook:", error); + notify("error", "Failed to remove webhook"); + } + }; + + const toggleWebhookStatusSetting = async (webhookId: string) => { + if (!recordingId) return; + + try { + const webhook = settings.webhooks?.find(w => w.id === webhookId); + if (!webhook) return; + + const updatedWebhook = { ...webhook, active: !webhook.active }; + + const response = await updateWebhook(updatedWebhook, recordingId); + + if (response.ok) { + const updatedWebhooks = (settings.webhooks || []).map(w 
=> + w.id === webhookId ? updatedWebhook : w + ); + setSettings({ ...settings, webhooks: updatedWebhooks }); + + await refreshRecordingData(); + notify("success", `Webhook ${updatedWebhook.active ? "enabled" : "disabled"}`); + } else { + notify("error", response.error || "Failed to update webhook"); + } + } catch (error: any) { + console.error("Error toggling webhook status:", error); + notify("error", "Failed to update webhook"); + } + }; + + const testWebhookSetting = async (webhook: WebhookConfig) => { + if (!recordingId) return; + + try { + setLoading(true); + const response = await testWebhook(webhook, recordingId); + + if (response.ok) { + const updatedWebhooks = (settings.webhooks || []).map(w => + w.id === webhook.id ? { ...w, lastCalledAt: new Date().toISOString() } : w + ); + setSettings({ ...settings, webhooks: updatedWebhooks }); + + notify("success", "Test webhook sent successfully"); + } else { + notify("error", response.error || "Failed to test webhook"); + } + setLoading(false); + } catch (error: any) { + setLoading(false); + console.error("Error testing webhook:", error); + notify("error", "Failed to test webhook"); + } + }; + + const editWebhookSetting = (webhook: WebhookConfig) => { + setNewWebhook(webhook); + setEditingWebhook(webhook.id); + setShowWebhookForm(true); + }; + + const resetWebhookForm = () => { + setNewWebhook({ + id: "", + url: "", + events: ["run_completed"], + active: true, + }); + setShowWebhookForm(false); + setEditingWebhook(null); + setUrlError(null); + }; + // Fetch Google Sheets files const fetchSpreadsheetFiles = async () => { try { @@ -193,6 +448,9 @@ export const IntegrationSettingsModal = ({ if (!recordingId) return null; const updatedRecording = await getStoredRecording(recordingId); setRecording(updatedRecording); + + await fetchWebhooks(); + setRerenderRobots(true); return updatedRecording; }; @@ -331,8 +589,7 @@ export const IntegrationSettingsModal = ({ if (preSelectedIntegrationType) { setSettings(prev => ({ 
...prev, integrationType: preSelectedIntegrationType })); - } - else if (recording.google_sheet_id) { + } else if (recording.google_sheet_id) { setSettings(prev => ({ ...prev, integrationType: "googleSheets" })); } else if (recording.airtable_base_id) { setSettings(prev => ({ @@ -341,9 +598,18 @@ export const IntegrationSettingsModal = ({ airtableBaseName: recording.airtable_base_name || "", airtableTableName: recording.airtable_table_name || "", airtableTableId: recording.airtable_table_id || "", - integrationType: recording.airtable_base_id ? "airtable" : "googleSheets" + integrationType: "airtable" })); } + + await fetchWebhooks(); + + if (!preSelectedIntegrationType && !recording.google_sheet_id && !recording.airtable_base_id) { + const webhookResponse = await getWebhooks(recordingId); + if (webhookResponse.ok && webhookResponse.webhooks && webhookResponse.webhooks.length > 0) { + setSettings(prev => ({ ...prev, integrationType: "webhook" })); + } + } } setLoading(false); @@ -370,7 +636,48 @@ export const IntegrationSettingsModal = ({ } }, []); - // Add this UI at the top of the modal return statement + const formatEventName = (event: string) => { + switch (event) { + case "run_completed": + return "Run finished"; + case "run_failed": + return "Run failed"; + default: + return event; + } + }; + + const formatLastCalled = (lastCalledAt?: string | null) => { + if (!lastCalledAt) { + return "Not called yet"; + } + + const date = new Date(lastCalledAt); + const now = new Date(); + const diffMs = now.getTime() - date.getTime(); + const diffDays = Math.floor(diffMs / (1000 * 60 * 60 * 24)); + const diffHours = Math.floor(diffMs / (1000 * 60 * 60)); + const diffMinutes = Math.floor(diffMs / (1000 * 60)); + + if (diffMinutes < 1) { + return "Just now"; + } else if (diffMinutes < 60) { + return `${diffMinutes} minute${diffMinutes === 1 ? '' : 's'} ago`; + } else if (diffHours < 24) { + return `${diffHours} hour${diffHours === 1 ? 
'' : 's'} ago`; + } else if (diffDays < 7) { + return `${diffDays} day${diffDays === 1 ? '' : 's'} ago`; + } else { + return date.toLocaleDateString('en-US', { + year: 'numeric', + month: 'short', + day: 'numeric', + hour: '2-digit', + minute: '2-digit' + }); + } + }; + if (!selectedIntegrationType) { return ( - Google Sheets + Google Sheets Google Sheets @@ -407,9 +714,33 @@ export const IntegrationSettingsModal = ({ }} style={{ display: "flex", flexDirection: "column", alignItems: "center", background: 'white', color: '#ff00c3' }} > - Airtable + Airtable Airtable + + + +
@@ -423,6 +754,7 @@ export const IntegrationSettingsModal = ({ flexDirection: "column", alignItems: "flex-start", marginLeft: "65px", + maxWidth: "1000px", }}> {settings.integrationType === "googleSheets" && ( @@ -483,14 +815,24 @@ export const IntegrationSettingsModal = ({ ) : error ? ( {error} ) : spreadsheets.length === 0 ? ( - + + + + ) : ( <> {error} ) : airtableBases.length === 0 ? ( - + + + + ) : ( <> )} + + {settings.integrationType === "webhook" && ( + <> + + Integrate using Webhooks + + + {settings.webhooks && settings.webhooks.length > 0 && ( + + + + + Webhook URL + Call when + Last called + Status + Actions + + + + {settings.webhooks.map((webhook) => ( + + {webhook.url} + + + {webhook.events.map((event) => ( + + ))} + + + {formatLastCalled(webhook.lastCalledAt)} + + toggleWebhookStatusSetting(webhook.id)} + size="small" + /> + + + + testWebhookSetting(webhook)} + disabled={loading || !webhook.active} + title="Test" + > + + + editWebhookSetting(webhook)} + disabled={loading} + title="Edit" + > + + + removeWebhookSetting(webhook.id)} + disabled={loading} + title="Delete" + > + + + + + + ))} + +
+
+ )} + + {!showWebhookForm && ( + + + { + setNewWebhook({ ...newWebhook, url: e.target.value }); + if (urlError) setUrlError(null); + }} + error={!!urlError} + helperText={urlError} + required + aria-describedby="webhook-url-help" + /> + setNewWebhook({ + ...newWebhook, + events: [e.target.value] + })} + sx={{ minWidth: "200px" }} + required + > + Run finished + Run failed + + + + + + Refer to the API documentation for examples and details. + + + + + )} + + {showWebhookForm && ( + + + + {editingWebhook ? "Edit Webhook" : "Add New Webhook"} + + + { + setNewWebhook({ ...newWebhook, url: e.target.value }); + if (urlError) setUrlError(null); + }} + sx={{ marginBottom: "15px" }} + placeholder="https://your-api.com/webhook/endpoint" + required + error={!!urlError} + helperText={urlError} + /> + + setNewWebhook({ + ...newWebhook, + events: typeof e.target.value === 'string' ? [e.target.value] : e.target.value + })} + SelectProps={{ + multiple: true, + renderValue: (selected) => ( + + {(selected as string[]).map((value) => ( + + ))} + + ), + }} + sx={{ marginBottom: "20px" }} + required + > + Run finished + Run failed + + + setNewWebhook({ ...newWebhook, active: e.target.checked })} + /> + } + label="Active" + sx={{ marginBottom: "10px" }} + /> + + + + + + + + )} + + )}
); @@ -649,10 +1213,12 @@ export const modalStyle = { top: "40%", left: "50%", transform: "translate(-50%, -50%)", - width: "50%", + width: "60%", backgroundColor: "background.paper", p: 4, height: "fit-content", display: "block", padding: "20px", -}; \ No newline at end of file + maxHeight: "90vh", + overflow: "auto", +}; diff --git a/src/components/pickers/DatePicker.tsx b/src/components/pickers/DatePicker.tsx index 00687115..41c449c6 100644 --- a/src/components/pickers/DatePicker.tsx +++ b/src/components/pickers/DatePicker.tsx @@ -1,6 +1,6 @@ import React, { useState } from 'react'; import { useSocketStore } from '../../context/socket'; -import { Coordinates } from '../recorder/canvas'; +import { Coordinates } from '../recorder/Canvas'; interface DatePickerProps { coordinates: Coordinates; @@ -16,12 +16,58 @@ const DatePicker: React.FC = ({ coordinates, selector, onClose setSelectedDate(e.target.value); }; + const updateDOMElement = (selector: string, value: string) => { + try { + let iframeElement = document.querySelector('#dom-browser-iframe') as HTMLIFrameElement; + + if (!iframeElement) { + iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; + } + + if (!iframeElement) { + const browserWindow = document.querySelector('#browser-window'); + if (browserWindow) { + iframeElement = browserWindow.querySelector('iframe') as HTMLIFrameElement; + } + } + + if (!iframeElement) { + console.error('Could not find iframe element for DOM update'); + return; + } + + const iframeDoc = iframeElement.contentDocument; + if (!iframeDoc) { + console.error('Could not access iframe document'); + return; + } + + const element = iframeDoc.querySelector(selector) as HTMLInputElement; + if (element) { + element.value = value; + + const changeEvent = new Event('change', { bubbles: true }); + element.dispatchEvent(changeEvent); + + const inputEvent = new Event('input', { bubbles: true }); + element.dispatchEvent(inputEvent); + } else { + 
console.warn(`Could not find element with selector: ${selector}`); + } + } catch (error) { + console.error('Error updating DOM element:', error); + } + }; + const handleConfirm = () => { if (socket && selectedDate) { socket.emit('input:date', { selector, value: selectedDate }); + + updateDOMElement(selector, selectedDate); + onClose(); } }; diff --git a/src/components/pickers/DateTimeLocalPicker.tsx b/src/components/pickers/DateTimeLocalPicker.tsx index c51e3540..51ac62c6 100644 --- a/src/components/pickers/DateTimeLocalPicker.tsx +++ b/src/components/pickers/DateTimeLocalPicker.tsx @@ -1,6 +1,6 @@ import React, { useState } from 'react'; import { useSocketStore } from '../../context/socket'; -import { Coordinates } from '../recorder/canvas'; +import { Coordinates } from '../recorder/Canvas'; interface DateTimeLocalPickerProps { coordinates: Coordinates; @@ -16,12 +16,58 @@ const DateTimeLocalPicker: React.FC = ({ coordinates, setSelectedDateTime(e.target.value); }; + const updateDOMElement = (selector: string, value: string) => { + try { + let iframeElement = document.querySelector('#dom-browser-iframe') as HTMLIFrameElement; + + if (!iframeElement) { + iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; + } + + if (!iframeElement) { + const browserWindow = document.querySelector('#browser-window'); + if (browserWindow) { + iframeElement = browserWindow.querySelector('iframe') as HTMLIFrameElement; + } + } + + if (!iframeElement) { + console.error('Could not find iframe element for DOM update'); + return; + } + + const iframeDoc = iframeElement.contentDocument; + if (!iframeDoc) { + console.error('Could not access iframe document'); + return; + } + + const element = iframeDoc.querySelector(selector) as HTMLInputElement; + if (element) { + element.value = value; + + const changeEvent = new Event('change', { bubbles: true }); + element.dispatchEvent(changeEvent); + + const inputEvent = new Event('input', { bubbles: true }); + 
element.dispatchEvent(inputEvent); + } else { + console.warn(`Could not find element with selector: ${selector}`); + } + } catch (error) { + console.error('Error updating DOM element:', error); + } + }; + const handleConfirm = () => { if (socket && selectedDateTime) { socket.emit('input:datetime-local', { selector, value: selectedDateTime }); + + updateDOMElement(selector, selectedDateTime); + onClose(); } }; @@ -58,8 +104,8 @@ const DateTimeLocalPicker: React.FC = ({ coordinates, onClick={handleConfirm} disabled={!selectedDateTime} className={`px-3 py-1 text-sm rounded ${selectedDateTime - ? 'bg-blue-500 text-white hover:bg-blue-600' - : 'bg-gray-300 text-gray-500 cursor-not-allowed' + ? 'bg-blue-500 text-white hover:bg-blue-600' + : 'bg-gray-300 text-gray-500 cursor-not-allowed' }`} > Confirm diff --git a/src/components/pickers/Dropdown.tsx b/src/components/pickers/Dropdown.tsx index df695efa..a944592a 100644 --- a/src/components/pickers/Dropdown.tsx +++ b/src/components/pickers/Dropdown.tsx @@ -1,6 +1,6 @@ import React, { useState } from 'react'; import { useSocketStore } from '../../context/socket'; -import { Coordinates } from '../recorder/canvas'; +import { Coordinates } from '../recorder/Canvas'; interface DropdownProps { coordinates: Coordinates; @@ -18,9 +18,65 @@ const Dropdown = ({ coordinates, selector, options, onClose }: DropdownProps) => const { socket } = useSocketStore(); const [hoveredIndex, setHoveredIndex] = useState(null); + const updateDOMElement = (selector: string, value: string) => { + try { + let iframeElement = document.querySelector('#dom-browser-iframe') as HTMLIFrameElement; + + if (!iframeElement) { + iframeElement = document.querySelector('#browser-window iframe') as HTMLIFrameElement; + } + + if (!iframeElement) { + const browserWindow = document.querySelector('#browser-window'); + if (browserWindow) { + iframeElement = browserWindow.querySelector('iframe') as HTMLIFrameElement; + } + } + + if (!iframeElement) { + 
console.error('Could not find iframe element for DOM update'); + return; + } + + const iframeDoc = iframeElement.contentDocument; + if (!iframeDoc) { + console.error('Could not access iframe document'); + return; + } + + const selectElement = iframeDoc.querySelector(selector) as HTMLSelectElement; + if (selectElement) { + selectElement.value = value; + + const optionElements = selectElement.querySelectorAll('option'); + optionElements.forEach(option => { + if (option.value === value) { + option.selected = true; + option.setAttribute('selected', 'selected'); + } else { + option.selected = false; + option.removeAttribute('selected'); + } + }); + + const changeEvent = new Event('change', { bubbles: true }); + selectElement.dispatchEvent(changeEvent); + + const inputEvent = new Event('input', { bubbles: true }); + selectElement.dispatchEvent(inputEvent); + } else { + console.warn(`Could not find select element with selector: ${selector}`); + } + } catch (error) { + console.error('Error updating DOM select element:', error); + } + }; + const handleSelect = (value: string) => { if (socket) { socket.emit('input:dropdown', { selector, value }); + + updateDOMElement(selector, value); } onClose(); }; diff --git a/src/components/pickers/TimePicker.tsx b/src/components/pickers/TimePicker.tsx index 7877787e..a9c02cae 100644 --- a/src/components/pickers/TimePicker.tsx +++ b/src/components/pickers/TimePicker.tsx @@ -1,6 +1,6 @@ import React, { useState } from 'react'; import { useSocketStore } from '../../context/socket'; -import { Coordinates } from '../recorder/canvas'; +import { Coordinates } from '../recorder/Canvas'; interface TimePickerProps { coordinates: Coordinates; diff --git a/src/components/recorder/AddWhereCondModal.tsx b/src/components/recorder/AddWhereCondModal.tsx index 7c5c284c..758c9e75 100644 --- a/src/components/recorder/AddWhereCondModal.tsx +++ b/src/components/recorder/AddWhereCondModal.tsx @@ -140,7 +140,7 @@ export const AddWhereCondModal = ({ isOpen, 
onClose, pair, index }: AddWhereCond } export const modalStyle = { - top: '40%', + top: '45%', left: '50%', transform: 'translate(-50%, -50%)', width: '30%', diff --git a/src/components/recorder/Canvas.tsx b/src/components/recorder/Canvas.tsx new file mode 100644 index 00000000..15fb9a70 --- /dev/null +++ b/src/components/recorder/Canvas.tsx @@ -0,0 +1,310 @@ +import React, { memo, useCallback, useEffect, useRef } from 'react'; +import { useSocketStore } from '../../context/socket'; +import { useGlobalInfoStore } from "../../context/globalInfo"; +import { useActionContext } from '../../context/browserActions'; +import DatePicker from '../pickers/DatePicker'; +import Dropdown from '../pickers/Dropdown'; +import TimePicker from '../pickers/TimePicker'; +import DateTimeLocalPicker from '../pickers/DateTimeLocalPicker'; +import { coordinateMapper } from '../../helpers/coordinateMapper'; + +interface CreateRefCallback { + (ref: React.RefObject): void; +} + +interface CanvasProps { + width: number; + height: number; + onCreateRef: CreateRefCallback; +} + +/** + * Interface for mouse's x,y coordinates + */ +export interface Coordinates { + x: number; + y: number; +}; + +const Canvas = ({ width, height, onCreateRef }: CanvasProps) => { + + const canvasRef = useRef(null); + const contextRef = useRef(null); + const imageDataRef = useRef(null); + const animationFrameRef = useRef(null); + + const { socket } = useSocketStore(); + const { setLastAction, lastAction } = useGlobalInfoStore(); + const { getText, getList } = useActionContext(); + const getTextRef = useRef(getText); + const getListRef = useRef(getList); + + const MOUSE_MOVE_THROTTLE = 8; + const lastMouseMoveTime = useRef(0); + + const [datePickerInfo, setDatePickerInfo] = React.useState<{ + coordinates: Coordinates; + selector: string; + } | null>(null); + + const [dropdownInfo, setDropdownInfo] = React.useState<{ + coordinates: Coordinates; + selector: string; + options: Array<{ + value: string; + text: string; + 
disabled: boolean; + selected: boolean; + }>; + } | null>(null); + + const [timePickerInfo, setTimePickerInfo] = React.useState<{ + coordinates: Coordinates; + selector: string; + } | null>(null); + + const [dateTimeLocalInfo, setDateTimeLocalInfo] = React.useState<{ + coordinates: Coordinates; + selector: string; + } | null>(null); + + const notifyLastAction = (action: string) => { + if (lastAction !== action) { + setLastAction(action); + } + }; + + const lastMousePosition = useRef({ x: 0, y: 0 }); + + useEffect(() => { + if (canvasRef.current && !contextRef.current) { + const ctx = canvasRef.current.getContext('2d', { + alpha: false, + desynchronized: true, + willReadFrequently: false + }); + + if (ctx) { + contextRef.current = ctx; + + imageDataRef.current = ctx.createImageData(width, height); + } + } + }, [width, height]); + + useEffect(() => { + getTextRef.current = getText; + getListRef.current = getList; + }, [getText, getList]); + + useEffect(() => { + if (socket) { + const handleDatePicker = (info: { coordinates: Coordinates, selector: string }) => { + const canvasCoords = coordinateMapper.mapBrowserToCanvas(info.coordinates); + setDatePickerInfo({ ...info, coordinates: canvasCoords }); + }; + + const handleDropdown = (info: { + coordinates: Coordinates, + selector: string, + options: Array<{ value: string; text: string; disabled: boolean; selected: boolean; }>; + }) => { + const canvasCoords = coordinateMapper.mapBrowserToCanvas(info.coordinates); + setDropdownInfo({ ...info, coordinates: canvasCoords }); + }; + + const handleTimePicker = (info: { coordinates: Coordinates, selector: string }) => { + const canvasCoords = coordinateMapper.mapBrowserToCanvas(info.coordinates); + setTimePickerInfo({ ...info, coordinates: canvasCoords }); + }; + + const handleDateTimePicker = (info: { coordinates: Coordinates, selector: string }) => { + const canvasCoords = coordinateMapper.mapBrowserToCanvas(info.coordinates); + setDateTimeLocalInfo({ ...info, coordinates: 
canvasCoords }); + }; + + socket.on('showDatePicker', handleDatePicker); + socket.on('showDropdown', handleDropdown); + socket.on('showTimePicker', handleTimePicker); + socket.on('showDateTimePicker', handleDateTimePicker); + + return () => { + socket.off('showDatePicker', handleDatePicker); + socket.off('showDropdown', handleDropdown); + socket.off('showTimePicker', handleTimePicker); + socket.off('showDateTimePicker', handleDateTimePicker); + }; + } + }, [socket]); + + const onMouseEvent = useCallback((event: MouseEvent) => { + if (!socket || !canvasRef.current) return; + + const rect = canvasRef.current.getBoundingClientRect(); + const clickCoordinates = { + x: event.clientX - rect.left, + y: event.clientY - rect.top, + }; + + const browserCoordinates = coordinateMapper.mapCanvasToBrowser(clickCoordinates); + + switch (event.type) { + case 'mousedown': + if (getTextRef.current === true) { + console.log('Capturing Text...'); + } else if (getListRef.current === true) { + console.log('Capturing List...'); + } else { + socket.emit('input:mousedown', browserCoordinates); + } + notifyLastAction('click'); + break; + + case 'mousemove': { + const now = performance.now(); + if (now - lastMouseMoveTime.current < MOUSE_MOVE_THROTTLE) { + return; + } + lastMouseMoveTime.current = now; + + const dx = Math.abs(lastMousePosition.current.x - clickCoordinates.x); + const dy = Math.abs(lastMousePosition.current.y - clickCoordinates.y); + + if (dx > 0.5 || dy > 0.5) { + lastMousePosition.current = clickCoordinates; + socket.emit('input:mousemove', browserCoordinates); + notifyLastAction('move'); + } + break; + } + + case 'wheel': { + const wheelEvent = event as WheelEvent; + const deltaX = Math.round(wheelEvent.deltaX / 5) * 5; + const deltaY = Math.round(wheelEvent.deltaY / 5) * 5; + + if (Math.abs(deltaX) > 2 || Math.abs(deltaY) > 2) { + socket.emit('input:wheel', { deltaX, deltaY }); + notifyLastAction('scroll'); + } + break; + } + default: + return; + } + }, [socket, 
notifyLastAction]); + + const onKeyboardEvent = useCallback((event: KeyboardEvent) => { + if (socket) { + const browserCoordinates = coordinateMapper.mapCanvasToBrowser(lastMousePosition.current); + + switch (event.type) { + case 'keydown': + socket.emit('input:keydown', { key: event.key, coordinates: browserCoordinates }); + notifyLastAction(`${event.key} pressed`); + break; + case 'keyup': + socket.emit('input:keyup', event.key); + break; + default: + console.log('Default keyEvent registered'); + return; + } + } + }, [socket, notifyLastAction]); + + + useEffect(() => { + const canvas = canvasRef.current; + if (!canvas) return; + + onCreateRef(canvasRef); + + const options = { passive: true }; + + canvas.addEventListener('mousedown', onMouseEvent, options); + canvas.addEventListener('mousemove', onMouseEvent, options); + canvas.addEventListener('wheel', onMouseEvent, options); + canvas.addEventListener('keydown', onKeyboardEvent); + canvas.addEventListener('keyup', onKeyboardEvent); + + return () => { + canvas.removeEventListener('mousedown', onMouseEvent); + canvas.removeEventListener('mousemove', onMouseEvent); + canvas.removeEventListener('wheel', onMouseEvent); + canvas.removeEventListener('keydown', onKeyboardEvent); + canvas.removeEventListener('keyup', onKeyboardEvent); + }; + }, [onMouseEvent, onKeyboardEvent, onCreateRef]); + + useEffect(() => { + return () => { + if (animationFrameRef.current) { + cancelAnimationFrame(animationFrameRef.current); + } + }; + }, []); + + const containerStyle = React.useMemo(() => ({ + borderRadius: '0px 0px 5px 5px', + overflow: 'hidden', + backgroundColor: 'white', + contain: 'layout style paint', + isolation: 'isolate' as React.CSSProperties['isolation'] + }), []); + + const canvasStyle = React.useMemo(() => ({ + display: 'block', + imageRendering: 'crisp-edges' as const, + willChange: 'contents', + backfaceVisibility: 'hidden' as const, + transform: 'translateZ(0)', + maxWidth: '100%', + maxHeight: '100%' + }), []); + + 
return ( +
+ + {datePickerInfo && ( + setDatePickerInfo(null)} + /> + )} + {dropdownInfo && ( + setDropdownInfo(null)} + /> + )} + {timePickerInfo && ( + setTimePickerInfo(null)} + /> + )} + {dateTimeLocalInfo && ( + setDateTimeLocalInfo(null)} + /> + )} +
+ ); + +}; + + +export default memo(Canvas); diff --git a/src/components/recorder/DOMBrowserRenderer.tsx b/src/components/recorder/DOMBrowserRenderer.tsx new file mode 100644 index 00000000..e409ff64 --- /dev/null +++ b/src/components/recorder/DOMBrowserRenderer.tsx @@ -0,0 +1,1118 @@ +import React, { + useCallback, + useContext, + useEffect, + useState, + useRef, +} from "react"; +import { useSocketStore } from "../../context/socket"; +import { useGlobalInfoStore } from "../../context/globalInfo"; +import { useTranslation } from "react-i18next"; +import { AuthContext } from "../../context/auth"; +import { rebuild, createMirror } from "rrweb-snapshot"; +import { + ActionType, + clientSelectorGenerator, +} from "../../helpers/clientSelectorGenerator"; + +interface ElementInfo { + tagName: string; + hasOnlyText?: boolean; + isIframeContent?: boolean; + isShadowRoot?: boolean; + innerText?: string; + url?: string; + imageUrl?: string; + attributes?: Record; + innerHTML?: string; + outerHTML?: string; + isDOMMode?: boolean; +} + +interface ProcessedSnapshot { + snapshot: RRWebSnapshot; + resources: { + stylesheets: Array<{ + href: string; + content: string; + media?: string; + }>; + images: Array<{ + src: string; + dataUrl: string; + alt?: string; + }>; + fonts: Array<{ + url: string; + dataUrl: string; + format?: string; + }>; + scripts: Array<{ + src: string; + content: string; + type?: string; + }>; + media: Array<{ + src: string; + dataUrl: string; + type: string; + }>; + }; + baseUrl: string; + viewport: { width: number; height: number }; + timestamp: number; + processingStats: { + totalReplacements: number; + discoveredResources: { + images: number; + stylesheets: number; + scripts: number; + fonts: number; + media: number; + }; + cachedResources: { + stylesheets: number; + images: number; + fonts: number; + scripts: number; + media: number; + }; + totalCacheSize: number; + }; +} + +interface RRWebSnapshot { + type: number; + childNodes?: RRWebSnapshot[]; + 
tagName?: string; + attributes?: Record; + textContent: string; + id: number; + [key: string]: any; +} + +interface RRWebDOMBrowserRendererProps { + width: number; + height: number; + snapshot: ProcessedSnapshot; + getList?: boolean; + getText?: boolean; + listSelector?: string | null; + cachedChildSelectors?: string[]; + paginationMode?: boolean; + paginationType?: string; + limitMode?: boolean; + isCachingChildSelectors?: boolean; + onHighlight?: (data: { + rect: DOMRect; + selector: string; + isShadow?: boolean; + elementInfo: ElementInfo | null; + childSelectors?: string[]; + groupInfo?: any; + similarElements?: any; + }) => void; + onElementSelect?: (data: { + rect: DOMRect; + selector: string; + isShadow?: boolean; + elementInfo: ElementInfo | null; + childSelectors?: string[]; + groupInfo?: any; + }) => void; + onShowDatePicker?: (info: { + coordinates: { x: number; y: number }; + selector: string; + }) => void; + onShowDropdown?: (info: { + coordinates: { x: number; y: number }; + selector: string; + options: Array<{ + value: string; + text: string; + disabled: boolean; + selected: boolean; + }>; + }) => void; + onShowTimePicker?: (info: { + coordinates: { x: number; y: number }; + selector: string; + }) => void; + onShowDateTimePicker?: (info: { + coordinates: { x: number; y: number }; + selector: string; + }) => void; +} + +export const DOMBrowserRenderer: React.FC = ({ + width, + height, + snapshot, + getList = false, + getText = false, + listSelector = null, + cachedChildSelectors = [], + paginationMode = false, + paginationType = "", + limitMode = false, + isCachingChildSelectors = false, + onHighlight, + onElementSelect, + onShowDatePicker, + onShowDropdown, + onShowTimePicker, + onShowDateTimePicker, +}) => { + const { t } = useTranslation(); + const containerRef = useRef(null); + const iframeRef = useRef(null); + const [isRendered, setIsRendered] = useState(false); + const [renderError, setRenderError] = useState(null); + const [lastMousePosition, 
setLastMousePosition] = useState({ x: 0, y: 0 }); + const [currentHighlight, setCurrentHighlight] = useState<{ + element: Element; + rect: DOMRect; + selector: string; + elementInfo: ElementInfo; + childSelectors?: string[]; + } | null>(null); + + const { socket } = useSocketStore(); + const { setLastAction, lastAction } = useGlobalInfoStore(); + + const { state } = useContext(AuthContext); + const { user } = state; + + const MOUSE_MOVE_THROTTLE = 16; // ~60fps + const lastMouseMoveTime = useRef(0); + + const notifyLastAction = (action: string) => { + if (lastAction !== action) { + setLastAction(action); + } + }; + + const isInCaptureMode = getText || getList; + + useEffect(() => { + clientSelectorGenerator.setGetList(getList); + clientSelectorGenerator.setListSelector(listSelector || ""); + clientSelectorGenerator.setPaginationMode(paginationMode); + }, [getList, listSelector, paginationMode]); + + useEffect(() => { + if (listSelector) { + clientSelectorGenerator.setListSelector(listSelector); + clientSelectorGenerator.setGetList(getList); + clientSelectorGenerator.setPaginationMode(paginationMode); + } + }, [listSelector, getList, paginationMode]); + + /** + * Handle client-side highlighting for DOM mode using complete backend logic + */ + const handleDOMHighlighting = useCallback( + (x: number, y: number, iframeDoc: Document) => { + try { + if (!getText && !getList) { + setCurrentHighlight(null); + if (onHighlight) { + onHighlight({ + rect: new DOMRect(0, 0, 0, 0), + selector: "", + elementInfo: null, + }); + } + return; + } + + const highlighterData = + clientSelectorGenerator.generateDataForHighlighter( + { x, y }, + iframeDoc, + true, + cachedChildSelectors + ); + + if (!highlighterData) { + setCurrentHighlight(null); + if (onHighlight) { + onHighlight({ + rect: new DOMRect(0, 0, 0, 0), + selector: "", + elementInfo: null, + }); + } + return; + } + + const { rect, selector, elementInfo, childSelectors, groupInfo, similarElements, isShadow } = + 
highlighterData; + + let shouldHighlight = false; + + if (getList) { + if (!listSelector && groupInfo?.isGroupElement) { + shouldHighlight = true; + } + else if (listSelector) { + if (limitMode) { + shouldHighlight = false; + } else if ( + paginationMode && + paginationType !== "" && + !["none", "scrollDown", "scrollUp"].includes(paginationType) + ) { + shouldHighlight = true; + } else if (childSelectors && childSelectors.length > 0) { + shouldHighlight = true; + } else { + shouldHighlight = false; + } + } + else { + shouldHighlight = true; + } + } else { + shouldHighlight = true; + } + + if (shouldHighlight) { + const element = iframeDoc.elementFromPoint(x, y); + if (element) { + setCurrentHighlight({ + element, + rect: rect, + selector, + elementInfo: { + ...elementInfo, + tagName: elementInfo?.tagName ?? "", + isDOMMode: true, + }, + childSelectors, + }); + + if (onHighlight) { + onHighlight({ + rect: rect, + elementInfo: { + ...elementInfo, + tagName: elementInfo?.tagName ?? "", + isDOMMode: true, + }, + selector, + isShadow, + childSelectors, + groupInfo, + similarElements, // Pass similar elements data + }); + } + } + } else { + setCurrentHighlight(null); + if (onHighlight) { + onHighlight({ + rect: new DOMRect(0, 0, 0, 0), + selector: "", + elementInfo: null, + }); + } + } + } catch (error) { + console.error("Error in DOM highlighting:", error); + setCurrentHighlight(null); + } + }, + [ + getText, + getList, + listSelector, + paginationMode, + cachedChildSelectors, + paginationType, + limitMode, + onHighlight, + ] + ); + /** + * Set up enhanced interaction handlers for DOM mode + */ + const setupIframeInteractions = useCallback( + (iframeDoc: Document) => { + const existingHandlers = (iframeDoc as any)._domRendererHandlers; + if (existingHandlers) { + Object.entries(existingHandlers).forEach(([event, handler]) => { + iframeDoc.removeEventListener(event, handler as EventListener, false); // Changed to false + }); + } + + const handlers: { [key: string]: 
EventListener } = {}; + + const mouseMoveHandler: EventListener = (e: Event) => { + if (e.target && !iframeDoc.contains(e.target as Node)) { + return; + } + + if (!isInCaptureMode) { + return; + } + + const now = performance.now(); + if (now - lastMouseMoveTime.current < MOUSE_MOVE_THROTTLE) { + return; + } + lastMouseMoveTime.current = now; + + const mouseEvent = e as MouseEvent; + const iframeX = mouseEvent.clientX; + const iframeY = mouseEvent.clientY; + + const iframe = iframeRef.current; + if (iframe) { + const iframeRect = iframe.getBoundingClientRect(); + setLastMousePosition({ + x: iframeX + iframeRect.left, + y: iframeY + iframeRect.top, + }); + } + + handleDOMHighlighting(iframeX, iframeY, iframeDoc); + notifyLastAction("move"); + }; + + const mouseDownHandler: EventListener = (e: Event) => { + if (e.target && !iframeDoc.contains(e.target as Node)) { + return; + } + + const mouseEvent = e as MouseEvent; + const target = mouseEvent.target as Element; + const iframeX = mouseEvent.clientX; + const iframeY = mouseEvent.clientY; + + if (isInCaptureMode) { + e.preventDefault(); + e.stopPropagation(); + + if (currentHighlight && onElementSelect) { + // Get the group info for the current highlight + const highlighterData = + clientSelectorGenerator.generateDataForHighlighter( + { x: iframeX, y: iframeY }, + iframeDoc, + true, + cachedChildSelectors + ); + + onElementSelect({ + rect: currentHighlight.rect, + selector: currentHighlight.selector, + elementInfo: currentHighlight.elementInfo, + isShadow: highlighterData?.isShadow, + childSelectors: + cachedChildSelectors.length > 0 + ? 
cachedChildSelectors + : highlighterData?.childSelectors || [], + groupInfo: highlighterData?.groupInfo, + }); + } + notifyLastAction("select element"); + return; + } + + const linkElement = target.closest("a[href]") as HTMLAnchorElement; + if (linkElement && linkElement.href && socket) { + e.preventDefault(); + e.stopPropagation(); + + const href = linkElement.href; + + if (linkElement.target) { + linkElement.target = ""; + } + + const originalHref = linkElement.href; + linkElement.removeAttribute("href"); + + setTimeout(() => { + linkElement.setAttribute("href", originalHref); + }, 100); + + const isSPALink = + href.endsWith("#") || + (href.includes("#") && new URL(href).hash !== ""); + + const selector = clientSelectorGenerator.generateSelector( + iframeDoc, + { x: iframeX, y: iframeY }, + ActionType.Click + ); + + const elementInfo = clientSelectorGenerator.getElementInformation( + iframeDoc, + { x: iframeX, y: iframeY }, + clientSelectorGenerator.getCurrentState().listSelector, + clientSelectorGenerator.getCurrentState().getList + ); + + if (selector && socket) { + socket.emit("dom:click", { + selector, + url: snapshot.baseUrl, + userId: user?.id || "unknown", + elementInfo, + coordinates: undefined, + isSPA: isSPALink, + }); + + notifyLastAction( + isSPALink ? 
`SPA navigation to ${href}` : `navigate to ${href}` + ); + } + return; + } + + const selector = clientSelectorGenerator.generateSelector( + iframeDoc, + { x: iframeX, y: iframeY }, + ActionType.Click + ); + + const elementInfo = clientSelectorGenerator.getElementInformation( + iframeDoc, + { x: iframeX, y: iframeY }, + clientSelectorGenerator.getCurrentState().listSelector, + clientSelectorGenerator.getCurrentState().getList + ); + + if (selector && elementInfo && socket) { + if (elementInfo?.tagName === "SELECT" && elementInfo.innerHTML) { + const inputElement = target as HTMLInputElement; + inputElement.blur(); + + const wasDisabled = inputElement.disabled; + inputElement.disabled = true; + + setTimeout(() => { + inputElement.disabled = wasDisabled; + }, 100); + + const options = elementInfo.innerHTML + .split(" { + const valueMatch = optionHtml.match(/value="([^"]*)"/); + const textMatch = optionHtml.match(/>([^<]*) { + inputElement.disabled = wasDisabled; + }, 100); + + const pickerInfo = { + coordinates: { x: iframeX, y: iframeY }, + selector, + }; + + switch (inputType) { + case "date": + case "month": + case "week": + if (onShowDatePicker) { + onShowDatePicker(pickerInfo); + } + break; + case "time": + if (onShowTimePicker) { + onShowTimePicker(pickerInfo); + } + break; + case "datetime-local": + if (onShowDateTimePicker) { + onShowDateTimePicker(pickerInfo); + } + break; + } + + notifyLastAction(`${inputType} picker opened`); + return; + } + } + + if ( + elementInfo?.tagName !== "INPUT" && + elementInfo?.tagName !== "SELECT" + ) { + socket.emit("dom:click", { + selector, + url: snapshot.baseUrl, + userId: user?.id || "unknown", + elementInfo, + coordinates: { x: iframeX, y: iframeY }, + isSPA: false, + }); + } + } + + notifyLastAction("click"); + }; + + const mouseUpHandler: EventListener = (e: Event) => { + if (e.target && !iframeDoc.contains(e.target as Node)) { + return; + } + + if (!isInCaptureMode) { + notifyLastAction("release"); + } + }; + + const 
keyDownHandler: EventListener = (e: Event) => { + if (e.target && !iframeDoc.contains(e.target as Node)) { + return; + } + + const keyboardEvent = e as KeyboardEvent; + const target = keyboardEvent.target as HTMLElement; + + if (!isInCaptureMode && socket && snapshot?.baseUrl) { + const iframe = iframeRef.current; + if (iframe) { + const focusedElement = iframeDoc.activeElement as HTMLElement; + let coordinates = { x: 0, y: 0 }; + + if (focusedElement && focusedElement !== iframeDoc.body) { + // Get coordinates from the focused element + const rect = focusedElement.getBoundingClientRect(); + coordinates = { + x: rect.left + rect.width / 2, + y: rect.top + rect.height / 2 + }; + } else { + // Fallback to last mouse position if no focused element + const iframeRect = iframe.getBoundingClientRect(); + coordinates = { + x: lastMousePosition.x - iframeRect.left, + y: lastMousePosition.y - iframeRect.top + }; + } + + const selector = clientSelectorGenerator.generateSelector( + iframeDoc, + coordinates, + ActionType.Keydown + ); + + const elementInfo = clientSelectorGenerator.getElementInformation( + iframeDoc, + coordinates, + clientSelectorGenerator.getCurrentState().listSelector, + clientSelectorGenerator.getCurrentState().getList + ); + + if (selector) { + socket.emit("dom:keypress", { + selector, + key: keyboardEvent.key, + url: snapshot.baseUrl, + userId: user?.id || "unknown", + inputType: elementInfo?.attributes?.type || "text", + }); + } + } + + notifyLastAction(`${keyboardEvent.key} typed`); + } + + if ( + ["INPUT", "TEXTAREA"].includes(target.tagName) && + !isInCaptureMode + ) { + return; + } + }; + + const keyUpHandler: EventListener = (e: Event) => { + if (e.target && !iframeDoc.contains(e.target as Node)) { + return; + } + + const keyboardEvent = e as KeyboardEvent; + + if (!isInCaptureMode && socket) { + socket.emit("input:keyup", { key: keyboardEvent.key }); + } + }; + + const wheelHandler: EventListener = (e: Event) => { + if (e.target && 
!iframeDoc.contains(e.target as Node)) { + return; + } + + if (isCachingChildSelectors) { + e.preventDefault(); + e.stopPropagation(); + return; + } + + e.preventDefault(); + + if (!isInCaptureMode) { + const wheelEvent = e as WheelEvent; + const deltaX = Math.round(wheelEvent.deltaX / 10) * 10; + const deltaY = Math.round(wheelEvent.deltaY / 10) * 10; + + if (Math.abs(deltaX) > 5 || Math.abs(deltaY) > 5) { + if (socket) { + socket.emit("dom:scroll", { + deltaX, + deltaY, + }); + } + notifyLastAction("scroll"); + } + } + }; + + const clickHandler: EventListener = (e: Event) => { + if (e.target && !iframeDoc.contains(e.target as Node)) { + return; + } + + if (isInCaptureMode) { + e.preventDefault(); + e.stopPropagation(); + return; + } + }; + + const preventDefaults = (e: Event) => { + if (e.target && !iframeDoc.contains(e.target as Node)) { + return; + } + + e.preventDefault(); + e.stopPropagation(); + return false; + }; + + handlers.mousedown = mouseDownHandler; + handlers.mouseup = mouseUpHandler; + handlers.mousemove = mouseMoveHandler; + handlers.wheel = wheelHandler; + handlers.keydown = keyDownHandler; + handlers.keyup = keyUpHandler; + handlers.click = clickHandler; + handlers.submit = preventDefaults; + handlers.beforeunload = preventDefaults; + + Object.entries(handlers).forEach(([event, handler]) => { + iframeDoc.addEventListener(event, handler, false); + }); + + // Store handlers for cleanup + (iframeDoc as any)._domRendererHandlers = handlers; + + // Make iframe focusable for keyboard events + if (iframeRef.current) { + iframeRef.current.tabIndex = 0; + } + }, + [ + socket, + lastMousePosition, + notifyLastAction, + handleDOMHighlighting, + currentHighlight, + onElementSelect, + isInCaptureMode, + snapshot, + user?.id, + onShowDatePicker, + onShowDropdown, + onShowTimePicker, + onShowDateTimePicker, + ] + ); + + /** + * Render DOM snapshot using rrweb + */ + const renderRRWebSnapshot = useCallback( + (snapshotData: ProcessedSnapshot) => { + if 
(!iframeRef.current) { + console.warn("No iframe reference available"); + return; + } + + if (isInCaptureMode || isCachingChildSelectors) { + return; // Skip rendering in capture mode + } + + try { + setRenderError(null); + setIsRendered(false); + + const iframe = iframeRef.current!; + const iframeDoc = iframe.contentDocument!; + + const styleTags = Array.from( + document.querySelectorAll('link[rel="stylesheet"], style') + ) + .map((tag) => tag.outerHTML) + .join("\n"); + + const enhancedCSS = ` + /* rrweb rebuilt content styles */ + html, body { + margin: 0 !important; + padding: 8px !important; + overflow-x: hidden !important; + } + + html::-webkit-scrollbar, + body::-webkit-scrollbar { + display: none !important; + width: 0 !important; + height: 0 !important; + background: transparent !important; + } + + /* Hide scrollbars for all elements */ + *::-webkit-scrollbar { + display: none !important; + width: 0 !important; + height: 0 !important; + background: transparent !important; + } + + * { + scrollbar-width: none !important; /* Firefox */ + -ms-overflow-style: none !important; /* Internet Explorer 10+ */ + } + + /* Make everything interactive */ + * { + cursor: "pointer" !important; + } + `; + + const skeleton = ` + + + + + + + ${styleTags} + + + + + `; + + if (!iframeDoc) { + throw new Error("Cannot access iframe document"); + } + + // Write the skeleton into the iframe + iframeDoc.open(); + iframeDoc.write(skeleton); + iframeDoc.close(); + + const mirror = createMirror(); + + try { + rebuild(snapshotData.snapshot, { + doc: iframeDoc, + mirror: mirror, + cache: { stylesWithHoverClass: new Map() }, + afterAppend: (node) => { + if (node.nodeType === Node.TEXT_NODE && node.textContent) { + const text = node.textContent.trim(); + + if ( + text.startsWith("<") && + text.includes(">") && + text.length > 50 + ) { + if (node.parentNode) { + node.parentNode.removeChild(node); + } + } + } + }, + }); + } catch (rebuildError) { + console.error("rrweb rebuild failed:", 
rebuildError); + throw new Error(`rrweb rebuild failed: ${rebuildError}`); + } + + setIsRendered(true); + setupIframeInteractions(iframeDoc); + } catch (error) { + console.error("Error rendering rrweb snapshot:", error); + setRenderError(error instanceof Error ? error.message : String(error)); + showErrorInIframe(error); + } + }, + [setupIframeInteractions, isInCaptureMode, isCachingChildSelectors] + ); + + useEffect(() => { + if (snapshot && iframeRef.current) { + renderRRWebSnapshot(snapshot); + } + }, [snapshot]); + + useEffect(() => { + if (isRendered && iframeRef.current) { + const iframeDoc = iframeRef.current.contentDocument; + if (iframeDoc) { + setupIframeInteractions(iframeDoc); + } + } + }, [getText, getList, listSelector, isRendered, setupIframeInteractions]); + + /** + * Show error message in iframe + */ + const showErrorInIframe = (error: any) => { + if (!iframeRef.current) return; + + const iframe = iframeRef.current; + const iframeDoc = iframe.contentDocument || iframe.contentWindow?.document; + + if (iframeDoc) { + try { + iframeDoc.open(); + iframeDoc.write(` + + + + + +
+

Error Loading DOM Content

+

Failed to render the page in DOM mode.

+

Common causes:

+
    +
  • Page is still loading or navigating
  • +
  • Resource proxy timeouts or failures
  • +
  • Network connectivity issues
  • +
  • Invalid HTML structure
  • +
+

Solutions:

+
    +
  • Try switching back to Screenshot mode
  • +
  • Wait for the page to fully load and try again
  • +
  • Check your network connection
  • +
  • Refresh the browser page
  • +
+ +
+ Technical details +
${error.toString()}
+
+
+ + + `); + iframeDoc.close(); + + window.addEventListener("message", (event) => { + if (event.data === "retry-dom-mode") { + if (socket) { + socket.emit("enable-dom-streaming"); + } + } + }); + } catch (e) { + console.error("Failed to write error message to iframe:", e); + } + } + }; + + useEffect(() => { + return () => { + if (iframeRef.current) { + const iframeDoc = iframeRef.current.contentDocument; + if (iframeDoc) { + const handlers = (iframeDoc as any)._domRendererHandlers; + if (handlers) { + Object.entries(handlers).forEach(([event, handler]) => { + iframeDoc.removeEventListener( + event, + handler as EventListener, + true + ); + }); + } + } + } + }; + }, []); + + return ( +
+