From 98c56cbd8c5e01c18f37d2a3cd7e4f84d89a7b2d Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 29 Oct 2024 11:07:04 +0530 Subject: [PATCH 01/40] chore: v0.0.3 --- maxun-core/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/maxun-core/package.json b/maxun-core/package.json index 87e8b93d..45c69ffe 100644 --- a/maxun-core/package.json +++ b/maxun-core/package.json @@ -1,6 +1,6 @@ { "name": "maxun-core", - "version": "0.0.2", + "version": "0.0.3", "description": "Core package for Maxun, responsible for data extraction", "main": "build/index.js", "typings": "build/index.d.ts", From 831958196067440435966f0ed5688b9a15926d67 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 29 Oct 2024 11:09:45 +0530 Subject: [PATCH 02/40] chore: use maxun-core v0.0.3 --- package.json | 1 + 1 file changed, 1 insertion(+) diff --git a/package.json b/package.json index 70b94311..267057af 100644 --- a/package.json +++ b/package.json @@ -41,6 +41,7 @@ "jsonwebtoken": "^9.0.2", "loglevel": "^1.8.0", "loglevel-plugin-remote": "^0.6.8", + "maxun-core": "^0.0.3", "minio": "^8.0.1", "moment-timezone": "^0.5.45", "node-cron": "^3.0.3", From 6953997e0366fd60ef6375f1f3c48c333c2df111 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 29 Oct 2024 21:05:10 +0530 Subject: [PATCH 03/40] feat: set manifest to true --- vite.config.js | 1 + 1 file changed, 1 insertion(+) diff --git a/vite.config.js b/vite.config.js index 4e690eb8..f3aae237 100644 --- a/vite.config.js +++ b/vite.config.js @@ -5,6 +5,7 @@ export default defineConfig(() => { return { build: { outDir: 'build', + manifest: true, }, plugins: [react()], }; From 0f69f21b92313b96c7c3f6638ee7d8fb56168502 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 29 Oct 2024 23:41:06 +0530 Subject: [PATCH 04/40] fix: favicon path --- index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.html b/index.html index c7579580..9a9db7a4 100644 --- a/index.html +++ b/index.html @@ -8,7 +8,7 @@ 
name="description" content="Web site created using Vite" /> - + Maxun | Open Source No Code Web Data Extraction Platform From c095c543bbf343314565bc21944e22d79a8b11c5 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 29 Oct 2024 23:41:26 +0530 Subject: [PATCH 05/40] chore: build preview script --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index 267057af..c4199152 100644 --- a/package.json +++ b/package.json @@ -76,7 +76,7 @@ "server": "./node_modules/.bin/nodemon server/src/server.ts", "client": "vite", "build": "vite build", - "test": "vite preview", + "preview": "vite preview", "lint": "./node_modules/.bin/eslint ." }, "eslintConfig": { From ba07f67af931a5e7e2c0998388c4f8840c40e753 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 29 Oct 2024 23:41:49 +0530 Subject: [PATCH 06/40] chore: set chunkSizeWarningLimit --- vite.config.js | 1 + 1 file changed, 1 insertion(+) diff --git a/vite.config.js b/vite.config.js index f3aae237..aab999e4 100644 --- a/vite.config.js +++ b/vite.config.js @@ -6,6 +6,7 @@ export default defineConfig(() => { build: { outDir: 'build', manifest: true, + chunkSizeWarningLimit: 1024, }, plugins: [react()], }; From 2fd6262e91dd4e7864fe5275201c3cc8f9b027f2 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 29 Oct 2024 23:42:14 +0530 Subject: [PATCH 07/40] chore: img path --- src/components/molecules/ActionDescriptionBox.tsx | 2 +- src/components/molecules/NavBar.tsx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/components/molecules/ActionDescriptionBox.tsx b/src/components/molecules/ActionDescriptionBox.tsx index 23f8c1ed..4efdb32e 100644 --- a/src/components/molecules/ActionDescriptionBox.tsx +++ b/src/components/molecules/ActionDescriptionBox.tsx @@ -110,7 +110,7 @@ const ActionDescriptionBox = () => { return ( - + {renderActionDescription()} diff --git a/src/components/molecules/NavBar.tsx b/src/components/molecules/NavBar.tsx index 
89b005af..b0a409b1 100644 --- a/src/components/molecules/NavBar.tsx +++ b/src/components/molecules/NavBar.tsx @@ -54,7 +54,7 @@ export const NavBar: React.FC = ({ recordingName, isRecording }) => display: 'flex', justifyContent: 'flex-start', }}> - +
Maxun
{ From e88fabe8645e59792d4f23f5949be47dba7e4d36 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 29 Oct 2024 23:42:34 +0530 Subject: [PATCH 08/40] feat: docker setup --- Dockerfile | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 Dockerfile diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..e6675017 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,42 @@ +# --- Base Stage --- +FROM node:18 AS base +WORKDIR /app + +# Copy shared package.json and install dependencies +COPY package.json package-lock.json ./ +RUN npm install + +# --- Backend Stage --- +FROM base AS backend +WORKDIR /app/server + +# Copy backend code +COPY server/src ./src +COPY maxun-core ./maxun-core + +EXPOSE 8080 +CMD ["npm", "run", "start:server"] # Add an npm script for backend start in package.json + +# --- Frontend Stage --- +FROM base AS frontend +WORKDIR /app + +# Copy frontend code, including root-level index.html +COPY src ./src +COPY index.html ./index.html +COPY public ./public +COPY vite.config.js ./ + +# Run the Vite build +RUN npm run build && \ +ls -la && \ +ls -la build # This will help us verify the build output + +# --- Final Stage: Nginx --- +FROM nginx:alpine +COPY --from=frontend /app/build /usr/share/nginx/html +COPY --from=frontend /app/public/img /usr/share/nginx/html/img + +EXPOSE 80 +CMD ["nginx", "-g", "daemon off;"] + \ No newline at end of file From 26d22a16024eb984d146cc82f15c36ae3e3407f8 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Tue, 29 Oct 2024 23:42:58 +0530 Subject: [PATCH 09/40] feat: docker setup --- Dockerfile.backend | 23 ----------------- Dockerfile.frontend | 29 --------------------- docker-compose.yml | 63 +++++++++++++-------------------------------- 3 files changed, 18 insertions(+), 97 deletions(-) delete mode 100644 Dockerfile.backend delete mode 100644 Dockerfile.frontend diff --git a/Dockerfile.backend b/Dockerfile.backend deleted file mode 100644 index ec5ca679..00000000 --- 
a/Dockerfile.backend +++ /dev/null @@ -1,23 +0,0 @@ -# Use node image -FROM node:18-alpine - -# Set working directory in the container to /app -WORKDIR /app - -# Copy only the package.json and package-lock.json first for caching -COPY package.json package-lock.json ./ - -# Install dependencies -RUN npm install --production - -# Copy the entire project (core and backend code) -COPY . . - -# Set the working directory to the backend folder -WORKDIR /app/server - -# Expose the port the backend listens on -EXPOSE 8080 - -# Start the backend server -CMD ["npm", "run", "start:server"] diff --git a/Dockerfile.frontend b/Dockerfile.frontend deleted file mode 100644 index 9fd668ad..00000000 --- a/Dockerfile.frontend +++ /dev/null @@ -1,29 +0,0 @@ -# Use node image for the build stage -FROM node:18-alpine AS build - -# Set working directory in the container to /app -WORKDIR /app - -# Copy only the package.json and package-lock.json first for caching -COPY package.json package-lock.json ./ - -# Install dependencies (legacy peer deps is needed for react highlight, we get rid of it soon) -RUN npm install --legacy-peer-deps - -# Copy the entire project (including frontend code) -COPY . . - -# Build the frontend -RUN npm run build - -# Use NGINX for serving the built frontend in production -FROM nginx:stable-alpine - -# Copy the build output from the previous stage -COPY --from=build /app/build /usr/share/nginx/html - -# Expose the frontend port -EXPOSE 3000 - -# Start NGINX server -CMD ["nginx", "-g", "daemon off;"] diff --git a/docker-compose.yml b/docker-compose.yml index 1019d486..2f4b07b6 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -1,67 +1,40 @@ version: '3.8' + services: - # Frontend - frontend: + app: build: context: . 
- dockerfile: ./Dockerfile.frontend + dockerfile: Dockerfile + env_file: .env ports: - - "3000:3000" # Map host port 3000 to container port 3000 + - "5173:80" + - "8080:8080" depends_on: - - backend - networks: - - app-network - - # Backend - backend: - build: - context: . - dockerfile: ./Dockerfile.backend - ports: - - "8080:8080" # Map host port 8080 to container port 8080 - environment: - POSTGRES_HOST: postgres - POSTGRES_DB: mydb - POSTGRES_USER: myuser - POSTGRES_PASSWORD: mypassword - MINIO_ENDPOINT: minio - MINIO_PORT: 9000 - depends_on: - - postgres + - db - minio - networks: - - app-network - # Postgres Database - postgres: - image: postgres:15 + db: + image: postgres:13 environment: - POSTGRES_DB: mydb - POSTGRES_USER: myuser - POSTGRES_PASSWORD: mypassword + POSTGRES_DB: ${DB_NAME} + POSTGRES_USER: ${DB_USER} + POSTGRES_PASSWORD: ${DB_PASSWORD} + ports: + - "5432:5432" volumes: - postgres_data:/var/lib/postgresql/data - networks: - - app-network - # MinIO for Storage minio: image: minio/minio + environment: + MINIO_ROOT_USER: ${MINIO_ACCESS_KEY} + MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY} command: server /data ports: - "9000:9000" - environment: - MINIO_ROOT_USER: minioadmin - MINIO_ROOT_PASSWORD: minioadmin123 volumes: - minio_data:/data - networks: - - app-network volumes: postgres_data: - minio_data: - -networks: - app-network: - driver: bridge + minio_data: \ No newline at end of file From 6b7965ee2e70d8fb78a7131757c0f893f3a2cfce Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 01:08:52 +0530 Subject: [PATCH 10/40] feat: create tsconfig.json for server --- server/tsconfig.json | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 server/tsconfig.json diff --git a/server/tsconfig.json b/server/tsconfig.json new file mode 100644 index 00000000..a3e18d5c --- /dev/null +++ b/server/tsconfig.json @@ -0,0 +1,15 @@ +{ + "compilerOptions": { + "target": "es2018", + "module": "commonjs", + "outDir": "./dist", + "rootDir": 
"./src", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "moduleResolution": "node" + }, + "include": ["src/**/*"], + "exclude": ["node_modules"] +} From 13c0944c46d1ddd4bf3d2376b2afaa556ce4a1ab Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 02:03:44 +0530 Subject: [PATCH 11/40] feat: create tsconfig.json for server --- server/tsconfig.json | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/server/tsconfig.json b/server/tsconfig.json index a3e18d5c..820e903e 100644 --- a/server/tsconfig.json +++ b/server/tsconfig.json @@ -3,13 +3,30 @@ "target": "es2018", "module": "commonjs", "outDir": "./dist", - "rootDir": "./src", + "rootDir": "../", "strict": true, "esModuleInterop": true, "skipLibCheck": true, "forceConsistentCasingInFileNames": true, - "moduleResolution": "node" + "moduleResolution": "node", + "baseUrl": "../", + "paths": { + "*": ["*"], + "src/*": ["src/*"] + }, + "jsx": "react-jsx", + "lib": ["dom", "dom.iterable", "esnext"], + "allowJs": true }, - "include": ["src/**/*"], - "exclude": ["node_modules"] + "include": [ + "src/**/*", + "../src/shared/**/*", + "../src/helpers/**/*" + ], + "exclude": [ + "node_modules", + "../src/components/**/*", // Exclude frontend components + "../src/pages/**/*", // Exclude frontend pages + "../src/app/**/*" // Exclude other frontend-specific code + ] } From 1a55693a95fcda6650bc38d804cbd94e4b750d38 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 02:04:37 +0530 Subject: [PATCH 12/40] fix: start script for server via dist --- package.json | 2 ++ 1 file changed, 2 insertions(+) diff --git a/package.json b/package.json index c4199152..343270b8 100644 --- a/package.json +++ b/package.json @@ -76,6 +76,8 @@ "server": "./node_modules/.bin/nodemon server/src/server.ts", "client": "vite", "build": "vite build", + "build:server": "tsc -p server/tsconfig.json", + "start:server": "node 
server/dist/server/src/server.js", "preview": "vite preview", "lint": "./node_modules/.bin/eslint ." }, From 728e3702212cbbd3f8a4e1dcd507f9a3a03ccb13 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 02:04:53 +0530 Subject: [PATCH 13/40] chore: nginx config --- nginx.conf | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) create mode 100644 nginx.conf diff --git a/nginx.conf b/nginx.conf new file mode 100644 index 00000000..1e4b4d17 --- /dev/null +++ b/nginx.conf @@ -0,0 +1,17 @@ +server { + listen 80; + + location / { + root /usr/share/nginx/html; + try_files $uri $uri/ /index.html; + } + + location /api { + proxy_pass http://localhost:8080; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + } +} \ No newline at end of file From 114b7706a3798e9ef3317c08a0db864269f53830 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 02:06:07 +0530 Subject: [PATCH 14/40] feat: docker config --- Dockerfile | 76 +++++++++++++++++++++++++++++++++++++----------------- 1 file changed, 52 insertions(+), 24 deletions(-) diff --git a/Dockerfile b/Dockerfile index e6675017..d7abca37 100644 --- a/Dockerfile +++ b/Dockerfile @@ -4,39 +4,67 @@ WORKDIR /app # Copy shared package.json and install dependencies COPY package.json package-lock.json ./ +COPY maxun-core/package.json ./maxun-core/package.json RUN npm install -# --- Backend Stage --- -FROM base AS backend -WORKDIR /app/server - -# Copy backend code -COPY server/src ./src -COPY maxun-core ./maxun-core - -EXPOSE 8080 -CMD ["npm", "run", "start:server"] # Add an npm script for backend start in package.json - -# --- Frontend Stage --- -FROM base AS frontend +# --- Backend Build Stage --- +FROM base AS backend-build WORKDIR /app -# Copy frontend code, including root-level index.html +# Copy TypeScript configs +COPY tsconfig*.json ./ +COPY server/tsconfig.json ./server/ + +# Copy 
ALL source code (both frontend and backend) +COPY src ./src +# Copy backend code and maxun-core +COPY server/src ./server/src +COPY maxun-core ./maxun-core + +# Install TypeScript globally and build +RUN npm install -g typescript +RUN npm run build:server + +# --- Frontend Build Stage --- +FROM base AS frontend-build +WORKDIR /app + +# Copy frontend code and configs COPY src ./src COPY index.html ./index.html COPY public ./public COPY vite.config.js ./ +COPY tsconfig.json ./ -# Run the Vite build -RUN npm run build && \ -ls -la && \ -ls -la build # This will help us verify the build output +# Build frontend +RUN npm run build -# --- Final Stage: Nginx --- -FROM nginx:alpine -COPY --from=frontend /app/build /usr/share/nginx/html -COPY --from=frontend /app/public/img /usr/share/nginx/html/img +# --- Production Stage --- +FROM nginx:alpine AS production -EXPOSE 80 -CMD ["nginx", "-g", "daemon off;"] +# Install Node.js in the production image +RUN apk add --update nodejs npm + +# Copy nginx configuration +COPY nginx.conf /etc/nginx/conf.d/default.conf + +# Copy built frontend +COPY --from=frontend-build /app/build /usr/share/nginx/html +COPY --from=frontend-build /app/public/img /usr/share/nginx/html/img + +# Copy built backend and its dependencies +WORKDIR /app +COPY --from=backend-build /app/package*.json ./ +COPY --from=backend-build /app/server/dist ./server/dist +COPY --from=backend-build /app/maxun-core ./maxun-core +COPY --from=backend-build /app/node_modules ./node_modules + +# Copy start script +COPY docker-entrypoint.sh / +RUN chmod +x /docker-entrypoint.sh + +EXPOSE 80 8080 + +# Start both nginx and node server +ENTRYPOINT ["/docker-entrypoint.sh"] \ No newline at end of file From adaf13711d924652681f7457dd54b0bfd70f3658 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 02:06:24 +0530 Subject: [PATCH 15/40] feat: docker config --- docker-entrypoint.sh | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 docker-entrypoint.sh diff 
--git a/docker-entrypoint.sh b/docker-entrypoint.sh new file mode 100644 index 00000000..7e36eed4 --- /dev/null +++ b/docker-entrypoint.sh @@ -0,0 +1,7 @@ +#!/bin/sh + +# Start backend server +cd /app && npm run start:server & + +# Start nginx +nginx -g 'daemon off;' \ No newline at end of file From 3c16e024152821f3aa4411d58aea97dcc2f8ba22 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 02:10:20 +0530 Subject: [PATCH 16/40] fix: console errors --- index.html | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/index.html b/index.html index 9a9db7a4..6f3b133b 100644 --- a/index.html +++ b/index.html @@ -9,7 +9,7 @@ content="Web site created using Vite" /> - + {{/* */}} Maxun | Open Source No Code Web Data Extraction Platform From d92e039755a21bee71c4d571acc67cfeafc0ee1c Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 02:51:48 +0530 Subject: [PATCH 17/40] fix: remove {{}} --- index.html | 1 - 1 file changed, 1 deletion(-) diff --git a/index.html b/index.html index 6f3b133b..8a52962b 100644 --- a/index.html +++ b/index.html @@ -9,7 +9,6 @@ content="Web site created using Vite" /> - {{/* */}} Maxun | Open Source No Code Web Data Extraction Platform From a9cd7c1b46d752a4287e377309417970c6b82d56 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 03:21:45 +0530 Subject: [PATCH 18/40] feat: format run response --- server/src/api/record.ts | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/server/src/api/record.ts b/server/src/api/record.ts index e4dca279..07aa26ae 100644 --- a/server/src/api/record.ts +++ b/server/src/api/record.ts @@ -303,7 +303,7 @@ router.get("/robots/:id/runs", requireAPIKey, async (req: Request, res: Response messageCode: "success", runs: { totalCount: runs.length, - items: runs, + items: formatRunResponse(runs), }, }; @@ -319,6 +319,32 @@ router.get("/robots/:id/runs", requireAPIKey, async (req: Request, res: Response } ); +function 
formatRunResponse(run: any) { + const formattedRun = { + id: run.id, + status: run.status, + name: run.name, + robotId: run.robotMetaId, // Renaming robotMetaId to robotId + startedAt: run.startedAt, + finishedAt: run.finishedAt, + runId: run.runId, + runByUserId: run.runByUserId, + runByScheduleId: run.runByScheduleId, + runByAPI: run.runByAPI, + data: {}, + screenshot: null, + }; + + if (run.serializableOutput && run.serializableOutput['item-0']) { + formattedRun.data = run.serializableOutput['item-0']; + } else if (run.binaryOutput && run.binaryOutput['item-0']) { + formattedRun.screenshot = run.binaryOutput['item-0']; + } + + return formattedRun; +} + + /** * @swagger * /api/robots/{id}/runs/{runId}: @@ -393,7 +419,7 @@ router.get("/robots/:id/runs/:runId", requireAPIKey, async (req: Request, res: R const response = { statusCode: 200, messageCode: "success", - run: run, + run: formatRunResponse(run), }; res.status(200).json(response); @@ -754,7 +780,7 @@ router.post("/robots/:id/runs", requireAPIKey, async (req: AuthenticatedRequest, const response = { statusCode: 200, messageCode: "success", - run: completedRun, + run: formatRunResponse(completedRun), }; res.status(200).json(response); From 6e62ae536bf7fc5cfc7f5e9cbc3907e16431b1cf Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 03:52:15 +0530 Subject: [PATCH 19/40] fix: map through runs --- server/src/api/record.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/server/src/api/record.ts b/server/src/api/record.ts index 07aa26ae..7710f075 100644 --- a/server/src/api/record.ts +++ b/server/src/api/record.ts @@ -298,12 +298,14 @@ router.get("/robots/:id/runs", requireAPIKey, async (req: Request, res: Response raw: true }); + const formattedRuns = runs.map(formatRunResponse); + const response = { statusCode: 200, messageCode: "success", runs: { - totalCount: runs.length, - items: formatRunResponse(runs), + totalCount: formattedRuns.length, + items: formattedRuns, }, }; From 
0d4680d328f8e9e5d9113d1e5cea015034e0f5ad Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 03:58:02 +0530 Subject: [PATCH 20/40] feat: create minio bucket with policy --- server/src/storage/mino.ts | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/server/src/storage/mino.ts b/server/src/storage/mino.ts index 96e3d0c2..ed0ec18c 100644 --- a/server/src/storage/mino.ts +++ b/server/src/storage/mino.ts @@ -21,6 +21,38 @@ minioClient.bucketExists('maxun-test') console.error('Error connecting to MinIO:', err); }) +async function createBucketWithPolicy(bucketName: string, policy?: 'public-read' | 'private') { + try { + const bucketExists = await minioClient.bucketExists(bucketName); + if (!bucketExists) { + await minioClient.makeBucket(bucketName); + console.log(`Bucket ${bucketName} created successfully.`); + + if (policy === 'public-read') { + // Define a public-read policy + const policyJSON = { + Version: "2012-10-17", + Statement: [ + { + Effect: "Allow", + Principal: "*", + Action: ["s3:GetObject"], + Resource: [`arn:aws:s3:::${bucketName}/*`] + } + ] + }; + await minioClient.setBucketPolicy(bucketName, JSON.stringify(policyJSON)); + console.log(`Public-read policy applied to bucket ${bucketName}.`); + } + } else { + console.log(`Bucket ${bucketName} already exists.`); + } + } catch (error) { + console.error('Error in bucket creation or policy application:', error); + } +} + + class BinaryOutputService { private bucketName: string; From 239412bc09f73ed0399b28c84a87c7937d6958b4 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 04:02:59 +0530 Subject: [PATCH 21/40] feat: check if maxun-run-screenshots bucket is created --- server/src/storage/mino.ts | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/server/src/storage/mino.ts b/server/src/storage/mino.ts index ed0ec18c..3b83e386 100644 --- a/server/src/storage/mino.ts +++ b/server/src/storage/mino.ts @@ -35,7 +35,7 @@ async function 
createBucketWithPolicy(bucketName: string, policy?: 'public-read' Statement: [ { Effect: "Allow", - Principal: "*", + Principal: "", Action: ["s3:GetObject"], Resource: [`arn:aws:s3:::${bucketName}/*`] } @@ -130,6 +130,7 @@ class BinaryOutputService { } async uploadBinaryOutputToMinioBucket(run: Run, key: string, data: Buffer): Promise { + await createBucketWithPolicy('maxun-run-screenshots', 'public-read'); const bucketName = 'maxun-run-screenshots'; try { console.log(`Uploading to bucket ${bucketName} with key ${key}`); From fc29b1a86cdeced82f25d0da3c8ed12b497678ee Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 04:06:29 +0530 Subject: [PATCH 22/40] feat: get redis port & host from env --- server/src/worker.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server/src/worker.ts b/server/src/worker.ts index 7bbf52af..857b1000 100644 --- a/server/src/worker.ts +++ b/server/src/worker.ts @@ -6,8 +6,8 @@ import Robot from './models/Robot'; import { computeNextRun } from './utils/schedule'; const connection = new IORedis({ - host: 'localhost', - port: 6379, + host: process.env.REDIS_HOST || 'localhost', + port: process.env.REDIS_PORT ? 
parseInt(process.env.REDIS_PORT, 10) : 6379, maxRetriesPerRequest: null, }); From 670869e0efbb01f40041ae9ce46e59a7a5b80e66 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 05:47:36 +0530 Subject: [PATCH 23/40] chore: add gitignore --- server/.gitignore | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 server/.gitignore diff --git a/server/.gitignore b/server/.gitignore new file mode 100644 index 00000000..0d78c503 --- /dev/null +++ b/server/.gitignore @@ -0,0 +1,20 @@ +# dependencies +/node_modules + +# misc +.DS_Store +.env.local +.env.development.local +.env.test.local +.env.production.local +.env + +/.idea + +/server/logs + +/build + +/dist + +package-lock.json \ No newline at end of file From 06266500958d133b3414cdaf6a3cc00a91ef58ce Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 05:47:58 +0530 Subject: [PATCH 24/40] feat: redis config --- docker-compose.yml | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 2f4b07b6..6ee8a398 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -5,6 +5,7 @@ services: build: context: . 
dockerfile: Dockerfile + target: production env_file: .env ports: - "5173:80" @@ -12,9 +13,10 @@ services: depends_on: - db - minio + - redis db: - image: postgres:13 + image: postgres:15 environment: POSTGRES_DB: ${DB_NAME} POSTGRES_USER: ${DB_USER} @@ -35,6 +37,17 @@ services: volumes: - minio_data:/data + redis: + image: redis:6 + environment: + - REDIS_HOST=redis + - REDIS_PORT=6379 + ports: + - "6379:6379" + volumes: + - redis_data:/data + volumes: postgres_data: - minio_data: \ No newline at end of file + minio_data: + redis_data: From 8a21a7ac101e5c2a76a72a5ccf75bda94583bb4a Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 06:31:00 +0530 Subject: [PATCH 25/40] feat: redis config --- server/src/worker.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/server/src/worker.ts b/server/src/worker.ts index 857b1000..00bb13d2 100644 --- a/server/src/worker.ts +++ b/server/src/worker.ts @@ -6,7 +6,7 @@ import Robot from './models/Robot'; import { computeNextRun } from './utils/schedule'; const connection = new IORedis({ - host: process.env.REDIS_HOST || 'localhost', + host: process.env.REDIS_HOST ? process.env.REDIS_HOST : 'redis', port: process.env.REDIS_PORT ? parseInt(process.env.REDIS_PORT, 10) : 6379, maxRetriesPerRequest: null, }); From edc0c3247779605a9d2006f29fd4205a472c04c3 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 08:35:10 +0530 Subject: [PATCH 26/40] chore: readme --- README.md | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 README.md diff --git a/README.md b/README.md new file mode 100644 index 00000000..c82070b6 --- /dev/null +++ b/README.md @@ -0,0 +1,59 @@ +

+ + Open-Source No-Code Web Data Extraction Platform
+

+ +

+Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web data extraction doesn't get easier than this! +

+ + +

+ Discord • + Twitter • +

+ +// add demo video here + + + + +# Join Our Community + +

+ Discord • + Twitter • +

+ +# Installation + +# Features + +# Cloud + +# Contributing + +Please refer to [Contribution Guide](https://github.com/amhsirak/maxun/blob/master/.github/CONTRIBUTING.md). + +# + + +# License + +

+This project is licensed under AGPLv3. +

+ +# Contributors + +Thank you to the combined efforts of everyone who contributes! + + + + From b70e76d993186211bc8b18e3c15a2e0ed17164d8 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 09:06:36 +0530 Subject: [PATCH 27/40] feat: robot actions and features --- README.md | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) diff --git a/README.md b/README.md index c82070b6..52027eba 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -

+

Open-Source No-Code Web Data Extraction Platform
-

+

Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web data extraction doesn't get easier than this! @@ -19,8 +19,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web Twitter

-// add demo video here - +![maxun_demo](https://github.com/user-attachments/assets/a61ba670-e56a-4ae1-9681-0b4bd6ba9cdc) @@ -33,17 +32,38 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web # Installation +# How Does It Work? +Maxun lets you create custom robots which emulate user actions and extract data. A robot can perform any of the actions: Capture List, Capture Text or Capture Screenshot. Once a robot is created, it will keep extracting data for you without manual intervention + +![Screenshot 2024-10-23 222138](https://github.com/user-attachments/assets/53573c98-769e-490d-829e-ada9fac0764f) + +### 1. Robot Actions +1. Capture List: Useful to extract structured and bulk items from the website. Example: Scrape products from Amazon etc. +2. Capture Text: Useful to extract individual text content from the website. +3. Capture Screenshot: Get fullpage or visible section screenshots of the website. + +### 2. BYOP +BYOP (Bring Your Own Proxy) lets you connect external proxies to bypass anti-bot protection. Currently, the proxies are per user. Soon you'll be able to configure proxy per robot. + + # Features +- Extract Data With No-Code +- Handle Pagination & Scrolling +- Run Robots On A Specific Schedule +- Convert Websites to APIs +- Convert Websites to Spreadsheets +- Adapt To Website Layout Changes (coming soon) +- Extract Behind Login, With Two-Factor Authentication Support (coming soon) +- Integrations (currently Google Sheet) +- +++ A lot of amazing things soon! # Cloud +We offer a managed cloud version to run Maxun without having to manage the infrastructure and extract data at scale. Maxun cloud also deals with anti-bot detection, huge proxy network, and CAPTCHA solving. If this interests you, [join the cloud waitlist](https://docs.google.com/forms/d/e/1FAIpQLSdbD2uhqC4sbg4eLZ9qrFbyrfkXZ2XsI6dQ0USRCQNZNn5pzg/viewform) as we launch soon. 
# Contributing Please refer to [Contribution Guide](https://github.com/amhsirak/maxun/blob/master/.github/CONTRIBUTING.md). -# - - # License

From 7f17a834e99e8a00172f0a61c3f62169555aedb6 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 09:24:01 +0530 Subject: [PATCH 28/40] chore: features --- README.md | 31 +++++++++++-------------------- 1 file changed, 11 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 52027eba..6f493030 100644 --- a/README.md +++ b/README.md @@ -15,21 +15,15 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web

+ WebsiteDiscord • - Twitter • + Twitter

![maxun_demo](https://github.com/user-attachments/assets/a61ba670-e56a-4ae1-9681-0b4bd6ba9cdc) -# Join Our Community - -

- Discord • - Twitter • -

- # Installation # How Does It Work? @@ -47,31 +41,28 @@ BYOP (Bring Your Own Proxy) lets you connect external proxies to bypass anti-bot # Features -- Extract Data With No-Code -- Handle Pagination & Scrolling -- Run Robots On A Specific Schedule -- Convert Websites to APIs -- Convert Websites to Spreadsheets -- Adapt To Website Layout Changes (coming soon) -- Extract Behind Login, With Two-Factor Authentication Support (coming soon) -- Integrations (currently Google Sheet) +- ✨ Extract Data With No-Code +- ✨ Handle Pagination & Scrolling +- ✨ Run Robots On A Specific Schedule +- ✨ Turn Websites to APIs +- ✨ Turn Websites to Spreadsheets +- ✨ Adapt To Website Layout Changes (coming soon) +- ✨ Extract Behind Login, With Two-Factor Authentication Support (coming soon) +- ✨ Integrations (currently Google Sheet) - +++ A lot of amazing things soon! # Cloud -We offer a managed cloud version to run Maxun without having to manage the infrastructure and extract data at scale. Maxun cloud also deals with anti-bot detection, huge proxy network, and CAPTCHA solving. If this interests you, [join the cloud waitlist](https://docs.google.com/forms/d/e/1FAIpQLSdbD2uhqC4sbg4eLZ9qrFbyrfkXZ2XsI6dQ0USRCQNZNn5pzg/viewform) as we launch soon. +We offer a managed cloud version to run Maxun without having to manage the infrastructure and extract data at scale. Maxun cloud also deals with anti-bot detection, huge proxy network with automatic proxy rotation, and CAPTCHA solving. If this interests you, [join the cloud waitlist](https://docs.google.com/forms/d/e/1FAIpQLSdbD2uhqC4sbg4eLZ9qrFbyrfkXZ2XsI6dQ0USRCQNZNn5pzg/viewform) as we launch soon. # Contributing - Please refer to [Contribution Guide](https://github.com/amhsirak/maxun/blob/master/.github/CONTRIBUTING.md). # License -

This project is licensed under AGPLv3.

# Contributors - Thank you to the combined efforts of everyone who contributes! From 836dafa9915cf4777dc41ffe2604bbde18d42ab5 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 09:25:59 +0530 Subject: [PATCH 29/40] chore: note --- README.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/README.md b/README.md index 6f493030..30e89739 100644 --- a/README.md +++ b/README.md @@ -54,6 +54,9 @@ BYOP (Bring Your Own Proxy) lets you connect external proxies to bypass anti-bot # Cloud We offer a managed cloud version to run Maxun without having to manage the infrastructure and extract data at scale. Maxun cloud also deals with anti-bot detection, huge proxy network with automatic proxy rotation, and CAPTCHA solving. If this interests you, [join the cloud waitlist](https://docs.google.com/forms/d/e/1FAIpQLSdbD2uhqC4sbg4eLZ9qrFbyrfkXZ2XsI6dQ0USRCQNZNn5pzg/viewform) as we launch soon. +# Note +This project is in early stages of development. We're actively working to improve the product. + # Contributing Please refer to [Contribution Guide](https://github.com/amhsirak/maxun/blob/master/.github/CONTRIBUTING.md). From dd1fb6a13c564bc0220855316c922a8e52187b49 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 10:35:31 +0530 Subject: [PATCH 30/40] wip: env variables --- README.md | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/README.md b/README.md index 30e89739..226ebfe2 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,30 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web # Installation +# Envirnoment Variables +| Variable | Mandatory | Description | If Not Set | +|--------------|-----------|----------------------------|----------------------------- | +| `NODE_ENV` | Yes | Sets whether you are running the app locally or in production. | | +| `JWT_SECRET` | Yes | JWT secret is utilized to generate authentication tokens. | | +| `DB_NAME` | Yes | Brief description here. 
| Describe what happens here. | +| `DB_USER` | Yes | Brief description here. | Describe what happens here. | +| `DB_PASSWORD` | Yes | Brief description here. | Describe what happens here. | +| `DB_NAME` | Yes | Brief description here. | Describe what happens here. | +| `DB_USER` | Yes | Brief description here. | Describe what happens here. | +| `DB_HOST` | Yes | Sets whether you are running the app locally or in production. | | +| `DB_PORT` | Yes | JWT secret is utilized to generate authentication tokens. | | +| `ENCRYPTION_KEY` | Yes | Brief description here. | Describe what happens here. | +| `MINIO_ENDPOINT` | Yes | Brief description here. | Describe what happens here. | +| `MINIO_PORT` | Yes | Brief description here. | Describe what happens here. | +| `MINIO_ACCESS_KEY` | Yes | Brief description here. | Describe what happens here. | +| `GOOGLE_CLIENT_ID` | Yes | Brief description here. | Describe what happens here. | +| `GOOGLE_CLIENT_SECRET` | Yes | Brief description here. | Describe what happens here. | +| `GOOGLE_REDIRECT_URI` | Yes | Brief description here. | Describe what happens here. | +| `REDIS_HOST` | Yes | Brief description here. | Describe what happens here. | +| `REDIS_PORT` | Yes | Brief description here. | Describe what happens here. | +| `MAXUN_TELEMETRY` | No | Brief description here. | Describe what happens here. | + + # How Does It Work? Maxun lets you create custom robots which emulate user actions and extract data. A robot can perform any of the actions: Capture List, Capture Text or Capture Screenshot. 
Once a robot is created, it will keep extracting data for you without manual intervention From e8389f44515110525abd5da660f4da12ae2f46b5 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 10:42:30 +0530 Subject: [PATCH 31/40] chore: env variables --- README.md | 41 ++++++++++++++++++++--------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 226ebfe2..ce35a6fc 100644 --- a/README.md +++ b/README.md @@ -27,27 +27,26 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web # Installation # Environment Variables -| Variable | Mandatory | Description | If Not Set | -|--------------|-----------|----------------------------|----------------------------- | -| `NODE_ENV` | Yes | Sets whether you are running the app locally or in production. | | -| `JWT_SECRET` | Yes | JWT secret is utilized to generate authentication tokens. | | -| `DB_NAME` | Yes | Brief description here. | Describe what happens here. | -| `DB_USER` | Yes | Brief description here. | Describe what happens here. | -| `DB_PASSWORD` | Yes | Brief description here. | Describe what happens here. | -| `DB_NAME` | Yes | Brief description here. | Describe what happens here. | -| `DB_USER` | Yes | Brief description here. | Describe what happens here. | -| `DB_HOST` | Yes | Sets whether you are running the app locally or in production. | | -| `DB_PORT` | Yes | JWT secret is utilized to generate authentication tokens. | | -| `ENCRYPTION_KEY` | Yes | Brief description here. | Describe what happens here. | -| `MINIO_ENDPOINT` | Yes | Brief description here. | Describe what happens here. | -| `MINIO_PORT` | Yes | Brief description here. | Describe what happens here. | -| `MINIO_ACCESS_KEY` | Yes | Brief description here. | Describe what happens here. | -| `GOOGLE_CLIENT_ID` | Yes | Brief description here. | Describe what happens here. | -| `GOOGLE_CLIENT_SECRET` | Yes | Brief description here. | Describe what happens here. 
| -| `GOOGLE_REDIRECT_URI` | Yes | Brief description here. | Describe what happens here. | -| `REDIS_HOST` | Yes | Brief description here. | Describe what happens here. | -| `REDIS_PORT` | Yes | Brief description here. | Describe what happens here. | -| `MAXUN_TELEMETRY` | No | Brief description here. | Describe what happens here. | +| Variable | Mandatory | Description | If Not Set | +|-----------------------|-----------|----------------------------------------------------------------------------------------------|--------------------------------------------------------------| +| `NODE_ENV` | Yes | Defines the app environment (`development`, `production`). | Defaults to `development`; app may not behave as expected. | +| `JWT_SECRET` | Yes | Secret key used to sign and verify JSON Web Tokens (JWTs) for authentication. | JWT authentication will not work. | +| `DB_NAME` | Yes | Name of the Postgres database to connect to. | Database connection will fail. | +| `DB_USER` | Yes | Username for Postgres database authentication. | Database connection will fail. | +| `DB_PASSWORD` | Yes | Password for Postgres database authentication. | Database connection will fail. | +| `DB_HOST` | Yes | Host address where the Postgres database server is running. | Database connection will fail. | +| `DB_PORT` | Yes | Port number used to connect to the Postgres database server. | Database connection will fail. | +| `ENCRYPTION_KEY` | Yes | Key used for encrypting sensitive data (proxies, passwords). | Encryption functionality will not work. | +| `MINIO_ENDPOINT` | Yes | Endpoint URL for MinIO, to store robot run screenshots. | Connection to MinIO storage will fail. | +| `MINIO_PORT` | Yes | Port number for MinIO service. | Connection to MinIO storage will fail. | +| `MINIO_ACCESS_KEY` | Yes | Access key for authenticating with MinIO. | MinIO authentication will fail. | +| `GOOGLE_CLIENT_ID` | No | Client ID for Google OAuth, used in authentication. | Google login will not work. 
| +| `GOOGLE_CLIENT_SECRET`| No | Client Secret for Google OAuth. | Google login will not work. | +| `GOOGLE_REDIRECT_URI` | No | Redirect URI for handling Google OAuth responses. | Google login will not work. | +| `REDIS_HOST` | Yes | Host address of the Redis server for caching. | Redis connection will fail, affecting performance. | +| `REDIS_PORT` | Yes | Port number for the Redis server. | Redis connection will fail, affecting performance. | +| `MAXUN_TELEMETRY` | No | Disables telemetry to stop sending anonymous usage data. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes. Please keep it enabled. | Telemetry data will not be collected. | + # How Does It Work? From 647c25f687458ad793dbe2116fc458a356df1c46 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 10:45:02 +0530 Subject: [PATCH 32/40] chore: env description --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ce35a6fc..a07d4c51 100644 --- a/README.md +++ b/README.md @@ -37,14 +37,14 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web | `DB_HOST` | Yes | Host address where the Postgres database server is running. | Database connection will fail. | | `DB_PORT` | Yes | Port number used to connect to the Postgres database server. | Database connection will fail. | | `ENCRYPTION_KEY` | Yes | Key used for encrypting sensitive data (proxies, passwords). | Encryption functionality will not work. | -| `MINIO_ENDPOINT` | Yes | Endpoint URL for MinIO, to store robot run screenshots. | Connection to MinIO storage will fail. | +| `MINIO_ENDPOINT` | Yes | Endpoint URL for MinIO, to store Robot Run Screenshots. | Connection to MinIO storage will fail. | | `MINIO_PORT` | Yes | Port number for MinIO service. | Connection to MinIO storage will fail. | | `MINIO_ACCESS_KEY` | Yes | Access key for authenticating with MinIO. | MinIO authentication will fail. 
| -| `GOOGLE_CLIENT_ID` | No | Client ID for Google OAuth, used in authentication. | Google login will not work. | +| `GOOGLE_CLIENT_ID` | No | Client ID for Google OAuth, used for Google Sheet integration authentication. | Google login will not work. | | `GOOGLE_CLIENT_SECRET`| No | Client Secret for Google OAuth. | Google login will not work. | | `GOOGLE_REDIRECT_URI` | No | Redirect URI for handling Google OAuth responses. | Google login will not work. | -| `REDIS_HOST` | Yes | Host address of the Redis server for caching. | Redis connection will fail, affecting performance. | -| `REDIS_PORT` | Yes | Port number for the Redis server. | Redis connection will fail, affecting performance. | +| `REDIS_HOST` | Yes | Host address of the Redis server, used by BullMQ for scheduling robots. | Redis connection will fail. | +| `REDIS_PORT` | Yes | Port number for the Redis server. | Redis connection will fail. | | `MAXUN_TELEMETRY` | No | Disables telemetry to stop sending anonymous usage data. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes. Please keep it enabled. | Telemetry data will not be collected. | From 4e8db3be92ecfc80697a71d37aa78755162dc0ae Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 10:56:10 +0530 Subject: [PATCH 33/40] chore: node env --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index a07d4c51..b8dbd9e6 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. 
Web # Environment Variables | Variable | Mandatory | Description | If Not Set | |-----------------------|-----------|----------------------------------------------------------------------------------------------|--------------------------------------------------------------| -| `NODE_ENV` | Yes | Defines the app environment (`development`, `production`). 
| Defaults to `development`; app may not behave as expected. | +| `NODE_ENV` | Yes | Defines the app environment (`development`, `production`). | Defaults to `development`. | | `JWT_SECRET` | Yes | Secret key used to sign and verify JSON Web Tokens (JWTs) for authentication. | JWT authentication will not work. | | `DB_NAME` | Yes | Name of the Postgres database to connect to. | Database connection will fail. | | `DB_USER` | Yes | Username for Postgres database authentication. | Database connection will fail. | From a01ca42353ed9748fc534a29c4d9f7c0b6d2f9b6 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 11:02:56 +0530 Subject: [PATCH 34/40] chore: local setup --- README.md | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/README.md b/README.md index b8dbd9e6..f624a107 100644 --- a/README.md +++ b/README.md @@ -25,6 +25,29 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web # Installation +### Docker + +### Local Setup +1. Ensure you have Node.js, PostgreSQL, MinIO and Redis installed on your system. +2. 
Run the commands below: +``` +git clone https://github.com/getmaxun/maxun + +# change directory to the project root +cd maxun + +# install dependencies +npm install + +# change directory to maxun-core to install dependencies +cd maxun-core +npm install + +# start frontend and backend together +npm run start +``` +You can access the frontend at http://localhost:5173/ and backend at http://localhost:8080/ + # Environment Variables | Variable | Mandatory | Description | If Not Set | From 053f5cb3f60b443f94d5a4c6236440d9b17a8557 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 11:10:30 +0530 Subject: [PATCH 35/40] chore: feedback form --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index f624a107..c1ca8f90 100644 --- a/README.md +++ b/README.md @@ -77,12 +77,12 @@ Maxun lets you create custom robots which emulate user actions and extract data. ![Screenshot 2024-10-23 222138](https://github.com/user-attachments/assets/53573c98-769e-490d-829e-ada9fac0764f) -### 1. Robot Actions +## 1. Robot Actions 1. Capture List: Useful to extract structured and bulk items from the website. Example: Scrape products from Amazon etc. 2. Capture Text: Useful to extract individual text content from the website. 3. Capture Screenshot: Get fullpage or visible section screenshots of the website. -### 2. BYOP +## 2. BYOP BYOP (Bring Your Own Proxy) lets you connect external proxies to bypass anti-bot protection. Currently, the proxies are per user. Soon you'll be able to configure proxy per robot. @@ -101,7 +101,7 @@ BYOP (Bring Your Own Proxy) lets you connect external proxies to bypass anti-bot We offer a managed cloud version to run Maxun without having to manage the infrastructure and extract data at scale. Maxun cloud also deals with anti-bot detection, huge proxy network with automatic proxy rotation, and CAPTCHA solving. 
If this interests you, [join the cloud waitlist](https://docs.google.com/forms/d/e/1FAIpQLSdbD2uhqC4sbg4eLZ9qrFbyrfkXZ2XsI6dQ0USRCQNZNn5pzg/viewform) as we launch soon. # Note -This project is in early stages of development. We're actively working to improve the product. +This project is in early stages of development. Your feedback is very important for us - we're actively working to improve the product. Drop anonymous feedback here. # Contributing Please refer to [Contribution Guide](https://github.com/amhsirak/maxun/blob/master/.github/CONTRIBUTING.md). From 86aec48448367f7bf13adbd8a1d68a109c19f548 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 11:58:56 +0530 Subject: [PATCH 36/40] chore: wip docker --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index c1ca8f90..e71db720 100644 --- a/README.md +++ b/README.md @@ -26,6 +26,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web # Installation ### Docker +⚠️ Work In Progress. ### Local Setup 1. Ensure you have Node.js, PostgreSQL, MinIO and Redis installed on your system. 
From 76767f602d21086a0c8f9b49acab731589abe330 Mon Sep 17 00:00:00 2001 From: karishmas6 Date: Wed, 30 Oct 2024 12:04:38 +0530 Subject: [PATCH 37/40] chore: v0.0.1 --- docker-compose.yml | 2 +- nginx.conf | 2 +- server/src/constants/config.ts | 2 +- server/src/server.ts | 9 +++++++-- server/src/storage/db.ts | 2 +- server/src/utils/auth.ts | 4 ++-- server/src/worker.ts | 7 ++++++- 7 files changed, 19 insertions(+), 9 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 6ee8a398..f36e8900 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,7 +16,7 @@ services: - redis db: - image: postgres:15 + image: postgres:13 environment: POSTGRES_DB: ${DB_NAME} POSTGRES_USER: ${DB_USER} diff --git a/nginx.conf b/nginx.conf index 1e4b4d17..e9d636f8 100644 --- a/nginx.conf +++ b/nginx.conf @@ -7,7 +7,7 @@ server { } location /api { - proxy_pass http://localhost:8080; + proxy_pass http://127.0.0.1:8080; proxy_http_version 1.1; proxy_set_header Upgrade $http_upgrade; proxy_set_header Connection 'upgrade'; diff --git a/server/src/constants/config.ts b/server/src/constants/config.ts index afc77031..74d9de4c 100644 --- a/server/src/constants/config.ts +++ b/server/src/constants/config.ts @@ -1,4 +1,4 @@ export const SERVER_PORT = process.env.SERVER_PORT ? Number(process.env.SERVER_PORT) : 8080 export const DEBUG = process.env.DEBUG === 'true' export const LOGS_PATH = process.env.LOGS_PATH ?? 'server/logs' -export const ANALYTICS_ID = process.env.ANALYTICS_ID ?? 
'oss' \ No newline at end of file +export const ANALYTICS_ID = 'oss' \ No newline at end of file diff --git a/server/src/server.ts b/server/src/server.ts index dd824004..5c7fc898 100644 --- a/server/src/server.ts +++ b/server/src/server.ts @@ -62,8 +62,13 @@ readdirSync(path.join(__dirname, 'api')).forEach((r) => { } }); -const workerProcess = fork(path.resolve(__dirname, './worker.ts'), [], { - execArgv: ['--inspect=5859'], // Specify a different debug port for the worker +// Check if we're running in production or development +const isProduction = process.env.NODE_ENV === 'production'; +const workerPath = path.resolve(__dirname, isProduction ? './worker.js' : '/worker.ts'); + +// Fork the worker process +const workerProcess = fork(workerPath, [], { + execArgv: isProduction ? ['--inspect=8081'] : ['--inspect=5859'], }); workerProcess.on('message', (message) => { diff --git a/server/src/storage/db.ts b/server/src/storage/db.ts index 56c68d8b..6a23ef42 100644 --- a/server/src/storage/db.ts +++ b/server/src/storage/db.ts @@ -6,7 +6,7 @@ dotenv.config(); const sequelize = new Sequelize( `postgresql://${process.env.DB_USER}:${process.env.DB_PASSWORD}@${process.env.DB_HOST}:${process.env.DB_PORT}/${process.env.DB_NAME}`, { - host: 'localhost', + host: process.env.DB_HOST, dialect: 'postgres', logging: false, } diff --git a/server/src/utils/auth.ts b/server/src/utils/auth.ts index b1f6850f..e73a4237 100644 --- a/server/src/utils/auth.ts +++ b/server/src/utils/auth.ts @@ -24,9 +24,9 @@ export const comparePassword = (password: string, hash: string): Promise { - const ivLength = parseInt(getEnvVariable('IV_LENGTH'), 10); + const ivLength = 16; const iv = crypto.randomBytes(ivLength); - const algorithm = getEnvVariable('ALGORITHM'); + const algorithm = 'aes-256-cbc'; const key = Buffer.from(getEnvVariable('ENCRYPTION_KEY'), 'hex'); const cipher = crypto.createCipheriv(algorithm, key, iv); let encrypted = cipher.update(text, 'utf8', 'hex'); diff --git a/server/src/worker.ts 
b/server/src/worker.ts index 00bb13d2..fd3470d4 100644 --- a/server/src/worker.ts +++ b/server/src/worker.ts @@ -5,8 +5,13 @@ import { handleRunRecording } from "./workflow-management/scheduler"; import Robot from './models/Robot'; import { computeNextRun } from './utils/schedule'; +console.log('Environment variables:', { + REDIS_HOST: process.env.REDIS_HOST, + REDIS_PORT: process.env.REDIS_PORT, +}); + const connection = new IORedis({ - host: process.env.REDIS_HOST ? process.env.REDIS_HOST : 'redis', + host: process.env.REDIS_HOST, port: process.env.REDIS_PORT ? parseInt(process.env.REDIS_PORT, 10) : 6379, maxRetriesPerRequest: null, }); From 63b6bc83d0c71087073cf4620039520210bf422d Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 12:06:23 +0530 Subject: [PATCH 38/40] chore: docker wip --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index e71db720..c68d274b 100644 --- a/README.md +++ b/README.md @@ -26,7 +26,7 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web # Installation ### Docker -⚠️ Work In Progress. +⚠️ Work In Progress. Will be available by EOD. ### Local Setup 1. Ensure you have Node.js, PostgreSQL, MinIO and Redis installed on your system. From f96b05f12a7b0dc6f34c145de799a69d80e96158 Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 12:17:29 +0530 Subject: [PATCH 39/40] chore: docs --- README.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index c68d274b..078f9438 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@

- +
Maxun @@ -15,9 +15,10 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web

- Website • + Website • Discord • - Twitter + Twitter • + Join Maxun Cloud

![maxun_demo](https://github.com/user-attachments/assets/a61ba670-e56a-4ae1-9681-0b4bd6ba9cdc) From 2cd92613821d7c0e24a7c403fc6e46baf084ee1e Mon Sep 17 00:00:00 2001 From: Karishma Shukla Date: Wed, 30 Oct 2024 12:19:29 +0530 Subject: [PATCH 40/40] chore: format --- README.md | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 078f9438..327a26e7 100644 --- a/README.md +++ b/README.md @@ -15,9 +15,9 @@ Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web

- Website • - Discord • - Twitter • + Website | + Discord | + Twitter | Join Maxun Cloud

@@ -105,9 +105,6 @@ We offer a managed cloud version to run Maxun without having to manage the infra # Note This project is in early stages of development. Your feedback is very important for us - we're actively working to improve the product. Drop anonymous feedback here. -# Contributing -Please refer to [Contribution Guide](https://github.com/amhsirak/maxun/blob/master/.github/CONTRIBUTING.md). - # License

This project is licensed under AGPLv3. @@ -116,6 +113,6 @@ This project is licensed under AGPLv3. # Contributors Thank you to the combined efforts of everyone who contributes! - - + +