Merge pull request #88 from amhsirak/develop

chore: pre-release v.0.0.1
This commit is contained in:
Karishma Shukla
2024-10-30 12:19:49 +05:30
committed by GitHub
22 changed files with 387 additions and 114 deletions

70
Dockerfile Normal file
View File

@@ -0,0 +1,70 @@
# --- Base Stage ---
# Shared dependency layer: installs node_modules once so the backend and
# frontend build stages below can both reuse the cached install.
FROM node:18 AS base
WORKDIR /app
# Copy shared package.json and install dependencies
COPY package.json package-lock.json ./
COPY maxun-core/package.json ./maxun-core/package.json
RUN npm install
# --- Backend Build Stage ---
FROM base AS backend-build
WORKDIR /app
# Copy TypeScript configs
COPY tsconfig*.json ./
COPY server/tsconfig.json ./server/
# Copy ALL source code (both frontend and backend)
# NOTE(review): server/tsconfig.json includes ../src/shared and
# ../src/helpers, so the frontend src tree must be present for the
# server build as well.
COPY src ./src
# Copy backend code and maxun-core
COPY server/src ./server/src
COPY maxun-core ./maxun-core
# Install TypeScript globally and build
RUN npm install -g typescript
RUN npm run build:server
# --- Frontend Build Stage ---
FROM base AS frontend-build
WORKDIR /app
# Copy frontend code and configs
COPY src ./src
COPY index.html ./index.html
COPY public ./public
COPY vite.config.js ./
COPY tsconfig.json ./
# Build frontend (vite.config.js sets outDir to /app/build)
RUN npm run build
# --- Production Stage ---
# nginx serves the static frontend and proxies /api to the Node backend
# running in the same container (see nginx.conf and docker-entrypoint.sh).
FROM nginx:alpine AS production
# Install Node.js in the production image
RUN apk add --update nodejs npm
# Copy nginx configuration
COPY nginx.conf /etc/nginx/conf.d/default.conf
# Copy built frontend
COPY --from=frontend-build /app/build /usr/share/nginx/html
COPY --from=frontend-build /app/public/img /usr/share/nginx/html/img
# Copy built backend and its dependencies
WORKDIR /app
COPY --from=backend-build /app/package*.json ./
COPY --from=backend-build /app/server/dist ./server/dist
COPY --from=backend-build /app/maxun-core ./maxun-core
COPY --from=backend-build /app/node_modules ./node_modules
# Copy start script
COPY docker-entrypoint.sh /
RUN chmod +x /docker-entrypoint.sh
# 80 = nginx (frontend + /api proxy), 8080 = Node backend (direct access)
EXPOSE 80 8080
# Start both nginx and node server
ENTRYPOINT ["/docker-entrypoint.sh"]

View File

@@ -1,23 +0,0 @@
# Use node image
FROM node:18-alpine
# Set working directory in the container to /app
WORKDIR /app
# Copy only the package.json and package-lock.json first for caching
COPY package.json package-lock.json ./
# Install dependencies
RUN npm install --production
# Copy the entire project (core and backend code)
COPY . .
# Set the working directory to the backend folder
WORKDIR /app/server
# Expose the port the backend listens on
EXPOSE 8080
# Start the backend server
CMD ["npm", "run", "start:server"]

View File

@@ -1,29 +0,0 @@
# Use node image for the build stage
FROM node:18-alpine AS build
# Set working directory in the container to /app
WORKDIR /app
# Copy only the package.json and package-lock.json first for caching
COPY package.json package-lock.json ./
# Install dependencies (legacy peer deps is needed for react highlight, we get rid of it soon)
RUN npm install --legacy-peer-deps
# Copy the entire project (including frontend code)
COPY . .
# Build the frontend
RUN npm run build
# Use NGINX for serving the built frontend in production
FROM nginx:stable-alpine
# Copy the build output from the previous stage
COPY --from=build /app/build /usr/share/nginx/html
# Expose the frontend port
EXPOSE 3000
# Start NGINX server
CMD ["nginx", "-g", "daemon off;"]

118
README.md Normal file
View File

@@ -0,0 +1,118 @@
<h1 align="center">
<div>
<a href="https://maxun-website.vercel.app/">
<img src="/public/img/maxunlogo.png" width="50" />
<br>
Maxun
</a>
</div>
Open-Source No-Code Web Data Extraction Platform <br>
</h1>
<p align="center">
Maxun lets you train a robot in 2 minutes and scrape the web on auto-pilot. Web data extraction doesn't get easier than this!
</p>
<p align="center">
<a href="https://maxun-website.vercel.app/"><b>Website</b></a> |
<a href="https://discord.gg/5RgZmkW"><b>Discord</b></a> |
<a href="https://x.com/maxun_io"><b>Twitter</b></a> |
<a href="https://docs.google.com/forms/d/e/1FAIpQLSdbD2uhqC4sbg4eLZ9qrFbyrfkXZ2XsI6dQ0USRCQNZNn5pzg/viewform"><b>Join Maxun Cloud</b></a>
</p>
![maxun_demo](https://github.com/user-attachments/assets/a61ba670-e56a-4ae1-9681-0b4bd6ba9cdc)
<img src="https://static.scarf.sh/a.png?x-pxid=c12a77cc-855e-4602-8a0f-614b2d0da56a" />
# Installation
### Docker
⚠️ Work In Progress. Will be available by EOD.
### Local Setup
1. Ensure you have Node.js, PostgreSQL, MinIO and Redis installed on your system.
2. Run the commands below:
```
git clone https://github.com/getmaxun/maxun
# change directory to the project root
cd maxun
# install dependencies
npm install
# change directory to maxun-core to install dependencies
cd maxun-core
npm install
# start frontend and backend together
npm run start
```
You can access the frontend at http://localhost:5173/ and the backend at http://localhost:8080/
# Environment Variables
| Variable | Mandatory | Description | If Not Set |
|-----------------------|-----------|----------------------------------------------------------------------------------------------|--------------------------------------------------------------|
| `NODE_ENV` | Yes | Defines the app environment (`development`, `production`). | Defaults to `development`. |
| `JWT_SECRET` | Yes | Secret key used to sign and verify JSON Web Tokens (JWTs) for authentication. | JWT authentication will not work. |
| `DB_NAME` | Yes | Name of the Postgres database to connect to. | Database connection will fail. |
| `DB_USER` | Yes | Username for Postgres database authentication. | Database connection will fail. |
| `DB_PASSWORD` | Yes | Password for Postgres database authentication. | Database connection will fail. |
| `DB_HOST` | Yes | Host address where the Postgres database server is running. | Database connection will fail. |
| `DB_PORT` | Yes | Port number used to connect to the Postgres database server. | Database connection will fail. |
| `ENCRYPTION_KEY` | Yes | Key used for encrypting sensitive data (proxies, passwords). | Encryption functionality will not work. |
| `MINIO_ENDPOINT` | Yes | Endpoint URL for MinIO, to store Robot Run Screenshots. | Connection to MinIO storage will fail. |
| `MINIO_PORT` | Yes | Port number for MinIO service. | Connection to MinIO storage will fail. |
| `MINIO_ACCESS_KEY` | Yes | Access key for authenticating with MinIO. | MinIO authentication will fail. |
| `GOOGLE_CLIENT_ID` | No | Client ID for Google OAuth, used for Google Sheet integration authentication. | Google login will not work. |
| `GOOGLE_CLIENT_SECRET`| No | Client Secret for Google OAuth. | Google login will not work. |
| `GOOGLE_REDIRECT_URI` | No | Redirect URI for handling Google OAuth responses. | Google login will not work. |
| `REDIS_HOST` | Yes | Host address of the Redis server, used by BullMQ for scheduling robots. | Redis connection will fail. |
| `REDIS_PORT` | Yes | Port number for the Redis server. | Redis connection will fail. |
| `MAXUN_TELEMETRY` | No | Disables telemetry to stop sending anonymous usage data. Keeping it enabled helps us understand how the product is used and assess the impact of any new changes. Please keep it enabled. | Telemetry data will not be collected. |
# How Does It Work?
Maxun lets you create custom robots which emulate user actions and extract data. A robot can perform any of the actions: <b>Capture List, Capture Text or Capture Screenshot. Once a robot is created, it will keep extracting data for you without manual intervention</b>
![Screenshot 2024-10-23 222138](https://github.com/user-attachments/assets/53573c98-769e-490d-829e-ada9fac0764f)
## 1. Robot Actions
1. Capture List: Useful to extract structured and bulk items from the website. Example: Scrape products from Amazon etc.
2. Capture Text: Useful to extract individual text content from the website.
3. Capture Screenshot: Get fullpage or visible section screenshots of the website.
## 2. BYOP
BYOP (Bring Your Own Proxy) lets you connect external proxies to bypass anti-bot protection. Currently, the proxies are per user. Soon you'll be able to configure proxy per robot.
# Features
- ✨ Extract Data With No-Code
- ✨ Handle Pagination & Scrolling
- ✨ Run Robots On A Specific Schedule
- ✨ Turn Websites to APIs
- ✨ Turn Websites to Spreadsheets
- ✨ Adapt To Website Layout Changes (coming soon)
- ✨ Extract Behind Login, With Two-Factor Authentication Support (coming soon)
- ✨ Integrations (currently Google Sheet)
- +++ A lot of amazing things soon!
# Cloud
We offer a managed cloud version that runs Maxun without you having to manage the infrastructure, so you can extract data at scale. Maxun Cloud also handles anti-bot detection, provides a large proxy network with automatic proxy rotation, and solves CAPTCHAs. If this interests you, [join the cloud waitlist](https://docs.google.com/forms/d/e/1FAIpQLSdbD2uhqC4sbg4eLZ9qrFbyrfkXZ2XsI6dQ0USRCQNZNn5pzg/viewform) as we launch soon.
# Note
This project is in the early stages of development. Your feedback is very important to us — we're actively working to improve the product. <a href="https://forms.gle/E8vRMVB7bUbsSktPA">Drop anonymous feedback here.</a>
# License
<p>
This project is licensed under <a href="./LICENSE">AGPLv3</a>.
</p>
# Contributors
Thank you to the combined efforts of everyone who contributes!
<a href="https://github.com/getmaxun/maxun/graphs/contributors">
<img src="https://contrib.rocks/image?repo=getmaxun/maxun" />
</a>

View File

@@ -1,67 +1,53 @@
version: '3.8'
services:
# Frontend
frontend:
app:
build:
context: .
dockerfile: ./Dockerfile.frontend
dockerfile: Dockerfile
target: production
env_file: .env
ports:
- "3000:3000" # Map host port 3000 to container port 3000
- "5173:80"
- "8080:8080"
depends_on:
- backend
networks:
- app-network
# Backend
backend:
build:
context: .
dockerfile: ./Dockerfile.backend
ports:
- "8080:8080" # Map host port 8080 to container port 8080
environment:
POSTGRES_HOST: postgres
POSTGRES_DB: mydb
POSTGRES_USER: myuser
POSTGRES_PASSWORD: mypassword
MINIO_ENDPOINT: minio
MINIO_PORT: 9000
depends_on:
- postgres
- db
- minio
networks:
- app-network
- redis
# Postgres Database
postgres:
image: postgres:15
db:
image: postgres:13
environment:
POSTGRES_DB: mydb
POSTGRES_USER: myuser
POSTGRES_PASSWORD: mypassword
POSTGRES_DB: ${DB_NAME}
POSTGRES_USER: ${DB_USER}
POSTGRES_PASSWORD: ${DB_PASSWORD}
ports:
- "5432:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
networks:
- app-network
# MinIO for Storage
minio:
image: minio/minio
environment:
MINIO_ROOT_USER: ${MINIO_ACCESS_KEY}
MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY}
command: server /data
ports:
- "9000:9000"
environment:
MINIO_ROOT_USER: minioadmin
MINIO_ROOT_PASSWORD: minioadmin123
volumes:
- minio_data:/data
networks:
- app-network
redis:
image: redis:6
environment:
- REDIS_HOST=redis
- REDIS_PORT=6379
ports:
- "6379:6379"
volumes:
- redis_data:/data
volumes:
postgres_data:
minio_data:
networks:
app-network:
driver: bridge
redis_data:

7
docker-entrypoint.sh Normal file
View File

@@ -0,0 +1,7 @@
#!/bin/sh
# Container entrypoint: runs the Node backend and nginx in one container.
# Start backend server in the background
cd /app && npm run start:server &
# Start nginx in the foreground via exec so nginx replaces the shell as
# PID 1 and receives container stop signals (SIGTERM) directly; without
# exec, the shell stays PID 1 and does not forward signals to nginx.
exec nginx -g 'daemon off;'

View File

@@ -8,8 +8,7 @@
name="description"
content="Web site created using Vite"
/>
<link rel="icon" type="image/png" href="/img/maxunlogo.png">
<link rel="manifest" href="/manifest.json" />
<link rel="icon" type="image/png" href="public/img/maxunlogo.png">
<title>Maxun | Open Source No Code Web Data Extraction Platform</title>
</head>
<body>

View File

@@ -1,6 +1,6 @@
{
"name": "maxun-core",
"version": "0.0.2",
"version": "0.0.3",
"description": "Core package for Maxun, responsible for data extraction",
"main": "build/index.js",
"typings": "build/index.d.ts",

17
nginx.conf Normal file
View File

@@ -0,0 +1,17 @@
server {
    # Serve the built SPA and proxy API calls to the co-located backend.
    listen 80;

    location / {
        root /usr/share/nginx/html;
        # SPA fallback: unknown paths rewrite to index.html so
        # client-side routing survives a hard refresh or deep link.
        try_files $uri $uri/ /index.html;
    }

    location /api {
        # Node backend runs in the same container on port 8080.
        proxy_pass http://127.0.0.1:8080;
        # HTTP/1.1 plus Upgrade/Connection headers let WebSocket
        # connections pass through the proxy; cache is bypassed for
        # upgraded requests.
        proxy_http_version 1.1;
        proxy_set_header Upgrade $http_upgrade;
        proxy_set_header Connection 'upgrade';
        proxy_set_header Host $host;
        proxy_cache_bypass $http_upgrade;
    }
}

View File

@@ -41,6 +41,7 @@
"jsonwebtoken": "^9.0.2",
"loglevel": "^1.8.0",
"loglevel-plugin-remote": "^0.6.8",
"maxun-core": "^0.0.3",
"minio": "^8.0.1",
"moment-timezone": "^0.5.45",
"node-cron": "^3.0.3",
@@ -75,7 +76,9 @@
"server": "./node_modules/.bin/nodemon server/src/server.ts",
"client": "vite",
"build": "vite build",
"test": "vite preview",
"build:server": "tsc -p server/tsconfig.json",
"start:server": "node server/dist/server/src/server.js",
"preview": "vite preview",
"lint": "./node_modules/.bin/eslint ."
},
"eslintConfig": {

20
server/.gitignore vendored Normal file
View File

@@ -0,0 +1,20 @@
# dependencies
/node_modules

# misc
.DS_Store

# local environment files — may contain secrets, never commit
.env.local
.env.development.local
.env.test.local
.env.production.local
.env

# IDE settings
/.idea

# runtime logs and build output
/server/logs
/build
/dist
package-lock.json

View File

@@ -298,12 +298,14 @@ router.get("/robots/:id/runs", requireAPIKey, async (req: Request, res: Response
raw: true
});
const formattedRuns = runs.map(formatRunResponse);
const response = {
statusCode: 200,
messageCode: "success",
runs: {
totalCount: runs.length,
items: runs,
totalCount: formattedRuns.length,
items: formattedRuns,
},
};
@@ -319,6 +321,32 @@ router.get("/robots/:id/runs", requireAPIKey, async (req: Request, res: Response
}
);
// Shape a Run record into the public API payload. The internal
// `robotMetaId` column is exposed to clients as `robotId`; exactly one of
// `data` (serializable output) or `screenshot` (binary output) is filled.
function formatRunResponse(run: any) {
  const payload = {
    id: run.id,
    status: run.status,
    name: run.name,
    robotId: run.robotMetaId, // Renaming robotMetaId to robotId
    startedAt: run.startedAt,
    finishedAt: run.finishedAt,
    runId: run.runId,
    runByUserId: run.runByUserId,
    runByScheduleId: run.runByScheduleId,
    runByAPI: run.runByAPI,
    data: {},
    screenshot: null,
  };

  const serializable = run.serializableOutput?.['item-0'];
  const binary = run.binaryOutput?.['item-0'];
  if (serializable) {
    payload.data = serializable;
  } else if (binary) {
    payload.screenshot = binary;
  }

  return payload;
}
/**
* @swagger
* /api/robots/{id}/runs/{runId}:
@@ -393,7 +421,7 @@ router.get("/robots/:id/runs/:runId", requireAPIKey, async (req: Request, res: R
const response = {
statusCode: 200,
messageCode: "success",
run: run,
run: formatRunResponse(run),
};
res.status(200).json(response);
@@ -754,7 +782,7 @@ router.post("/robots/:id/runs", requireAPIKey, async (req: AuthenticatedRequest,
const response = {
statusCode: 200,
messageCode: "success",
run: completedRun,
run: formatRunResponse(completedRun),
};
res.status(200).json(response);

View File

@@ -1,4 +1,4 @@
export const SERVER_PORT = process.env.SERVER_PORT ? Number(process.env.SERVER_PORT) : 8080
export const DEBUG = process.env.DEBUG === 'true'
export const LOGS_PATH = process.env.LOGS_PATH ?? 'server/logs'
export const ANALYTICS_ID = process.env.ANALYTICS_ID ?? 'oss'
export const ANALYTICS_ID = 'oss'

View File

@@ -62,8 +62,13 @@ readdirSync(path.join(__dirname, 'api')).forEach((r) => {
}
});
const workerProcess = fork(path.resolve(__dirname, './worker.ts'), [], {
execArgv: ['--inspect=5859'], // Specify a different debug port for the worker
// Check if we're running in production or development
const isProduction = process.env.NODE_ENV === 'production';
// Production forks the compiled worker; development forks the TS source.
// Both segments must be RELATIVE ('./worker.ts', not '/worker.ts'):
// path.resolve treats a leading '/' as an absolute path and discards
// __dirname entirely, so '/worker.ts' would resolve to the filesystem
// root and the fork would fail with MODULE_NOT_FOUND.
const workerPath = path.resolve(__dirname, isProduction ? './worker.js' : './worker.ts');
// Fork the worker process with a stage-specific inspector port
const workerProcess = fork(workerPath, [], {
  execArgv: isProduction ? ['--inspect=8081'] : ['--inspect=5859'],
});
workerProcess.on('message', (message) => {

View File

@@ -6,7 +6,7 @@ dotenv.config();
const sequelize = new Sequelize(
`postgresql://${process.env.DB_USER}:${process.env.DB_PASSWORD}@${process.env.DB_HOST}:${process.env.DB_PORT}/${process.env.DB_NAME}`,
{
host: 'localhost',
host: process.env.DB_HOST,
dialect: 'postgres',
logging: false,
}

View File

@@ -21,6 +21,38 @@ minioClient.bucketExists('maxun-test')
console.error('Error connecting to MinIO:', err);
})
/**
 * Ensure the bucket `bucketName` exists; when creating it with the
 * 'public-read' policy, attach an anonymous read-only (s3:GetObject)
 * bucket policy so stored screenshots are directly viewable.
 * Errors are logged and swallowed — callers treat this as best-effort.
 */
async function createBucketWithPolicy(bucketName: string, policy?: 'public-read' | 'private') {
  try {
    const bucketExists = await minioClient.bucketExists(bucketName);
    if (!bucketExists) {
      await minioClient.makeBucket(bucketName);
      console.log(`Bucket ${bucketName} created successfully.`);
      if (policy === 'public-read') {
        // Define a public-read policy. Principal must be "*" to grant
        // anonymous access — an empty string is not a valid Principal in
        // S3-style policies, so the policy would be rejected and the
        // bucket would silently remain private.
        const policyJSON = {
          Version: "2012-10-17",
          Statement: [
            {
              Effect: "Allow",
              Principal: "*",
              Action: ["s3:GetObject"],
              Resource: [`arn:aws:s3:::${bucketName}/*`]
            }
          ]
        };
        await minioClient.setBucketPolicy(bucketName, JSON.stringify(policyJSON));
        console.log(`Public-read policy applied to bucket ${bucketName}.`);
      }
    } else {
      console.log(`Bucket ${bucketName} already exists.`);
    }
  } catch (error) {
    console.error('Error in bucket creation or policy application:', error);
  }
}
class BinaryOutputService {
private bucketName: string;
@@ -98,6 +130,7 @@ class BinaryOutputService {
}
async uploadBinaryOutputToMinioBucket(run: Run, key: string, data: Buffer): Promise<void> {
await createBucketWithPolicy('maxun-run-screenshots', 'public-read');
const bucketName = 'maxun-run-screenshots';
try {
console.log(`Uploading to bucket ${bucketName} with key ${key}`);

View File

@@ -24,9 +24,9 @@ export const comparePassword = (password: string, hash: string): Promise<boolean
}
export const encrypt = (text: string): string => {
const ivLength = parseInt(getEnvVariable('IV_LENGTH'), 10);
const ivLength = 16;
const iv = crypto.randomBytes(ivLength);
const algorithm = getEnvVariable('ALGORITHM');
const algorithm = 'aes-256-cbc';
const key = Buffer.from(getEnvVariable('ENCRYPTION_KEY'), 'hex');
const cipher = crypto.createCipheriv(algorithm, key, iv);
let encrypted = cipher.update(text, 'utf8', 'hex');

View File

@@ -5,9 +5,14 @@ import { handleRunRecording } from "./workflow-management/scheduler";
import Robot from './models/Robot';
import { computeNextRun } from './utils/schedule';
console.log('Environment variables:', {
REDIS_HOST: process.env.REDIS_HOST,
REDIS_PORT: process.env.REDIS_PORT,
});
const connection = new IORedis({
host: 'localhost',
port: 6379,
host: process.env.REDIS_HOST,
port: process.env.REDIS_PORT ? parseInt(process.env.REDIS_PORT, 10) : 6379,
maxRetriesPerRequest: null,
});

32
server/tsconfig.json Normal file
View File

@@ -0,0 +1,32 @@
{
  "compilerOptions": {
    // Node-friendly output: CommonJS modules targeting ES2018.
    "target": "es2018",
    "module": "commonjs",
    // rootDir is the repo root, so emitted files land under
    // dist/server/src/... — matching the start:server script's
    // `node server/dist/server/src/server.js` entry path.
    "outDir": "./dist",
    "rootDir": "../",
    "strict": true,
    "esModuleInterop": true,
    "skipLibCheck": true,
    "forceConsistentCasingInFileNames": true,
    "moduleResolution": "node",
    "baseUrl": "../",
    "paths": {
      "*": ["*"],
      "src/*": ["src/*"]
    },
    // Shared code under ../src may use JSX and browser globals,
    // hence the jsx setting and dom libs.
    "jsx": "react-jsx",
    "lib": ["dom", "dom.iterable", "esnext"],
    "allowJs": true
  },
  "include": [
    "src/**/*",
    "../src/shared/**/*",
    "../src/helpers/**/*"
  ],
  "exclude": [
    "node_modules",
    "../src/components/**/*", // Exclude frontend components
    "../src/pages/**/*", // Exclude frontend pages
    "../src/app/**/*" // Exclude other frontend-specific code
  ]
}

View File

@@ -110,7 +110,7 @@ const ActionDescriptionBox = () => {
return (
<CustomBoxContainer>
<Logo src="../../../public/img/maxunlogo.png" alt="Maxun Logo" />
<Logo src="/img/maxunlogo.png" alt="Maxun Logo" />
<Triangle />
<Content>
{renderActionDescription()}

View File

@@ -54,7 +54,7 @@ export const NavBar: React.FC<NavBarProps> = ({ recordingName, isRecording }) =>
display: 'flex',
justifyContent: 'flex-start',
}}>
<img src="../../../public/img/maxunlogo.png" width={45} height={40} style={{ borderRadius: '5px', margin: '5px 0px 5px 15px' }} />
<img src="img/maxunlogo.png" width={45} height={40} style={{ borderRadius: '5px', margin: '5px 0px 5px 15px' }} />
<div style={{ padding: '11px' }}><ProjectName>Maxun</ProjectName></div>
</div>
{

View File

@@ -5,6 +5,8 @@ export default defineConfig(() => {
return {
build: {
outDir: 'build',
manifest: true,
chunkSizeWarningLimit: 1024,
},
plugins: [react()],
};