-> Note: Maxun is in its early stages of development and currently does not support self-hosting. However, you can run Maxun locally. Self-hosting capabilities are planned for a future release and will be available soon.
+# Installation
+1. Create a root folder for your project (e.g. 'maxun')
+2. Create a file named `.env` in the root folder of the project
+3. An example env file can be viewed [here](https://github.com/getmaxun/maxun/blob/master/ENVEXAMPLE). Copy its entire contents into your `.env` file.
+4. Choose your installation method below
-# Local Installation
### Docker Compose
+1. Copy and paste the [docker-compose.yml file](https://github.com/getmaxun/maxun/blob/master/docker-compose.yml) into your root folder.
+2. Ensure you have set up the `.env` file in that same folder
+3. Run the command below from a terminal
```
-git clone https://github.com/getmaxun/maxun
docker-compose up -d
```
You can access the frontend at http://localhost:5173/ and backend at http://localhost:8080/
diff --git a/docker-compose.yml b/docker-compose.yml
index 46cc72c4..3c6e3a0f 100644
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -43,7 +43,7 @@ services:
#build:
#context: .
#dockerfile: server/Dockerfile
- image: getmaxun/maxun-backend:v0.0.5
+ image: getmaxun/maxun-backend:v0.0.7
ports:
- "${BACKEND_PORT:-8080}:${BACKEND_PORT:-8080}"
env_file: .env
@@ -64,28 +64,23 @@ services:
- redis
- minio
volumes:
- - ./server:/app/server # Mount server source code for hot reloading
- - ./maxun-core:/app/maxun-core # Mount maxun-core for any shared code updates
- /var/run/dbus:/var/run/dbus
frontend:
#build:
#context: .
#dockerfile: Dockerfile
- image: getmaxun/maxun-frontend:v0.0.2
+ image: getmaxun/maxun-frontend:v0.0.3
ports:
- "${FRONTEND_PORT:-5173}:${FRONTEND_PORT:-5173}"
env_file: .env
environment:
PUBLIC_URL: ${PUBLIC_URL}
BACKEND_URL: ${BACKEND_URL}
- volumes:
- - ./:/app # Mount entire frontend app directory for hot reloading
- - /app/node_modules # Anonymous volume to prevent overwriting node_modules
depends_on:
- backend
volumes:
postgres_data:
minio_data:
- redis_data:
+ redis_data:
\ No newline at end of file
diff --git a/maxun-core/package.json b/maxun-core/package.json
index 90ee01b7..36d06aa9 100644
--- a/maxun-core/package.json
+++ b/maxun-core/package.json
@@ -1,6 +1,6 @@
{
"name": "maxun-core",
- "version": "0.0.5",
+ "version": "0.0.6",
"description": "Core package for Maxun, responsible for data extraction",
"main": "build/index.js",
"typings": "build/index.d.ts",
diff --git a/maxun-core/src/browserSide/scraper.js b/maxun-core/src/browserSide/scraper.js
index 09b6578b..a2009d78 100644
--- a/maxun-core/src/browserSide/scraper.js
+++ b/maxun-core/src/browserSide/scraper.js
@@ -265,41 +265,72 @@ function scrapableHeuristics(maxCountPerPage = 50, minArea = 20000, scrolls = 3,
const scrapedData = [];
while (scrapedData.length < limit) {
- // Get all parent elements matching the listSelector
- const parentElements = Array.from(document.querySelectorAll(listSelector));
+ let parentElements = Array.from(document.querySelectorAll(listSelector));
+
+ // If we only got one element or none, try a more generic approach
+ if (limit > 1 && parentElements.length <= 1) {
+ const [containerSelector, _] = listSelector.split('>').map(s => s.trim());
+ const container = document.querySelector(containerSelector);
+
+ if (container) {
+ const allChildren = Array.from(container.children);
+
+ const firstMatch = document.querySelector(listSelector);
+ if (firstMatch) {
+ // Get classes from the first matching element
+ const firstMatchClasses = Array.from(firstMatch.classList);
+
+ // Find similar elements by matching most of their classes
+ parentElements = allChildren.filter(element => {
+ const elementClasses = Array.from(element.classList);
- // Iterate through each parent element
- for (const parent of parentElements) {
- if (scrapedData.length >= limit) break;
- const record = {};
-
- // For each field, select the corresponding element within the parent
- for (const [label, { selector, attribute }] of Object.entries(fields)) {
- const fieldElement = parent.querySelector(selector);
-
- if (fieldElement) {
- if (attribute === 'innerText') {
- record[label] = fieldElement.innerText.trim();
- } else if (attribute === 'innerHTML') {
- record[label] = fieldElement.innerHTML.trim();
- } else if (attribute === 'src') {
- // Handle relative 'src' URLs
- const src = fieldElement.getAttribute('src');
- record[label] = src ? new URL(src, window.location.origin).href : null;
- } else if (attribute === 'href') {
- // Handle relative 'href' URLs
- const href = fieldElement.getAttribute('href');
- record[label] = href ? new URL(href, window.location.origin).href : null;
- } else {
- record[label] = fieldElement.getAttribute(attribute);
+ // Element should share at least 70% of classes with the first match
+ const commonClasses = firstMatchClasses.filter(cls =>
+ elementClasses.includes(cls));
+ return commonClasses.length >= Math.floor(firstMatchClasses.length * 0.7);
+ });
+ }
}
- }
}
- scrapedData.push(record);
- }
+
+ // Iterate through each parent element
+ for (const parent of parentElements) {
+ if (scrapedData.length >= limit) break;
+ const record = {};
+
+ // For each field, select the corresponding element within the parent
+ for (const [label, { selector, attribute }] of Object.entries(fields)) {
+ const fieldElement = parent.querySelector(selector);
+
+ if (fieldElement) {
+ if (attribute === 'innerText') {
+ record[label] = fieldElement.innerText.trim();
+ } else if (attribute === 'innerHTML') {
+ record[label] = fieldElement.innerHTML.trim();
+ } else if (attribute === 'src') {
+ // Handle relative 'src' URLs
+ const src = fieldElement.getAttribute('src');
+ record[label] = src ? new URL(src, window.location.origin).href : null;
+ } else if (attribute === 'href') {
+ // Handle relative 'href' URLs
+ const href = fieldElement.getAttribute('href');
+ record[label] = href ? new URL(href, window.location.origin).href : null;
+ } else {
+ record[label] = fieldElement.getAttribute(attribute);
+ }
+ }
+ }
+ scrapedData.push(record);
+ }
+
+ // If we've processed all available elements and still haven't reached the limit,
+ // break to avoid infinite loop
+ if (parentElements.length === 0 || scrapedData.length >= parentElements.length) {
+ break;
+ }
}
- return scrapedData
- };
+ return scrapedData;
+};
/**
diff --git a/maxun-core/src/interpret.ts b/maxun-core/src/interpret.ts
index afea8e47..14d8f46e 100644
--- a/maxun-core/src/interpret.ts
+++ b/maxun-core/src/interpret.ts
@@ -102,7 +102,7 @@ export default class Interpreter extends EventEmitter {
};
}
- PlaywrightBlocker.fromPrebuiltAdsAndTracking(fetch).then(blocker => {
+ PlaywrightBlocker.fromLists(fetch, ['https://easylist.to/easylist/easylist.txt']).then(blocker => {
this.blocker = blocker;
}).catch(err => {
this.log(`Failed to initialize ad-blocker:`, Level.ERROR);
@@ -192,8 +192,8 @@ export default class Interpreter extends EventEmitter {
// const actionable = async (selector: string): Promise
+ Run the commands below
+ # cd to project directory (e.g. maxun)
+
+ cd maxun
+
+
+ # pull latest changes
+
+ git pull origin master
+
+
+ # install dependencies
+
+ npm install
+
+
+ # start maxun
+
+ npm run start
+
+
+ Run the commands below
+ # cd to project directory (e.g. maxun)
+
+ cd maxun
+
+
+ # stop the working containers
+
+ docker-compose down
+
+
+ # pull latest docker images
+
+ docker-compose pull
+
+
+ # start maxun
+
+ docker-compose up -d
+
+