diff --git a/api/.gitignore b/api/.gitignore deleted file mode 100644 index 662ac77a..00000000 --- a/api/.gitignore +++ /dev/null @@ -1,108 +0,0 @@ -# Created by .ignore support plugin (hsz.mobi) -### Python template -# Byte-compiled / optimized / DLL files -__pycache__/ -*.py[cod] -*$py.class - -# C extensions -*.so - -# Distribution / packaging -.Python -build/ -develop-eggs/ -dist/ -downloads/ -eggs/ -.eggs/ -lib/ -lib64/ -parts/ -sdist/ -var/ -wheels/ -*.egg-info/ -.installed.cfg -*.egg -MANIFEST - -# PyInstaller -# Usually these files are written by a python script from a template -# before PyInstaller builds the exe, so as to inject date/other infos into it. -*.manifest -*.spec - -# Installer logs -pip-log.txt -pip-delete-this-directory.txt - -# Unit test / coverage reports -htmlcov/ -.tox/ -.coverage -.coverage.* -.cache -nosetests.xml -coverage.xml -*.cover -.hypothesis/ -.pytest_cache/ - -# Translations -*.mo -*.pot - -# Django stuff: -*.log -local_settings.py -db.sqlite3 - -# Flask stuff: -instance/ -.webassets-cache - -# Scrapy stuff: -.scrapy - -# Sphinx documentation -docs/_build/ - -# PyBuilder -target/ - -# Jupyter Notebook -.ipynb_checkpoints - -# pyenv -.python-version - -# celery beat schedule file -celerybeat-schedule - -# SageMath parsed files -*.sage.py - -# Environments -.env -.venv -env/ -venv/ -ENV/ -env.bak/ -venv.bak/ - -# Spyder project settings -.spyderproject -.spyproject - -# Rope project settings -.ropeproject - -# mkdocs documentation -/site - -# mypy -.mypy_cache/ - -.idea/* \ No newline at end of file diff --git a/api/README.md b/api/README.md deleted file mode 100644 index e2891b85..00000000 --- a/api/README.md +++ /dev/null @@ -1,10 +0,0 @@ -## Maxon API - - -### Local Setup - -### Windows OS - -1. Activate the virtual environment `venv\Scripts\Activate` -2. To install dependencies, run `pip install -r requirements.txt` -3. Run the command `uvicorn app.main:app --reload` \ No newline at end of file diff --git a/api/requirements.txt b/api/requirements.txt deleted file mode 100644 index 5a4e0ef9..00000000 --- a/api/requirements.txt +++ /dev/null @@ -1,56 +0,0 @@ -annotated-types==0.6.0 -anyio==4.2.0 -attrs==23.2.0 -Automat==22.10.0 -certifi==2023.11.17 -cffi==1.16.0 -charset-normalizer==3.3.2 -click==8.1.7 -colorama==0.4.6 -constantly==23.10.4 -cryptography==41.0.7 -cssselect==1.2.0 -fastapi==0.108.0 -filelock==3.13.1 -h11==0.14.0 -hyperlink==21.0.0 -idna==3.6 -incremental==22.10.0 -itemadapter==0.8.0 -itemloaders==1.1.0 -jmespath==1.0.1 -lxml==5.0.0 -outcome==1.3.0.post0 -packaging==23.2 -parsel==1.8.1 -playwright==1.42.0 -Protego==0.3.0 -pyasn1==0.5.1 -pyasn1-modules==0.3.0 -pycparser==2.21 -pydantic==2.5.3 -pydantic_core==2.14.6 -PyDispatcher==2.0.7 -pyOpenSSL==23.3.0 -PySocks==1.7.1 -queuelib==1.6.2 -requests==2.31.0 -requests-file==1.5.1 -Scrapy==2.11.0 -selenium==4.16.0 -service-identity==23.1.0 -six==1.16.0 -sniffio==1.3.0 -sortedcontainers==2.4.0 -starlette==0.32.0.post1 -tldextract==5.1.1 -trio==0.23.2 -trio-websocket==0.11.1 -Twisted==22.10.0 -twisted-iocpsupport==1.0.4 -typing_extensions==4.9.0 -urllib3==2.1.0 -uvicorn==0.25.0 -w3lib==2.1.2 -wsproto==1.2.0 -zope.interface==6.1 \ No newline at end of file diff --git a/api/src/main.py b/api/src/main.py deleted file mode 100644 index 66929cfd..00000000 --- a/api/src/main.py +++ /dev/null @@ -1,31 +0,0 @@ -from fastapi import FastAPI -from scrapy import signals -from scrapy.crawler import CrawlerProcess -from scrapy.signalmanager import dispatcher -from sample_spider import SampleSpider -from contextlib import asynccontextmanager - -app = FastAPI() - -@asynccontextmanager -async def lifespan(app: FastAPI): - - dispatcher.connect(start_crawler, signal=signals.engine_started) - process = CrawlerProcess(settings={ - 'USffER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' - }) - process.crawl(SampleSpider) - process.start() - -def start_crawler(): - - spider = SampleSpider() - crawler = CrawlerProcess() - crawler.crawl(spider) - crawler.start() - -@app.get("/scrape") -async def scrape_endpoint(): - # Add your API endpoint logic here - # Retrieve and return the scraped data - return {"message": "Scraping in progress. Check logs for details."} diff --git a/api/src/sample_spider.py b/api/src/sample_spider.py deleted file mode 100644 index 7f4d555e..00000000 --- a/api/src/sample_spider.py +++ /dev/null @@ -1,8 +0,0 @@ -import scrapy - -class SampleSpider(scrapy.Spider): - name = 'SampleSpider' - start_urls = ['https://www.google.com/'] - - def parse(self, response): - print(response.body) \ No newline at end of file