refactor: engine structure
This commit is contained in:
108
api/.gitignore
vendored
108
api/.gitignore
vendored
@@ -1,108 +0,0 @@
|
||||
# Created by .ignore support plugin (hsz.mobi)
|
||||
### Python template
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
.Python
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe, so as to inject date/other infos into it.
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.tox/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# pyenv
|
||||
.python-version
|
||||
|
||||
# celery beat schedule file
|
||||
celerybeat-schedule
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
|
||||
.idea/*
|
||||
@@ -1,10 +0,0 @@
|
||||
## Maxon API
|
||||
|
||||
|
||||
### Local Setup
|
||||
|
||||
### Windows OS
|
||||
|
||||
1. Activate the virtual environment `venv\Scripts\Activate`
|
||||
2. To install dependencies, run `pip install -r requirements.txt`
|
||||
3. Run the command `uvicorn app.main:app --reload`
|
||||
@@ -1,56 +0,0 @@
|
||||
annotated-types==0.6.0
|
||||
anyio==4.2.0
|
||||
attrs==23.2.0
|
||||
Automat==22.10.0
|
||||
certifi==2023.11.17
|
||||
cffi==1.16.0
|
||||
charset-normalizer==3.3.2
|
||||
click==8.1.7
|
||||
colorama==0.4.6
|
||||
constantly==23.10.4
|
||||
cryptography==41.0.7
|
||||
cssselect==1.2.0
|
||||
fastapi==0.108.0
|
||||
filelock==3.13.1
|
||||
h11==0.14.0
|
||||
hyperlink==21.0.0
|
||||
idna==3.6
|
||||
incremental==22.10.0
|
||||
itemadapter==0.8.0
|
||||
itemloaders==1.1.0
|
||||
jmespath==1.0.1
|
||||
lxml==5.0.0
|
||||
outcome==1.3.0.post0
|
||||
packaging==23.2
|
||||
parsel==1.8.1
|
||||
playwright==1.42.0
|
||||
Protego==0.3.0
|
||||
pyasn1==0.5.1
|
||||
pyasn1-modules==0.3.0
|
||||
pycparser==2.21
|
||||
pydantic==2.5.3
|
||||
pydantic_core==2.14.6
|
||||
PyDispatcher==2.0.7
|
||||
pyOpenSSL==23.3.0
|
||||
PySocks==1.7.1
|
||||
queuelib==1.6.2
|
||||
requests==2.31.0
|
||||
requests-file==1.5.1
|
||||
Scrapy==2.11.0
|
||||
selenium==4.16.0
|
||||
service-identity==23.1.0
|
||||
six==1.16.0
|
||||
sniffio==1.3.0
|
||||
sortedcontainers==2.4.0
|
||||
starlette==0.32.0.post1
|
||||
tldextract==5.1.1
|
||||
trio==0.23.2
|
||||
trio-websocket==0.11.1
|
||||
Twisted==22.10.0
|
||||
twisted-iocpsupport==1.0.4
|
||||
typing_extensions==4.9.0
|
||||
urllib3==2.1.0
|
||||
uvicorn==0.25.0
|
||||
w3lib==2.1.2
|
||||
wsproto==1.2.0
|
||||
zope.interface==6.1
|
||||
@@ -1,31 +0,0 @@
|
||||
from fastapi import FastAPI
|
||||
from scrapy import signals
|
||||
from scrapy.crawler import CrawlerProcess
|
||||
from scrapy.signalmanager import dispatcher
|
||||
from sample_spider import SampleSpider
|
||||
from contextlib import asynccontextmanager
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
@asynccontextmanager
async def lifespan(app: FastAPI):
    """FastAPI lifespan context manager: kick off the Scrapy crawl at startup.

    NOTE(review): this only takes effect if the app is constructed with
    ``FastAPI(lifespan=lifespan)`` — confirm against the app definition.
    """
    # Run start_crawler once the Scrapy engine signals that it has started.
    dispatcher.connect(start_crawler, signal=signals.engine_started)
    # Fix: the settings key was misspelled 'USffER_AGENT', so Scrapy silently
    # ignored it and crawled with its default user agent.
    process = CrawlerProcess(settings={
        'USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
    })
    process.crawl(SampleSpider)
    # NOTE(review): process.start() blocks until the crawl finishes, which
    # delays application startup — consider running it in the background.
    process.start()
    # Fix: an @asynccontextmanager generator must yield exactly once; without
    # the yield, FastAPI fails at startup ("generator didn't yield").
    yield
|
||||
|
||||
def start_crawler():
    """Run SampleSpider in a fresh CrawlerProcess (blocks until the crawl ends)."""
    # Fix: CrawlerProcess.crawl() expects a spider *class* (or a Crawler);
    # passing a Spider instance raises ValueError in Scrapy 2.x.
    crawler = CrawlerProcess()
    crawler.crawl(SampleSpider)
    crawler.start()
|
||||
|
||||
@app.get("/scrape")
async def scrape_endpoint():
    """Placeholder endpoint; the crawl itself is driven by the Scrapy signals."""
    # TODO: retrieve and return the scraped data here.
    status = "Scraping in progress. Check logs for details."
    return {"message": status}
|
||||
@@ -1,8 +0,0 @@
|
||||
import scrapy
|
||||
|
||||
class SampleSpider(scrapy.Spider):
    """Minimal demo spider: fetches a single page and dumps its raw body."""

    name = 'SampleSpider'
    start_urls = ['https://www.google.com/']

    def parse(self, response):
        # Print the raw response bytes so the crawl is visible in the console.
        body = response.body
        print(body)
|
||||
Reference in New Issue
Block a user