refactor: engine structure

This commit is contained in:
karishmas6
2024-03-19 01:39:01 +05:30
parent 8d78321635
commit faaa4b57c9
5 changed files with 0 additions and 213 deletions

108
api/.gitignore vendored
View File

@@ -1,108 +0,0 @@
# Created by .ignore support plugin (hsz.mobi)
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# celery beat schedule file
celerybeat-schedule
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.idea/*

View File

@@ -1,10 +0,0 @@
## Maxon API
### Local Setup
### Windows OS
1. Activate the virtual environment with `venv\Scripts\activate`
2. To install dependencies, run `pip install -r requirements.txt`
3. Run the command `uvicorn app.main:app --reload`

View File

@@ -1,56 +0,0 @@
annotated-types==0.6.0
anyio==4.2.0
attrs==23.2.0
Automat==22.10.0
certifi==2023.11.17
cffi==1.16.0
charset-normalizer==3.3.2
click==8.1.7
colorama==0.4.6
constantly==23.10.4
cryptography==41.0.7
cssselect==1.2.0
fastapi==0.108.0
filelock==3.13.1
h11==0.14.0
hyperlink==21.0.0
idna==3.6
incremental==22.10.0
itemadapter==0.8.0
itemloaders==1.1.0
jmespath==1.0.1
lxml==5.0.0
outcome==1.3.0.post0
packaging==23.2
parsel==1.8.1
playwright==1.42.0
Protego==0.3.0
pyasn1==0.5.1
pyasn1-modules==0.3.0
pycparser==2.21
pydantic==2.5.3
pydantic_core==2.14.6
PyDispatcher==2.0.7
pyOpenSSL==23.3.0
PySocks==1.7.1
queuelib==1.6.2
requests==2.31.0
requests-file==1.5.1
Scrapy==2.11.0
selenium==4.16.0
service-identity==23.1.0
six==1.16.0
sniffio==1.3.0
sortedcontainers==2.4.0
starlette==0.32.0.post1
tldextract==5.1.1
trio==0.23.2
trio-websocket==0.11.1
Twisted==22.10.0
twisted-iocpsupport==1.0.4
typing_extensions==4.9.0
urllib3==2.1.0
uvicorn==0.25.0
w3lib==2.1.2
wsproto==1.2.0
zope.interface==6.1

View File

@@ -1,31 +0,0 @@
from fastapi import FastAPI
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.signalmanager import dispatcher
from sample_spider import SampleSpider
from contextlib import asynccontextmanager
# FastAPI application instance.
# NOTE(review): the `lifespan` context manager defined below is never
# registered here — construct with FastAPI(lifespan=lifespan) for it to run.
app = FastAPI()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan: launch the Scrapy crawl at startup.

    NOTE(review): this function is not passed to FastAPI(lifespan=...)
    where `app` is constructed above, so it will not run unless the app
    is created with it — confirm intent.
    """
    # Re-trigger a crawl whenever Scrapy reports its engine has started.
    dispatcher.connect(start_crawler, signal=signals.engine_started)
    process = CrawlerProcess(settings={
        # Fixed: key was misspelled 'USffER_AGENT', so the custom
        # user agent was silently ignored by Scrapy.
        'USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
    })
    process.crawl(SampleSpider)
    # WARNING: process.start() blocks until the crawl finishes, stalling
    # application startup; consider running it off the event loop.
    process.start()
    # Fixed: an @asynccontextmanager function must yield exactly once;
    # without this, entering the lifespan raised RuntimeError.
    # (Anything placed after the yield would run at shutdown.)
    yield
def start_crawler():
    """Signal handler: start a fresh crawl of SampleSpider.

    Fixed: CrawlerProcess.crawl() requires a spider *class* (or Crawler);
    passing a spider instance raises ValueError in Scrapy, so the class
    is handed over directly instead of instantiating it here.
    """
    crawler = CrawlerProcess()
    crawler.crawl(SampleSpider)
    crawler.start()
@app.get("/scrape")
async def scrape_endpoint():
    """Report that scraping is underway; results are visible in the logs only."""
    # No scraped data is returned here — the crawl runs elsewhere.
    status = {"message": "Scraping in progress. Check logs for details."}
    return status

View File

@@ -1,8 +0,0 @@
import scrapy
class SampleSpider(scrapy.Spider):
    """Minimal spider: fetch the Google homepage and dump the raw response."""

    name = 'SampleSpider'
    start_urls = ['https://www.google.com/']

    def parse(self, response):
        # Print the raw body bytes for inspection; no items are yielded.
        raw_body = response.body
        print(raw_body)