refactor: engine structure

This commit is contained in:
karishmas6
2024-03-19 01:39:01 +05:30
parent 8d78321635
commit faaa4b57c9
5 changed files with 0 additions and 213 deletions

View File

@@ -1,31 +0,0 @@
from fastapi import FastAPI
from scrapy import signals
from scrapy.crawler import CrawlerProcess
from scrapy.signalmanager import dispatcher
from sample_spider import SampleSpider
from contextlib import asynccontextmanager
# Module-level FastAPI application; the route decorators below register on it.
# NOTE(review): no `lifespan=` argument is passed here, so the `lifespan`
# context manager defined below does not appear to be attached — confirm.
app = FastAPI()
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Run the sample crawl as startup work, then hand control to the app.

    Connects ``start_crawler`` to Scrapy's ``engine_started`` signal, builds a
    ``CrawlerProcess`` with a browser-like user agent, schedules
    ``SampleSpider`` on it and starts the process.

    Args:
        app: The FastAPI application this lifespan belongs to (not used here).

    Yields:
        None, once the startup crawl call has returned.
    """
    dispatcher.connect(start_crawler, signal=signals.engine_started)
    process = CrawlerProcess(
        settings={
            # Scrapy's setting name is USER_AGENT; a misspelled key is just an
            # unrelated setting and leaves the default user agent in place.
            'USER_AGENT': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36',
        }
    )
    process.crawl(SampleSpider)
    # NOTE(review): CrawlerProcess.start() runs the Twisted reactor and blocks
    # until crawling finishes by default — confirm that blocking inside an
    # async startup hook is intended.
    process.start()
    # asynccontextmanager needs an async generator: without a yield the
    # decorated coroutine cannot be entered as a context manager.
    yield
def start_crawler():
    """Schedule ``SampleSpider`` on a fresh ``CrawlerProcess`` and start it."""
    crawler = CrawlerProcess()
    # crawl() expects a spider class (or a Crawler), not a spider instance;
    # Scrapy raises ValueError when handed an already-constructed spider.
    crawler.crawl(SampleSpider)
    # NOTE(review): start() launches the reactor; when this function is fired
    # from the engine_started signal a reactor is presumably already running —
    # confirm this call path is actually wanted.
    crawler.start()
@app.get("/scrape")
async def scrape_endpoint():
    """Handle ``GET /scrape`` by returning a fixed status message.

    Placeholder endpoint: it does not retrieve or return any scraped data.
    """
    # TODO: add the real endpoint logic that retrieves and returns scraped data.
    status_payload = {"message": "Scraping in progress. Check logs for details."}
    return status_payload

View File

@@ -1,8 +0,0 @@
import scrapy
class SampleSpider(scrapy.Spider):
    """Minimal spider that requests a single start URL and dumps the response."""

    # Seed URL(s) the spider starts from.
    start_urls = ['https://www.google.com/']
    # Identifier Scrapy uses to refer to this spider.
    name = 'SampleSpider'

    def parse(self, response):
        """Print the raw body of the downloaded response to stdout."""
        raw_body = response.body
        print(raw_body)