diff --git a/api/src/main.py b/api/src/main.py
new file mode 100644
index 00000000..66929cfd
--- /dev/null
+++ b/api/src/main.py
@@ -0,0 +1,54 @@
+"""FastAPI service that runs a Scrapy crawl alongside the web application."""
+from contextlib import asynccontextmanager
+from multiprocessing import Process
+
+from fastapi import FastAPI
+from scrapy import signals
+from scrapy.crawler import CrawlerProcess
+from scrapy.signalmanager import dispatcher
+
+from sample_spider import SampleSpider
+
+# Browser-like User-Agent sent with every request made by the crawler.
+USER_AGENT = (
+    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
+    'AppleWebKit/537.36 (KHTML, like Gecko) '
+    'Chrome/91.0.4472.124 Safari/537.36'
+)
+
+
+def start_crawler():
+    """Run SampleSpider to completion.
+
+    CrawlerProcess.start() runs the Twisted reactor and blocks until the
+    crawl finishes, so this must be called from a dedicated process, never
+    from the ASGI event loop.
+    """
+    process = CrawlerProcess(settings={'USER_AGENT': USER_AGENT})
+    process.crawl(SampleSpider)
+    process.start()
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Start the crawl in a child process on startup; stop it on shutdown."""
+    worker = Process(target=start_crawler, daemon=True)
+    worker.start()
+    try:
+        # Hand control back to FastAPI so it can begin serving requests.
+        yield
+    finally:
+        if worker.is_alive():
+            worker.terminate()
+        worker.join()
+
+
+# The lifespan handler only runs when it is registered on the application.
+app = FastAPI(lifespan=lifespan)
+
+
+@app.get("/scrape")
+async def scrape_endpoint():
+    """Return a fixed status message; scraped data is not returned yet."""
+    # TODO: retrieve and return the scraped data instead of a fixed message.
+    return {"message": "Scraping in progress. Check logs for details."}
diff --git a/api/src/sample_spider.py b/api/src/sample_spider.py
new file mode 100644
index 00000000..7f4d555e
--- /dev/null
+++ b/api/src/sample_spider.py
@@ -0,0 +1,13 @@
+"""Minimal example spider imported by the API service."""
+import scrapy
+
+
+class SampleSpider(scrapy.Spider):
+    """Fetch each start URL and print the raw response body."""
+
+    name = 'SampleSpider'
+    start_urls = ['https://www.google.com/']
+
+    def parse(self, response):
+        """Print the raw body of a downloaded page."""
+        print(response.body)
diff --git a/ui/src/components/Topbar/index.tsx b/ui/src/components/Topbar/index.tsx
index f377ecf6..263434bc 100644
--- a/ui/src/components/Topbar/index.tsx
+++ b/ui/src/components/Topbar/index.tsx
@@ -33,19 +33,6 @@ const Topbar = () => {
-