mwmbl/mwmbl/main.py
2022-06-26 21:11:51 +01:00

66 lines
2.2 KiB
Python

import argparse
import logging
import os
from multiprocessing import Process
import uvicorn
from fastapi import FastAPI
from mwmbl.indexer import historical, retrieve, preprocess, update_pages
from mwmbl.crawler.app import router as crawler_router
from mwmbl.tinysearchengine import search
from mwmbl.tinysearchengine.completer import Completer
from mwmbl.tinysearchengine.indexer import TinyIndex, Document, NUM_PAGES, PAGE_SIZE
from mwmbl.tinysearchengine.rank import HeuristicRanker
logging.basicConfig()
def setup_args():
parser = argparse.ArgumentParser(description="mwmbl-tinysearchengine")
parser.add_argument("--index", help="Path to the tinysearchengine index file", default="/app/storage/index.tinysearch")
args = parser.parse_args()
return args
def run():
args = setup_args()
try:
existing_index = TinyIndex(item_factory=Document, index_path=args.index)
if existing_index.page_size != PAGE_SIZE or existing_index.num_pages != NUM_PAGES:
print(f"Existing index page sizes ({existing_index.page_size}) and number of pages "
f"({existing_index.num_pages}) does not match - removing.")
os.remove(args.index)
existing_index = None
except FileNotFoundError:
existing_index = None
if existing_index is None:
print("Creating a new index")
TinyIndex.create(item_factory=Document, index_path=args.index, num_pages=NUM_PAGES, page_size=PAGE_SIZE)
Process(target=historical.run).start()
Process(target=retrieve.run).start()
Process(target=preprocess.run, args=(args.index,)).start()
Process(target=update_pages.run, args=(args.index,)).start()
completer = Completer()
with TinyIndex(item_factory=Document, index_path=args.index) as tiny_index:
ranker = HeuristicRanker(tiny_index, completer)
# Initialize FastApi instance
app = FastAPI()
search_router = search.create_router(ranker)
app.include_router(search_router)
app.include_router(crawler_router)
# Initialize uvicorn server using global app instance and server config params
uvicorn.run(app, host="0.0.0.0", port=5000)
if __name__ == "__main__":
run()