49 lines
2.0 KiB
Python
49 lines
2.0 KiB
Python
import os
|
|
import shutil
|
|
from multiprocessing import Process, Queue
|
|
from pathlib import Path
|
|
|
|
from django.apps import AppConfig
|
|
from django.conf import settings
|
|
|
|
from mwmbl.crawler.urls import URLDatabase
|
|
from mwmbl.database import Database
|
|
from mwmbl.indexer.indexdb import IndexDatabase
|
|
|
|
|
|
class MwmblConfig(AppConfig):
    """Django application configuration for the mwmbl app."""

    name = "mwmbl"
    verbose_name = "Mwmbl Application"

    def ready(self):
        """Prepare the index and database tables, then optionally start workers.

        Steps, in order:

        1. Open the index at ``settings.DATA_PATH / INDEX_NAME``. If the
           index file is missing, create a new one with
           ``settings.NUM_PAGES`` pages of ``PAGE_SIZE``.
        2. Create the URL and index database tables.
        3. If ``settings.RUN_BACKGROUND_PROCESSES`` is truthy, start three
           separate processes that share a single queue.

        Raises:
            ValueError: If an index already exists but its page size or
                number of pages differs from ``PAGE_SIZE`` /
                ``settings.NUM_PAGES``.
        """
        # These imports are deferred to call time to avoid an
        # AppRegistryNotReady exception.
        from mwmbl.search_setup import queued_batches
        from mwmbl import background
        from mwmbl.indexer.paths import INDEX_NAME
        from mwmbl.indexer.update_urls import update_urls_continuously
        from mwmbl.tinysearchengine.indexer import TinyIndex, Document, PAGE_SIZE
        from mwmbl.url_queue import update_queue_continuously

        index_file = Path(settings.DATA_PATH) / INDEX_NAME
        try:
            index = TinyIndex(item_factory=Document, index_path=index_file)
            if index.page_size != PAGE_SIZE or index.num_pages != settings.NUM_PAGES:
                message = (
                    f"Existing index page sizes ({index.page_size}) or number of pages "
                    f"({index.num_pages}) do not match"
                )
                raise ValueError(message)
        except FileNotFoundError:
            # No index on disk yet: build an empty one with the configured shape.
            print("Creating a new index")
            TinyIndex.create(
                item_factory=Document,
                index_path=index_file,
                num_pages=settings.NUM_PAGES,
                page_size=PAGE_SIZE,
            )

        # Both table sets are created over the same database connection,
        # URL tables first.
        with Database() as db:
            for table_owner in (URLDatabase, IndexDatabase):
                table_owner(db.connection).create_tables()

        if not settings.RUN_BACKGROUND_PROCESSES:
            return

        # The same queue object is handed to the queue updater and the URL
        # updater.
        item_queue = Queue()
        workers = (
            (background.run, (settings.DATA_PATH,)),
            (update_queue_continuously, (item_queue, queued_batches)),
            (update_urls_continuously, (settings.DATA_PATH, item_queue)),
        )
        for target, args in workers:
            Process(target=target, args=args).start()
|