# Source: mwmbl/mwmbl/background.py (Python, 42 lines, 1.1 KiB)

"""
Script that updates data in a background process.
"""
2023-11-18 18:49:41 +00:00
import logging
import sys
from logging import getLogger, basicConfig
from pathlib import Path
from time import sleep
2022-06-30 19:00:38 +00:00
from mwmbl.crawler.urls import URLDatabase
from mwmbl.database import Database
2023-01-22 20:28:18 +00:00
from mwmbl.indexer import index_batches, historical
from mwmbl.indexer.batch_cache import BatchCache
from mwmbl.indexer.paths import BATCH_DIR_NAME, INDEX_NAME
# Configure the root logger at import time so that records at INFO level and
# above are written to standard output.
basicConfig(stream=sys.stdout, level=logging.INFO)

# Module-level logger, named after this module.
logger = getLogger(__name__)
def run(data_path: str) -> None:
    """Run the background update loop; this function never returns.

    Start-up steps, in order:

    1. Open a ``Database`` and call ``URLDatabase.create_tables()`` on its
       connection.
    2. Call ``historical.run()``.
    3. Build the index path and a ``BatchCache`` from ``data_path``.

    After that it loops forever: ask the batch cache to retrieve batches,
    run the batch indexer, then sleep for 10 seconds. An exception in either
    step is logged with its traceback and the loop carries on, so a failure
    in one step does not stop the process.

    Args:
        data_path: Directory under which ``INDEX_NAME`` and
            ``BATCH_DIR_NAME`` are resolved.
    """
    logger.info("Started background process")

    with Database() as db:
        url_db = URLDatabase(db.connection)
        url_db.create_tables()

    # NOTE(review): the nesting here was reconstructed from a flattened
    # listing -- confirm that historical.run() is meant to execute after the
    # database context has been closed rather than inside it.
    historical.run()

    data_dir = Path(data_path)
    index_path = data_dir / INDEX_NAME
    batch_cache = BatchCache(data_dir / BATCH_DIR_NAME)

    while True:
        # The two steps are guarded separately so that a retrieval failure
        # still lets indexing run on this iteration.
        try:
            batch_cache.retrieve_batches(num_batches=10000)
        except Exception:
            logger.exception("Error retrieving batches")

        try:
            index_batches.run(batch_cache, index_path)
        except Exception:
            logger.exception("Error indexing batches")

        # Pause between iterations.
        sleep(10)