# mwmbl/test/test_url_queue.py
#
# 38 lines
# 1.2 KiB
# Python

from datetime import datetime
from queue import Queue
from mwmbl.crawler.urls import FoundURL, URLStatus
from mwmbl.url_queue import URLQueue
def test_url_queue_empties():
    """A single new URL put on the input queue is emitted as one batch after update()."""
    incoming = Queue()
    outgoing = Queue()
    url_queue = URLQueue(incoming, outgoing, min_top_domains=1)

    found = FoundURL(
        "https://google.com",
        "123",
        10.0,
        URLStatus.NEW.value,
        datetime(2023, 1, 19),
    )
    incoming.put([found])
    url_queue.update()

    # Non-blocking read: the test fails fast with queue.Empty if no batch was produced.
    batch = outgoing.get_nowait()
    assert batch == ["https://google.com"]
def test_url_queue_multiple_puts():
    """Two put-then-update rounds produce two batches, read back in submission order."""
    incoming = Queue()
    outgoing = Queue()
    url_queue = URLQueue(incoming, outgoing, min_top_domains=1)

    # (url, second FoundURL argument, timestamp) for each round, in submission order.
    rounds = [
        ("https://google.com", "123", datetime(2023, 1, 19)),
        ("https://www.supermemo.com", "124", datetime(2023, 1, 20)),
    ]

    # Each URL is submitted and processed by its own update() call.
    for url, ident, when in rounds:
        incoming.put([FoundURL(url, ident, 10.0, URLStatus.NEW.value, when)])
        url_queue.update()

    # Non-blocking reads: a missing batch raises queue.Empty instead of hanging.
    for url, _ident, _when in rounds:
        assert outgoing.get_nowait() == [url]