Merge pull request #123 from mwmbl/use-htmx-for-search-results

Use htmx for search results
Daoud Clarke 2023-11-05 13:24:34 +00:00 committed by GitHub
commit 19a8c8ac79
15 changed files with 173 additions and 213 deletions

View file

@@ -1,33 +0,0 @@
-import define from '../utils/define.js';
-import addResult from "./molecules/add-result.js";
-import save from "./organisms/save.js";
-
-const template = () => /*html*/`
-  <header class="search-menu">
-    <ul>
-      <li is="${save}"></li>
-    </ul>
-    <div><a href="/accounts/login/">Login</a> <a href="/accounts/signup/">Sign up</a></div>
-    <div class="branding">
-      <img class="brand-icon" src="/static/images/logo.svg" width="40" height="40" alt="mwmbl logo">
-      <span class="brand-title">MWMBL</span>
-    </div>
-    <mwmbl-search-bar></mwmbl-search-bar>
-  </header>
-  <main>
-    <mwmbl-results></mwmbl-results>
-  </main>
-  <div is="${addResult}"></div>
-  <footer is="mwmbl-footer"></footer>
-`;
-
-export default define('app', class extends HTMLElement {
-  constructor() {
-    super();
-    this.__setup();
-  }
-
-  __setup() {
-    this.innerHTML = template();
-  }
-});

View file

@@ -3,7 +3,7 @@ import config from "../../../config.js";
 import {globalBus} from "../../utils/events.js";

-const FETCH_URL = `${config['publicApiURL']}crawler/fetch?`
+const FETCH_URL = '/app/fetch?'

 const template = () => /*html*/`
@@ -56,7 +56,7 @@ export default define('add-result', class extends HTMLDivElement {
       const url = `${FETCH_URL}url=${encodeURIComponent(value)}&query=${encodeURIComponent(query)}`;
       const response = await fetch(url);
       if (response.status === 200) {
-        const data = await response.json();
+        const data = await response.text();
         console.log("Data", data);
         const addResultEvent = new CustomEvent('curate-add-result', {detail: data});

View file

@@ -1,26 +1,7 @@
 import define from '../../utils/define.js';
 import escapeString from '../../utils/escapeString.js';
 import { globalBus } from '../../utils/events.js';
-import deleteButton from "./delete-button.js";
-import validateButton from "./validate-button.js";
-import addButton from "./add-button.js";
-
-const template = ({ data }) => /*html*/`
-  <div class="result-container">
-    <div class="curation-buttons">
-      <button class="curation-button curate-delete" is="${deleteButton}"></button>
-      <button class="curation-button curate-approve" is="${validateButton}"></button>
-      <button class="curation-button curate-add" is="${addButton}"></button>
-    </div>
-    <div class="result-link">
-      <a href='${data.url}'>
-        <p class='link'>${data.url}</p>
-        <p class='title'>${data.title}</p>
-        <p class='extract'>${data.extract}</p>
-      </a>
-    </div>
-  </div>
-`;

 export default define('result', class extends HTMLLIElement {
   constructor() {
@@ -30,11 +11,6 @@ export default define('result', class extends HTMLLIElement {
   }

   __setup() {
-    this.innerHTML = template({ data: {
-      url: this.dataset.url,
-      title: this.__handleBold(JSON.parse(this.dataset.title)),
-      extract: this.__handleBold(JSON.parse(this.dataset.extract))
-    }});
     this.__events();
   }

View file

@@ -1,21 +1,13 @@
-import define from '../../utils/define.js';
 import {globalBus} from '../../utils/events.js';
-// Components
-import result from '../molecules/result.js';
-import emptyResult from '../molecules/empty-result.js';
-import home from './home.js';
-import escapeString from '../../utils/escapeString.js';

-const template = () => /*html*/`
-  <ul class='results'>
-    <li is='${home}'></li>
-  </ul>
-`;
-
-export default define('results', class extends HTMLElement {
+document.body.addEventListener('htmx:load', function(evt) {
+});
+
+class ResultsHandler {
   constructor() {
-    super();
     this.results = null;
     this.oldIndex = null;
     this.curating = false;
@@ -23,50 +15,12 @@ export default define('results', class extends HTMLElement {
   }

   __setup() {
-    this.innerHTML = template();
-    this.results = this.querySelector('.results');
     this.__events();
   }

   __events() {
-    globalBus.on('search', (e) => {
-      this.results.innerHTML = '';
-      let resultsHTML = '';
-      if (!e.detail.error) {
-        // If there is no details the input is empty
-        if (!e.detail.results) {
-          resultsHTML = /*html*/`
-            <li is='${home}'></li>
-          `;
-        }
-        // If the details array has results display them
-        else if (e.detail.results.length > 0) {
-          for(const resultData of e.detail.results) {
-            resultsHTML += /*html*/`
-              <li
-                is='${result}'
-                data-url='${escapeString(resultData.url)}'
-                data-title='${escapeString(JSON.stringify(resultData.title))}'
-                data-extract='${escapeString(JSON.stringify(resultData.extract))}'
-              ></li>
-            `;
-          }
-        }
-        // If the details array is empty there is no result
-        else {
-          resultsHTML = /*html*/`
-            <li is='${emptyResult}'></li>
-          `;
-        }
-      }
-      else {
-        // If there is an error display an empty result
-        resultsHTML = /*html*/`
-          <li is='${emptyResult}'></li>
-        `;
-      }
-      // Bind HTML to the DOM
-      this.results.innerHTML = resultsHTML;
+    document.body.addEventListener('htmx:load', e => {
+      this.results = document.querySelector('.results');

       // Allow the user to re-order search results
       $(".results").sortable({
@@ -142,15 +96,7 @@ export default define('results', class extends HTMLElement {
       console.log("Add result", e);
       this.__beginCurating();
       const resultData = e.detail;
-      const resultHTML = /*html*/`
-        <li
-          is='${result}'
-          data-url='${escapeString(resultData.url)}'
-          data-title='${escapeString(JSON.stringify(resultData.title))}'
-          data-extract='${escapeString(JSON.stringify(resultData.extract))}'
-        ></li>
-      `;
-      this.results.insertAdjacentHTML('afterbegin', resultHTML);
+      this.results.insertAdjacentHTML('afterbegin', resultData);

       const newResults = this.__getResults();
@@ -236,4 +182,6 @@ export default define('results', class extends HTMLElement {
     });
     globalBus.dispatch(curationMoveEvent);
   }
-});
+}
+
+const resultsHandler = new ResultsHandler();

View file

@@ -48,6 +48,8 @@
   <!-- <mwmbl-register></mwmbl-register>-->
   <mwmbl-app></mwmbl-app>
   <noscript>
+    <!-- https://stackoverflow.com/a/431554 -->
+    <style> .jsonly { display: none } </style>
     <main class="noscript">
       <img class="brand-icon" src="/static/images/logo.svg" width="40" height="40" alt="mwmbl logo">
       <h1>
@@ -63,8 +65,46 @@
       </p>
     </main>
   </noscript>
   <!-- Javasript entrypoint -->
+  <script src="https://unpkg.com/htmx.org@1.9.6"></script>
   <script src="./index.js" type="module"></script>
+  <main class="jsonly">
+    <header class="search-menu">
+      <ul>
+        <li is="${save}"></li>
+      </ul>
+      <div><a href="/accounts/login/">Login</a> <a href="/accounts/signup/">Sign up</a></div>
+      <div class="branding">
+        <img class="brand-icon" src="/static/images/logo.svg" width="40" height="40" alt="mwmbl logo">
+        <span class="brand-title">MWMBL</span>
+      </div>
+      <form class="search-bar">
+        <i class="ph-magnifying-glass-bold"></i>
+        <input
+          type='search'
+          name='query'
+          class='search-bar-input'
+          placeholder='Search on mwmbl...'
+          title='Use "CTRL+K" or "/" to focus.'
+          autocomplete='off'
+          hx-get="/app/search/"
+          hx-trigger="keyup changed delay:100ms"
+          hx-target=".results"
+        >
+      </form>
+    </header>
+    <main>
+      <mwmbl-results>
+        <ul class='results'>
+          <li is='${home}'></li>
+        </ul>
+      </mwmbl-results>
+    </main>
+    <div is="mwmbl-add-result"></div>
+    <footer is="mwmbl-footer"></footer>
+  </main>
 </body>
 </html>

View file

@@ -14,7 +14,6 @@
   if (!redirected) {
     // Load components only after redirects are checked.
-    import('./components/app.js');
     import('./components/login.js');
     import('./components/register.js');
     import("./components/organisms/search-bar.js");

View file

@@ -1,28 +1,10 @@
-from multiprocessing import Queue
-from pathlib import Path
-
-from django.conf import settings
 from ninja import NinjaAPI
 from ninja.security import django_auth

 import mwmbl.crawler.app as crawler
-from mwmbl.indexer.batch_cache import BatchCache
-from mwmbl.indexer.paths import INDEX_NAME, BATCH_DIR_NAME
 from mwmbl.platform import curate
+from mwmbl.search_setup import queued_batches, index_path, ranker, batch_cache
 from mwmbl.tinysearchengine import search
-from mwmbl.tinysearchengine.completer import Completer
-from mwmbl.tinysearchengine.indexer import TinyIndex, Document
-from mwmbl.tinysearchengine.rank import HeuristicRanker
-
-queued_batches = Queue()
-completer = Completer()
-index_path = Path(settings.DATA_PATH) / INDEX_NAME
-tiny_index = TinyIndex(item_factory=Document, index_path=index_path)
-tiny_index.__enter__()
-ranker = HeuristicRanker(tiny_index, completer)
-batch_cache = BatchCache(Path(settings.DATA_PATH) / BATCH_DIR_NAME)

 def create_api(version):

View file

@@ -13,7 +13,7 @@ class MwmblConfig(AppConfig):
     def ready(self):
         # Imports here to avoid AppRegistryNotReady exception
-        from mwmbl.api import queued_batches
+        from mwmbl.search_setup import queued_batches
         from mwmbl import background
         from mwmbl.indexer.paths import INDEX_NAME
         from mwmbl.indexer.update_urls import update_urls_continuously

View file

@@ -8,12 +8,8 @@ from typing import Union
 from uuid import uuid4

 import boto3
-import justext
 import requests
 from fastapi import HTTPException
-from justext.core import html_to_dom, ParagraphMaker, classify_paragraphs, revise_paragraph_classification, \
-    LENGTH_LOW_DEFAULT, STOPWORDS_LOW_DEFAULT, MAX_LINK_DENSITY_DEFAULT, NO_HEADINGS_DEFAULT, LENGTH_HIGH_DEFAULT, \
-    STOPWORDS_HIGH_DEFAULT, MAX_HEADING_DISTANCE_DEFAULT, DEFAULT_ENCODING, DEFAULT_ENC_ERRORS, preprocessor
 from ninja import Router
 from redis import Redis
@@ -21,7 +17,6 @@ from mwmbl.crawler.batch import Batch, NewBatchRequest, HashedBatch
 from mwmbl.crawler.stats import MwmblStats, StatsManager
 from mwmbl.crawler.urls import URLDatabase, FoundURL, URLStatus
 from mwmbl.database import Database
-from mwmbl.format import format_result
 from mwmbl.indexer.batch_cache import BatchCache
 from mwmbl.indexer.indexdb import IndexDatabase, BatchInfo, BatchStatus
 from mwmbl.settings import (
@@ -35,9 +30,7 @@ from mwmbl.settings import (
     PUBLIC_URL_PREFIX,
     PUBLIC_USER_ID_LENGTH,
     FILE_NAME_SUFFIX,
-    DATE_REGEX, NUM_EXTRACT_CHARS)
-from mwmbl.tinysearchengine.indexer import Document
+    DATE_REGEX)

 stats_manager = StatsManager(Redis.from_url(os.environ.get("REDIS_URL")))
@@ -57,32 +50,6 @@ def upload(data: bytes, name: str):
 last_batch = None

-def justext_with_dom(html_text, stoplist, length_low=LENGTH_LOW_DEFAULT,
-                     length_high=LENGTH_HIGH_DEFAULT, stopwords_low=STOPWORDS_LOW_DEFAULT,
-                     stopwords_high=STOPWORDS_HIGH_DEFAULT, max_link_density=MAX_LINK_DENSITY_DEFAULT,
-                     max_heading_distance=MAX_HEADING_DISTANCE_DEFAULT, no_headings=NO_HEADINGS_DEFAULT,
-                     encoding=None, default_encoding=DEFAULT_ENCODING,
-                     enc_errors=DEFAULT_ENC_ERRORS):
-    """
-    Converts an HTML page into a list of classified paragraphs. Each paragraph
-    is represented as instance of class ``justext.paragraph.Paragraph``.
-    """
-    dom = html_to_dom(html_text, default_encoding, encoding, enc_errors)
-    titles = dom.xpath("//title")
-    title = titles[0].text if len(titles) > 0 else None
-    dom = preprocessor(dom)
-    paragraphs = ParagraphMaker.make_paragraphs(dom)
-    classify_paragraphs(paragraphs, stoplist, length_low, length_high,
-                        stopwords_low, stopwords_high, max_link_density, no_headings)
-    revise_paragraph_classification(paragraphs, max_heading_distance)
-    return paragraphs, title

 def create_router(batch_cache: BatchCache, queued_batches: Queue) -> Router:
     router = Router(tags=["crawler"])
@@ -90,19 +57,6 @@ def create_router(batch_cache: BatchCache, queued_batches: Queue) -> Router:
     # #
     # # url_db.create_tables()

-    @router.get('/fetch')
-    def fetch_url(request, url: str, query: str):
-        response = requests.get(url)
-        paragraphs, title = justext_with_dom(response.content, justext.get_stoplist("English"))
-        good_paragraphs = [p for p in paragraphs if p.class_type == 'good']
-        extract = ' '.join([p.text for p in good_paragraphs])
-        if len(extract) > NUM_EXTRACT_CHARS:
-            extract = extract[:NUM_EXTRACT_CHARS - 1] + '…'
-        result = Document(title=title, url=url, extract=extract, score=0.0)
-        return format_result(result, query)

     @router.post('/batches/')
     def post_batch(request, batch: Batch):
         if len(batch.items) > MAX_BATCH_SIZE:

mwmbl/search_setup.py Normal file
View file

@@ -0,0 +1,19 @@
+from multiprocessing import Queue
+from pathlib import Path
+
+from django.conf import settings
+
+from mwmbl.indexer.batch_cache import BatchCache
+from mwmbl.indexer.paths import INDEX_NAME, BATCH_DIR_NAME
+from mwmbl.tinysearchengine.completer import Completer
+from mwmbl.tinysearchengine.indexer import TinyIndex, Document
+from mwmbl.tinysearchengine.rank import HeuristicRanker
+
+
+queued_batches = Queue()
+completer = Completer()
+index_path = Path(settings.DATA_PATH) / INDEX_NAME
+tiny_index = TinyIndex(item_factory=Document, index_path=index_path)
+tiny_index.__enter__()
+ranker = HeuristicRanker(tiny_index, completer)
+batch_cache = BatchCache(Path(settings.DATA_PATH) / BATCH_DIR_NAME)
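
The new search_setup module exists so that the index, completer, ranker and batch queue are constructed once at import time and shared everywhere: mwmbl/api.py, the app config and the new views in this PR all import from it instead of building their own. A minimal sketch of that usage pattern, with the helper names invented here purely for illustration:

# Sketch only: consumers import the process-wide singletons rather than
# constructing their own TinyIndex / HeuristicRanker per module.
from mwmbl.search_setup import ranker, queued_batches


def handle_query(query: str):
    # Same call the new search_results view makes.
    return ranker.search(query)


def enqueue(batch):
    # queued_batches is the multiprocessing.Queue consumed by the background indexer.
    queued_batches.put(batch)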

View file

@@ -0,0 +1,19 @@
+{% load result_filters %}
+{% for result in results %}
+  <li class="result" is="mwmbl-result">
+    <div class="result-container">
+      <div class="curation-buttons">
+        <button class="curation-button curate-delete" is="mwmbl-delete-button"></button>
+        <button class="curation-button curate-approve" is="mwmbl-validate-button"></button>
+        <button class="curation-button curate-add" is="mwmbl-add-button"></button>
+      </div>
+      <div class="result-link">
+        <a href="{{result.url}}">
+          <p class='link'>{{result.url}}</p>
+          <p class='title'>{{result.title|strengthen}}</p>
+          <p class='extract'>{{result.extract|strengthen}}</p>
+        </a>
+      </div>
+    </div>
+  </li>
+{% endfor %}

View file

View file

@@ -0,0 +1,18 @@
+from django.template import Library
+from django.utils.html import conditional_escape
+from django.utils.safestring import mark_safe
+
+register = Library()
+
+
+@register.filter(needs_autoescape=True)
+def strengthen(spans, autoescape=True):
+    escape = conditional_escape if autoescape else lambda x: x
+    strengthened = []
+    for span in spans:
+        escaped_value = escape(span["value"])
+        if span["is_bold"]:
+            strengthened.append(f"<strong>{escaped_value}</strong>")
+        else:
+            strengthened.append(escaped_value)
+    return mark_safe("".join(strengthened))
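
The strengthen filter assumes each of result.title and result.extract is a list of spans shaped like {"value": ..., "is_bold": ...} (the format produced by format_result): every value is escaped and the bold spans are wrapped in <strong>. A rough illustration, calling the filter function directly with made-up spans; the templatetags package path is assumed from Django's convention for filter libraries:

# Illustration only: invoking the filter outside a template with invented data.
from mwmbl.templatetags.result_filters import strengthen  # assumed module path

spans = [
    {"value": "Free ", "is_bold": False},
    {"value": "encyclopedia", "is_bold": True},   # matched query term
    {"value": " that anyone can edit", "is_bold": False},
]

print(strengthen(spans))
# Free <strong>encyclopedia</strong> that anyone can edit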

View file

@@ -15,12 +15,10 @@ Including another URLconf
     2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
 """
 from django.contrib import admin
-from django.contrib.auth import login, logout
-from django.template.defaulttags import url
 from django.urls import path, include

 from mwmbl.api import api_original as api, api_v1
-from mwmbl.views import signup, profile
+from mwmbl.views import profile, search_results, fetch_url

 urlpatterns = [
     path('admin/', admin.site.urls),
@@ -28,9 +26,7 @@ urlpatterns = [
     path('api/v1/', api_v1.urls),
     path('accounts/', include('allauth.urls')),
-    # path("accounts/", include("django.contrib.auth.urls")),
-    # path('accounts/new/', signup, name='signup'),
     path('accounts/profile/', profile, name='profile'),
-    # path('login/', login, {'template_name': 'login.html'}, name='login'),
-    # path('logout/', logout, {'next_page': 'login'}, name='logout'),
+    path('app/search/', search_results, name="search_results"),
+    path('app/fetch/', fetch_url, name="fetch_url")
 ]

View file

@@ -1,24 +1,66 @@
-from django.contrib.auth import authenticate, login
+import justext
+import requests
 from django.contrib.auth.decorators import login_required
-from django.contrib.auth.forms import UserCreationForm
-from django.shortcuts import redirect, render
+from django.shortcuts import render
+
+from mwmbl.format import format_result
+from mwmbl.search_setup import ranker
+from justext.core import html_to_dom, ParagraphMaker, classify_paragraphs, revise_paragraph_classification, \
+    LENGTH_LOW_DEFAULT, STOPWORDS_LOW_DEFAULT, MAX_LINK_DENSITY_DEFAULT, NO_HEADINGS_DEFAULT, LENGTH_HIGH_DEFAULT, \
+    STOPWORDS_HIGH_DEFAULT, MAX_HEADING_DISTANCE_DEFAULT, DEFAULT_ENCODING, DEFAULT_ENC_ERRORS, preprocessor
+from mwmbl.settings import NUM_EXTRACT_CHARS
+from mwmbl.tinysearchengine.indexer import Document


-def signup(request):
-    if request.method == 'POST':
-        form = UserCreationForm(request.POST)
-        if form.is_valid():
-            form.save()
-            username = form.cleaned_data.get('username')
-            raw_password = form.cleaned_data.get('password1')
-            user = authenticate(username=username, password=raw_password)
-            login(request, user)
-            return redirect('/')
-    else:
-        form = UserCreationForm()
-    return render(request, 'signup.html', {'form': form})
+def justext_with_dom(html_text, stoplist, length_low=LENGTH_LOW_DEFAULT,
+                     length_high=LENGTH_HIGH_DEFAULT, stopwords_low=STOPWORDS_LOW_DEFAULT,
+                     stopwords_high=STOPWORDS_HIGH_DEFAULT, max_link_density=MAX_LINK_DENSITY_DEFAULT,
+                     max_heading_distance=MAX_HEADING_DISTANCE_DEFAULT, no_headings=NO_HEADINGS_DEFAULT,
+                     encoding=None, default_encoding=DEFAULT_ENCODING,
+                     enc_errors=DEFAULT_ENC_ERRORS):
+    """
+    Converts an HTML page into a list of classified paragraphs. Each paragraph
+    is represented as instance of class ``justext.paragraph.Paragraph``.
+    """
+    dom = html_to_dom(html_text, default_encoding, encoding, enc_errors)
+    titles = dom.xpath("//title")
+    title = titles[0].text if len(titles) > 0 else None
+    dom = preprocessor(dom)
+    paragraphs = ParagraphMaker.make_paragraphs(dom)
+    classify_paragraphs(paragraphs, stoplist, length_low, length_high,
+                        stopwords_low, stopwords_high, max_link_density, no_headings)
+    revise_paragraph_classification(paragraphs, max_heading_distance)
+    return paragraphs, title


 @login_required
 def profile(request):
     return render(request, 'profile.html')


+def search_results(request):
+    query = request.GET["query"]
+    results = ranker.search(query)
+    return render(request, "results.html", {"results": results})
+
+
+def fetch_url(request):
+    url = request.GET["url"]
+    query = request.GET["query"]
+    response = requests.get(url)
+    paragraphs, title = justext_with_dom(response.content, justext.get_stoplist("English"))
+    good_paragraphs = [p for p in paragraphs if p.class_type == 'good']
+    extract = ' '.join([p.text for p in good_paragraphs])
+    if len(extract) > NUM_EXTRACT_CHARS:
+        extract = extract[:NUM_EXTRACT_CHARS - 1] + '…'
+    result = Document(title=title, url=url, extract=extract, score=0.0)
+    return render(request, "results.html", {"results": [format_result(result, query)]})
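
Together with the urls.py changes, these views complete the htmx flow: the search input issues GET /app/search/?query=... on each debounced keyup, search_results renders results.html, and htmx swaps that HTML fragment straight into the .results list, so no JSON handling remains in the front end. A rough end-to-end sketch using Django's test client; it assumes a configured index with at least one hit for the query, and outbound network access for the fetch example:

# Sketch only: exercising the new endpoints the same way the htmx attributes do.
from django.test import Client

client = Client()

# What the search bar triggers (hx-get="/app/search/" with its query value):
response = client.get("/app/search/", {"query": "mwmbl"})
assert response.status_code == 200
assert b'class="result"' in response.content  # <li> items rendered by results.html

# What the add-result form calls; the returned fragment is inserted at the top of .results:
response = client.get("/app/fetch/", {"url": "https://example.com", "query": "example"})
assert response.status_code == 200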