People: Lower wildcard threshold for subject and keyword search #22 #882

This commit is contained in:
Michael Mayer 2021-08-29 19:19:54 +02:00
parent fa5e906c96
commit cbf604b477
4 changed files with 154 additions and 42 deletions

View file

@ -54,7 +54,7 @@ func Geo(f form.GeoSearch) (results GeoResults, err error) {
if err := Db().Where(AnySlug("custom_slug", f.Query, " ")).Find(&labels).Error; len(labels) == 0 || err != nil {
log.Infof("search: label %s not found, using fuzzy search", txt.Quote(f.Query))
for _, where := range LikeAny("k.keyword", f.Query) {
for _, where := range LikeAnyKeyword("k.keyword", f.Query) {
s = s.Where("photos.id IN (SELECT pk.photo_id FROM keywords k JOIN photos_keywords pk ON k.id = pk.keyword_id WHERE (?))", gorm.Expr(where))
}
} else {
@ -70,7 +70,7 @@ func Geo(f form.GeoSearch) (results GeoResults, err error) {
}
}
if wheres := LikeAny("k.keyword", f.Query); len(wheres) > 0 {
if wheres := LikeAnyKeyword("k.keyword", f.Query); len(wheres) > 0 {
for _, where := range wheres {
s = s.Where("photos.id IN (SELECT pk.photo_id FROM keywords k JOIN photos_keywords pk ON k.id = pk.keyword_id WHERE (?)) OR "+
"photos.id IN (SELECT pl.photo_id FROM photos_labels pl WHERE pl.uncertainty < 100 AND pl.label_id IN (?))", gorm.Expr(where), labelIds)
@ -83,7 +83,7 @@ func Geo(f form.GeoSearch) (results GeoResults, err error) {
// Search for one or more keywords?
if f.Keywords != "" {
for _, where := range LikeAll("k.keyword", f.Keywords) {
for _, where := range LikeAllKeywords("k.keyword", f.Keywords) {
s = s.Where("photos.id IN (SELECT pk.photo_id FROM keywords k JOIN photos_keywords pk ON k.id = pk.keyword_id WHERE (?))", gorm.Expr(where))
}
}
@ -93,7 +93,7 @@ func Geo(f form.GeoSearch) (results GeoResults, err error) {
s = s.Where(fmt.Sprintf("photos.id IN (SELECT photo_id FROM files f JOIN %s m ON f.id = m.file_id AND m.marker_invalid = 0 WHERE subject_uid IN (?))",
entity.Marker{}.TableName()), strings.Split(strings.ToLower(f.Subject), Or))
} else if f.Subjects != "" {
for _, where := range LikeAny("s.subject_name", f.Subjects) {
for _, where := range LikeAnyWord("s.subject_name", f.Subjects) {
s = s.Where(fmt.Sprintf("photos.id IN (SELECT photo_id FROM files f JOIN %s m ON f.id = m.file_id AND m.marker_invalid = 0 JOIN %s s ON s.subject_uid = m.subject_uid WHERE (?))",
entity.Marker{}.TableName(), entity.Subject{}.TableName()), gorm.Expr(where))
}
@ -104,7 +104,7 @@ func Geo(f form.GeoSearch) (results GeoResults, err error) {
s = s.Joins("JOIN photos_albums ON photos_albums.photo_uid = photos.photo_uid").
Where("photos_albums.hidden = 0 AND photos_albums.album_uid = ?", f.Album)
} else if f.Albums != "" {
for _, where := range LikeAny("a.album_title", f.Albums) {
for _, where := range LikeAnyWord("a.album_title", f.Albums) {
s = s.Where("photos.photo_uid IN (SELECT pa.photo_uid FROM photos_albums pa JOIN albums a ON a.album_uid = pa.album_uid WHERE (?))", gorm.Expr(where))
}
}

View file

@ -10,36 +10,53 @@ import (
"github.com/jinzhu/inflection"
)
// LikeAny returns a single where condition matching the search keywords.
func LikeAny(col, keywords string) (wheres []string) {
keywords = strings.ReplaceAll(keywords, Or, " ")
keywords = strings.ReplaceAll(keywords, OrEn, " ")
keywords = strings.ReplaceAll(keywords, AndEn, And)
// LikeAny returns a single where condition matching the search words.
func LikeAny(col, s string, keywords bool) (wheres []string) {
if s == "" {
return wheres
}
for _, k := range strings.Split(keywords, And) {
s = strings.ReplaceAll(s, Or, " ")
s = strings.ReplaceAll(s, OrEn, " ")
s = strings.ReplaceAll(s, AndEn, And)
var wildcardThreshold int
if keywords {
wildcardThreshold = 4
} else {
wildcardThreshold = 2
}
for _, k := range strings.Split(s, And) {
var orWheres []string
var words []string
words := txt.UniqueKeywords(k)
if keywords {
words = txt.UniqueKeywords(k)
} else {
words = txt.UniqueWords(txt.Words(k))
}
if len(words) == 0 {
continue
}
for _, w := range words {
if len(w) > 3 {
if len(w) >= wildcardThreshold {
orWheres = append(orWheres, fmt.Sprintf("%s LIKE '%s%%'", col, w))
} else {
orWheres = append(orWheres, fmt.Sprintf("%s = '%s'", col, w))
orWheres = append(orWheres, fmt.Sprintf("%s LIKE '%s'", col, w))
}
if !txt.ContainsASCIILetters(w) {
if !keywords || !txt.ContainsASCIILetters(w) {
continue
}
singular := inflection.Singular(w)
if singular != w {
orWheres = append(orWheres, fmt.Sprintf("%s = '%s'", col, singular))
orWheres = append(orWheres, fmt.Sprintf("%s LIKE '%s'", col, singular))
}
}
@ -51,25 +68,58 @@ func LikeAny(col, keywords string) (wheres []string) {
return wheres
}
// LikeAll returns a list of where conditions matching all search keywords.
func LikeAll(col, keywords string) (wheres []string) {
words := txt.UniqueKeywords(keywords)
// LikeAnyKeyword is a convenience wrapper around LikeAny with keyword mode
// enabled: col and s are forwarded unchanged and the keywords flag is true,
// so LikeAny applies its keyword wildcard threshold and singular-form matching.
func LikeAnyKeyword(col, s string) (wheres []string) {
	const keywords = true

	wheres = LikeAny(col, s, keywords)

	return wheres
}
// LikeAnyWord is a convenience wrapper around LikeAny with keyword mode
// disabled: col and s are forwarded unchanged and the keywords flag is false,
// so LikeAny applies its lower word wildcard threshold and skips
// singular-form matching.
func LikeAnyWord(col, s string) (wheres []string) {
	const keywords = false

	wheres = LikeAny(col, s, keywords)

	return wheres
}
// LikeAll returns a list of where conditions matching all search words.
//
// Every word extracted from s yields its own condition on col; callers apply
// the conditions one after another so that each word has to match.
// When keywords is true, the words come from txt.UniqueKeywords and receive a
// trailing wildcard once they are at least 4 bytes long. Otherwise they come
// from txt.UniqueWords(txt.Words(s)) and receive it from 2 bytes on.
// The result is nil if s is empty or no words were extracted.
//
// NOTE(review): words are interpolated into the SQL fragment without escaping;
// presumably the txt helpers strip quote characters — confirm.
func LikeAll(col, s string, keywords bool) (wheres []string) {
if s == "" {
return wheres
}
var words []string
var wildcardThreshold int
// Pick the tokenizer and the minimum length for prefix matching.
if keywords {
words = txt.UniqueKeywords(s)
wildcardThreshold = 4
} else {
words = txt.UniqueWords(txt.Words(s))
wildcardThreshold = 2
}
if len(words) == 0 {
return wheres
}
for _, w := range words {
// NOTE(review): the next two lines look like the removed and the added side
// of the same diff line (fixed length check vs. wildcardThreshold) — confirm
// against the repository before treating this as compilable code.
// len(w) counts bytes, not runes.
if len(w) > 3 {
if len(w) >= wildcardThreshold {
wheres = append(wheres, fmt.Sprintf("%s LIKE '%s%%'", col, w))
} else {
// NOTE(review): likewise, "=" appears to be the removed side and the
// wildcard-free LIKE its replacement — confirm.
wheres = append(wheres, fmt.Sprintf("%s = '%s'", col, w))
wheres = append(wheres, fmt.Sprintf("%s LIKE '%s'", col, w))
}
}
return wheres
}
// LikeAllKeywords is a convenience wrapper around LikeAll with keyword mode
// enabled: col and s are forwarded unchanged and the keywords flag is true,
// yielding one where condition per extracted keyword.
func LikeAllKeywords(col, s string) (wheres []string) {
	const keywords = true

	wheres = LikeAll(col, s, keywords)

	return wheres
}
// LikeAllWords is a convenience wrapper around LikeAll with keyword mode
// disabled: col and s are forwarded unchanged and the keywords flag is false,
// yielding one where condition per extracted word.
func LikeAllWords(col, s string) (wheres []string) {
	const keywords = false

	wheres = LikeAll(col, s, keywords)

	return wheres
}
// AnySlug returns a where condition that matches any slug in search.
func AnySlug(col, search, sep string) (where string) {
if search == "" {

View file

@ -8,47 +8,47 @@ import (
func TestLikeAny(t *testing.T) {
t.Run("and_or_search", func(t *testing.T) {
if w := LikeAny("k.keyword", "table spoon & usa | img json"); len(w) != 2 {
if w := LikeAny("k.keyword", "table spoon & usa | img json", true); len(w) != 2 {
t.Fatal("two where conditions expected")
} else {
assert.Equal(t, "k.keyword LIKE 'spoon%' OR k.keyword LIKE 'table%'", w[0])
assert.Equal(t, "k.keyword LIKE 'json%' OR k.keyword = 'usa'", w[1])
assert.Equal(t, "k.keyword LIKE 'json%' OR k.keyword LIKE 'usa'", w[1])
}
})
t.Run("and_or_search_en", func(t *testing.T) {
if w := LikeAny("k.keyword", "table spoon and usa or img json"); len(w) != 2 {
if w := LikeAny("k.keyword", "table spoon and usa or img json", true); len(w) != 2 {
t.Fatal("two where conditions expected")
} else {
assert.Equal(t, "k.keyword LIKE 'spoon%' OR k.keyword LIKE 'table%'", w[0])
assert.Equal(t, "k.keyword LIKE 'json%' OR k.keyword = 'usa'", w[1])
assert.Equal(t, "k.keyword LIKE 'json%' OR k.keyword LIKE 'usa'", w[1])
}
})
t.Run("table spoon usa img json", func(t *testing.T) {
if w := LikeAny("k.keyword", "table spoon usa img json"); len(w) != 1 {
if w := LikeAny("k.keyword", "table spoon usa img json", true); len(w) != 1 {
t.Fatal("one where condition expected")
} else {
assert.Equal(t, "k.keyword LIKE 'json%' OR k.keyword LIKE 'spoon%' OR k.keyword LIKE 'table%' OR k.keyword = 'usa'", w[0])
assert.Equal(t, "k.keyword LIKE 'json%' OR k.keyword LIKE 'spoon%' OR k.keyword LIKE 'table%' OR k.keyword LIKE 'usa'", w[0])
}
})
t.Run("cat dog", func(t *testing.T) {
if w := LikeAny("k.keyword", "cat dog"); len(w) != 1 {
if w := LikeAny("k.keyword", "cat dog", true); len(w) != 1 {
t.Fatal("one where condition expected")
} else {
assert.Equal(t, "k.keyword = 'cat' OR k.keyword = 'dog'", w[0])
assert.Equal(t, "k.keyword LIKE 'cat' OR k.keyword LIKE 'dog'", w[0])
}
})
t.Run("cats dogs", func(t *testing.T) {
if w := LikeAny("k.keyword", "cats dogs"); len(w) != 1 {
if w := LikeAny("k.keyword", "cats dogs", true); len(w) != 1 {
t.Fatal("one where condition expected")
} else {
assert.Equal(t, "k.keyword LIKE 'cats%' OR k.keyword = 'cat' OR k.keyword LIKE 'dogs%' OR k.keyword = 'dog'", w[0])
assert.Equal(t, "k.keyword LIKE 'cats%' OR k.keyword LIKE 'cat' OR k.keyword LIKE 'dogs%' OR k.keyword LIKE 'dog'", w[0])
}
})
t.Run("spoon", func(t *testing.T) {
if w := LikeAny("k.keyword", "spoon"); len(w) != 1 {
if w := LikeAny("k.keyword", "spoon", true); len(w) != 1 {
t.Fatal("one where condition expected")
} else {
assert.Equal(t, "k.keyword LIKE 'spoon%'", w[0])
@ -56,23 +56,61 @@ func TestLikeAny(t *testing.T) {
})
t.Run("img", func(t *testing.T) {
if w := LikeAny("k.keyword", "img"); len(w) > 0 {
if w := LikeAny("k.keyword", "img", true); len(w) > 0 {
t.Fatal("no where condition expected")
}
})
t.Run("empty", func(t *testing.T) {
if w := LikeAny("k.keyword", ""); len(w) > 0 {
if w := LikeAny("k.keyword", "", true); len(w) > 0 {
t.Fatal("no where condition expected")
}
})
}
// TestLikeAnyKeyword checks that LikeAnyKeyword yields the same two where
// conditions for a query written with "&" / "|" and for the equivalent query
// written with "and" / "or".
func TestLikeAnyKeyword(t *testing.T) {
	cases := []struct {
		name  string
		query string
	}{
		{name: "and_or_search", query: "table spoon & usa | img json"},
		{name: "and_or_search_en", query: "table spoon and usa or img json"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			w := LikeAnyKeyword("k.keyword", tc.query)

			if len(w) != 2 {
				t.Fatal("two where conditions expected")
			}

			assert.Equal(t, "k.keyword LIKE 'spoon%' OR k.keyword LIKE 'table%'", w[0])
			assert.Equal(t, "k.keyword LIKE 'json%' OR k.keyword LIKE 'usa'", w[1])
		})
	}
}
// TestLikeAnyWord checks that LikeAnyWord yields the same two where conditions
// for a query written with "&" / "|" and for the equivalent query written with
// "and" / "or", and that three-letter words get a trailing wildcard.
func TestLikeAnyWord(t *testing.T) {
	cases := []struct {
		name  string
		query string
	}{
		{name: "and_or_search", query: "table spoon & usa | img json"},
		{name: "and_or_search_en", query: "table spoon and usa or img json"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			w := LikeAnyWord("k.keyword", tc.query)

			if len(w) != 2 {
				t.Fatal("two where conditions expected")
			}

			assert.Equal(t, "k.keyword LIKE 'spoon%' OR k.keyword LIKE 'table%'", w[0])
			assert.Equal(t, "k.keyword LIKE 'img%' OR k.keyword LIKE 'json%' OR k.keyword LIKE 'usa%'", w[1])
		})
	}
}
func TestLikeAll(t *testing.T) {
t.Run("keywords", func(t *testing.T) {
if w := LikeAll("k.keyword", "Jo Mander 李"); len(w) == 2 {
if w := LikeAll("k.keyword", "Jo Mander 李", true); len(w) == 2 {
assert.Equal(t, "k.keyword LIKE 'mander%'", w[0])
assert.Equal(t, "k.keyword = '李'", w[1])
assert.Equal(t, "k.keyword LIKE '李'", w[1])
} else {
t.Logf("wheres: %#v", w)
t.Fatal("two where conditions expected")
@ -80,6 +118,30 @@ func TestLikeAll(t *testing.T) {
})
}
// TestLikeAllKeywords checks that LikeAllKeywords returns exactly two where
// conditions for "Jo Mander 李": a prefix match for "mander" and a
// wildcard-free match for the single CJK character.
func TestLikeAllKeywords(t *testing.T) {
	t.Run("keywords", func(t *testing.T) {
		w := LikeAllKeywords("k.keyword", "Jo Mander 李")

		// Bail out first so the index accesses below are always in range.
		if len(w) != 2 {
			t.Logf("wheres: %#v", w)
			t.Fatal("two where conditions expected")
		}

		assert.Equal(t, "k.keyword LIKE 'mander%'", w[0])
		assert.Equal(t, "k.keyword LIKE '李'", w[1])
	})
}
// TestLikeAllWords checks that LikeAllWords returns one prefix-matching where
// condition per word of "Jo Mander 王", including the two-letter word and the
// single CJK character.
func TestLikeAllWords(t *testing.T) {
	t.Run("keywords", func(t *testing.T) {
		w := LikeAllWords("k.name", "Jo Mander 王")

		// Three words go in, so three conditions must come out. The failure
		// message previously claimed "two", contradicting the check itself.
		if len(w) != 3 {
			t.Logf("wheres: %#v", w)
			t.Fatal("three where conditions expected")
		}

		assert.Equal(t, "k.name LIKE 'jo%'", w[0])
		assert.Equal(t, "k.name LIKE 'mander%'", w[1])
		assert.Equal(t, "k.name LIKE '王%'", w[2])
	})
}
func TestAnySlug(t *testing.T) {
t.Run("table spoon usa img json", func(t *testing.T) {
where := AnySlug("custom_slug", "table spoon usa img json", " ")

View file

@ -136,14 +136,14 @@ func PhotoSearch(f form.PhotoSearch) (results PhotoResults, count int, err error
if f.Geo == true {
s = s.Where("photos.cell_id <> 'zz'")
for _, where := range LikeAny("k.keyword", f.Query) {
for _, where := range LikeAnyKeyword("k.keyword", f.Query) {
s = s.Where("photos.id IN (SELECT pk.photo_id FROM keywords k JOIN photos_keywords pk ON k.id = pk.keyword_id WHERE (?))", gorm.Expr(where))
}
} else if f.Query != "" {
if err := Db().Where(AnySlug("custom_slug", f.Query, " ")).Find(&labels).Error; len(labels) == 0 || err != nil {
log.Infof("search: label %s not found, using fuzzy search", txt.Quote(f.Query))
for _, where := range LikeAny("k.keyword", f.Query) {
for _, where := range LikeAnyKeyword("k.keyword", f.Query) {
s = s.Where("photos.id IN (SELECT pk.photo_id FROM keywords k JOIN photos_keywords pk ON k.id = pk.keyword_id WHERE (?))", gorm.Expr(where))
}
} else {
@ -159,7 +159,7 @@ func PhotoSearch(f form.PhotoSearch) (results PhotoResults, count int, err error
}
}
if wheres := LikeAny("k.keyword", f.Query); len(wheres) > 0 {
if wheres := LikeAnyKeyword("k.keyword", f.Query); len(wheres) > 0 {
for _, where := range wheres {
s = s.Where("photos.id IN (SELECT pk.photo_id FROM keywords k JOIN photos_keywords pk ON k.id = pk.keyword_id WHERE (?)) OR "+
"photos.id IN (SELECT pl.photo_id FROM photos_labels pl WHERE pl.uncertainty < 100 AND pl.label_id IN (?))", gorm.Expr(where), labelIds)
@ -172,7 +172,7 @@ func PhotoSearch(f form.PhotoSearch) (results PhotoResults, count int, err error
// Search for one or more keywords?
if f.Keywords != "" {
for _, where := range LikeAll("k.keyword", f.Keywords) {
for _, where := range LikeAllKeywords("k.keyword", f.Keywords) {
s = s.Where("photos.id IN (SELECT pk.photo_id FROM keywords k JOIN photos_keywords pk ON k.id = pk.keyword_id WHERE (?))", gorm.Expr(where))
}
}
@ -182,7 +182,7 @@ func PhotoSearch(f form.PhotoSearch) (results PhotoResults, count int, err error
s = s.Where(fmt.Sprintf("photos.id IN (SELECT photo_id FROM files f JOIN %s m ON f.id = m.file_id AND m.marker_invalid = 0 WHERE subject_uid IN (?))",
entity.Marker{}.TableName()), strings.Split(strings.ToLower(f.Subject), Or))
} else if f.Subjects != "" {
for _, where := range LikeAny("s.subject_name", f.Subjects) {
for _, where := range LikeAnyWord("s.subject_name", f.Subjects) {
s = s.Where(fmt.Sprintf("photos.id IN (SELECT photo_id FROM files f JOIN %s m ON f.id = m.file_id AND m.marker_invalid = 0 JOIN %s s ON s.subject_uid = m.subject_uid WHERE (?))",
entity.Marker{}.TableName(), entity.Subject{}.TableName()), gorm.Expr(where))
}
@ -402,7 +402,7 @@ func PhotoSearch(f form.PhotoSearch) (results PhotoResults, count int, err error
} else if f.Unsorted && f.Filter == "" {
s = s.Where("photos.photo_uid NOT IN (SELECT photo_uid FROM photos_albums pa WHERE pa.hidden = 0)")
} else if f.Albums != "" {
for _, where := range LikeAny("a.album_title", f.Albums) {
for _, where := range LikeAnyWord("a.album_title", f.Albums) {
s = s.Where("photos.photo_uid IN (SELECT pa.photo_uid FROM photos_albums pa JOIN albums a ON a.album_uid = pa.album_uid WHERE (?))", gorm.Expr(where))
}
}