Metadata: Allow single quotes in keywords #1196

This commit is contained in:
Michael Mayer 2021-05-05 12:32:49 +02:00
parent 256c82fc93
commit d3e53a89dd
2 changed files with 10 additions and 5 deletions

View file

@@ -6,7 +6,7 @@ import (
"strings" "strings"
) )
var KeywordsRegexp = regexp.MustCompile("[\\p{L}\\-]{1,}") var KeywordsRegexp = regexp.MustCompile("[\\p{L}\\-']{1,}")
// UnknownWord returns true if the string does not seem to be a real word. // UnknownWord returns true if the string does not seem to be a real word.
func UnknownWord(s string) bool { func UnknownWord(s string) bool {
@@ -34,7 +34,7 @@ func Words(s string) (results []string) {
} }
for _, w := range KeywordsRegexp.FindAllString(s, -1) { for _, w := range KeywordsRegexp.FindAllString(s, -1) {
w = strings.Trim(w, "- ") w = strings.Trim(w, "- '")
if w == "" || len(w) < 2 && IsLatin(w) { if w == "" || len(w) < 2 && IsLatin(w) {
continue continue
@@ -119,7 +119,7 @@ func UniqueWords(words []string) (results []string) {
SortCaseInsensitive(words) SortCaseInsensitive(words)
for _, w := range words { for _, w := range words {
w = strings.Trim(strings.ToLower(w), "- ") w = strings.Trim(strings.ToLower(w), "- '")
if w == "" || len(w) < 2 && IsLatin(w) || w == last { if w == "" || len(w) < 2 && IsLatin(w) || w == last {
continue continue

View file

@@ -25,7 +25,7 @@ func TestWords(t *testing.T) {
}) })
t.Run("I'm a lazy-brown fox!", func(t *testing.T) { t.Run("I'm a lazy-brown fox!", func(t *testing.T) {
result := Words("I'm a lazy-BRoWN fox!") result := Words("I'm a lazy-BRoWN fox!")
assert.Equal(t, []string{"lazy-BRoWN", "fox"}, result) assert.Equal(t, []string{"I'm", "lazy-BRoWN", "fox"}, result)
}) })
t.Run("no result", func(t *testing.T) { t.Run("no result", func(t *testing.T) {
result := Words("x") result := Words("x")
@@ -51,6 +51,11 @@ func TestWords(t *testing.T) {
result := Words(" -foo- -") result := Words(" -foo- -")
assert.Equal(t, []string{"foo"}, result) assert.Equal(t, []string{"foo"}, result)
}) })
t.Run("McDonalds", func(t *testing.T) {
result := Words(" McDonald's FOO'bar-'")
assert.Equal(t, []string{"McDonald's", "FOO'bar"}, result)
})
// McDonald's
} }
func TestReplaceSpaces(t *testing.T) { func TestReplaceSpaces(t *testing.T) {
@@ -97,7 +102,7 @@ func TestAddToWords(t *testing.T) {
func TestMergeWords(t *testing.T) { func TestMergeWords(t *testing.T) {
t.Run("I'm a lazy-BRoWN fox!", func(t *testing.T) { t.Run("I'm a lazy-BRoWN fox!", func(t *testing.T) {
result := MergeWords("I'm a lazy-BRoWN fox!", "Yellow banana, apple; pan-pot") result := MergeWords("I'm a lazy-BRoWN fox!", "Yellow banana, apple; pan-pot")
assert.Equal(t, "apple, banana, fox, lazy-brown, pan-pot, yellow", result) assert.Equal(t, "apple, banana, fox, i'm, lazy-brown, pan-pot, yellow", result)
}) })
} }