From d3e53a89ddbefed33771cb42c305dbd200f09636 Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Wed, 5 May 2021 12:32:49 +0200 Subject: [PATCH] Metadata: Allow single quotes in keywords #1196 --- pkg/txt/words.go | 6 +++--- pkg/txt/words_test.go | 9 +++++++-- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/pkg/txt/words.go b/pkg/txt/words.go index 6810c3788..11988b047 100644 --- a/pkg/txt/words.go +++ b/pkg/txt/words.go @@ -6,7 +6,7 @@ import ( "strings" ) -var KeywordsRegexp = regexp.MustCompile("[\\p{L}\\-]{1,}") +var KeywordsRegexp = regexp.MustCompile("[\\p{L}\\-']{1,}") // UnknownWord returns true if the string does not seem to be a real word. func UnknownWord(s string) bool { @@ -34,7 +34,7 @@ func Words(s string) (results []string) { } for _, w := range KeywordsRegexp.FindAllString(s, -1) { - w = strings.Trim(w, "- ") + w = strings.Trim(w, "- '") if w == "" || len(w) < 2 && IsLatin(w) { continue @@ -119,7 +119,7 @@ func UniqueWords(words []string) (results []string) { SortCaseInsensitive(words) for _, w := range words { - w = strings.Trim(strings.ToLower(w), "- ") + w = strings.Trim(strings.ToLower(w), "- '") if w == "" || len(w) < 2 && IsLatin(w) || w == last { continue diff --git a/pkg/txt/words_test.go b/pkg/txt/words_test.go index 8aa1d0b05..a2807a10a 100644 --- a/pkg/txt/words_test.go +++ b/pkg/txt/words_test.go @@ -25,7 +25,7 @@ func TestWords(t *testing.T) { }) t.Run("I'm a lazy-brown fox!", func(t *testing.T) { result := Words("I'm a lazy-BRoWN fox!") - assert.Equal(t, []string{"lazy-BRoWN", "fox"}, result) + assert.Equal(t, []string{"I'm", "lazy-BRoWN", "fox"}, result) }) t.Run("no result", func(t *testing.T) { result := Words("x") @@ -51,6 +51,11 @@ func TestWords(t *testing.T) { result := Words(" -foo- -") assert.Equal(t, []string{"foo"}, result) }) + t.Run("McDonalds", func(t *testing.T) { + result := Words(" McDonald's FOO'bar-'") + assert.Equal(t, []string{"McDonald's", "FOO'bar"}, result) + }) + // McDonald's } func TestReplaceSpaces(t *testing.T) { @@ -97,7 +102,7 @@ func TestAddToWords(t *testing.T) { func TestMergeWords(t *testing.T) { t.Run("I'm a lazy-BRoWN fox!", func(t *testing.T) { result := MergeWords("I'm a lazy-BRoWN fox!", "Yellow banana, apple; pan-pot") - assert.Equal(t, "apple, banana, fox, lazy-brown, pan-pot, yellow", result) + assert.Equal(t, "apple, banana, fox, i'm, lazy-brown, pan-pot, yellow", result) }) }