photoprism/pkg/txt/words.go
Michael Mayer 46f5fcef40 Backend: Index file names with txt.FilenameKeywords()
Signed-off-by: Michael Mayer <michael@liquidbytes.net>
2020-04-16 23:30:42 +02:00

102 lines
2.2 KiB
Go

package txt
import (
"regexp"
"sort"
"strings"
)
// KeywordsRegexp matches runs of at least 3 characters that consist of letters and dashes.
var KeywordsRegexp = regexp.MustCompile(`[\p{L}\-]{3,}`)

// Words returns a slice of words with at least 3 characters from a string, dashes count as character ("ile-de-france").
//
// Dashes only count inside a word: leading and trailing dashes are stripped
// from every match, and matches that end up with fewer than 3 characters
// (for example "---" or "ab-") are dropped. The result is nil if s contains no words.
func Words(s string) (results []string) {
	for _, w := range KeywordsRegexp.FindAllString(s, -1) {
		// The pattern also accepts dashes at the edges, which are separators
		// rather than part of the word.
		w = strings.Trim(w, "-")

		// Count characters (runes), not bytes, so the minimum length agrees
		// with the {3,} quantifier of the pattern.
		if len([]rune(w)) < 3 {
			continue
		}

		results = append(results, w)
	}

	return results
}
// ReplaceSpaces returns a copy of s in which every space character is substituted with char.
func ReplaceSpaces(s string, char string) string {
	replaced := strings.ReplaceAll(s, " ", char)

	return replaced
}
// FilenameKeywordsRegexp matches runs of at least 3 letters.
var FilenameKeywordsRegexp = regexp.MustCompile(`[\p{L}]{3,}`)

// FilenameWords extracts all words with at least 3 letters from a string.
// Anything that is not a letter, including dashes, separates words,
// so "ile-de-france" yields "ile" and "france".
func FilenameWords(s string) (results []string) {
	results = FilenameKeywordsRegexp.FindAllString(s, -1)

	return results
}
// FilenameKeywords returns a slice of keywords without stopwords.
//
// The words found by FilenameWords are converted to lower case, and every
// word that is a key in Stopwords is skipped. The order of the remaining
// words is kept; the result is nil if no keyword remains.
func FilenameKeywords(s string) (results []string) {
	for _, w := range FilenameWords(s) {
		w = strings.ToLower(w)

		// Stopwords is keyed by lower case words, hence the lookup after ToLower.
		if _, found := Stopwords[w]; found {
			continue
		}

		results = append(results, w)
	}

	return results
}
// Keywords returns a slice of keywords without stopwords but including dashes.
//
// The words found by Words are converted to lower case, and every word that
// is a key in Stopwords is skipped. The order of the remaining words is kept;
// the result is nil if no keyword remains.
func Keywords(s string) (results []string) {
	for _, w := range Words(s) {
		w = strings.ToLower(w)

		// Stopwords is keyed by lower case words, hence the lookup after ToLower.
		if _, found := Stopwords[w]; found {
			continue
		}

		results = append(results, w)
	}

	return results
}
// UniqueWords sorts a string slice case-insensitively and returns its unique
// words in lower case. Words shorter than 3 bytes are left out.
//
// Note that the slice passed in is sorted in place.
func UniqueWords(words []string) (results []string) {
	SortCaseInsensitive(words)

	var prev string

	for i := range words {
		word := strings.ToLower(words[i])

		// Equal words are adjacent after sorting, so comparing with the
		// previously accepted word is enough to detect duplicates.
		if len(word) >= 3 && word != prev {
			prev = word
			results = append(results, word)
		}
	}

	return results
}
// UniqueKeywords returns a slice of unique and sorted keywords without stopwords.
func UniqueKeywords(s string) (results []string) {
last := ""
words := Keywords(s)
SortCaseInsensitive(words)
for _, w := range words {
w = strings.ToLower(w)
if len(w) < 3 || w == last {
continue
}
last = w
results = append(results, w)
}
return results
}
// SortCaseInsensitive sorts a string slice in place, comparing the lower case
// form of its elements.
func SortCaseInsensitive(words []string) {
	less := func(a, b int) bool {
		left := strings.ToLower(words[a])
		right := strings.ToLower(words[b])

		return left < right
	}

	sort.Slice(words, less)
}