photoprism/pkg/txt/words.go

124 lines
2.6 KiB
Go
Raw Normal View History

package txt
import (
"regexp"
"sort"
"strings"
)
var KeywordsRegexp = regexp.MustCompile("[\\p{L}\\-]{3,}")
// Words returns a slice of words with at least 3 characters from a string, dashes count as character ("ile-de-france").
func Words(s string) (results []string) {
return KeywordsRegexp.FindAllString(s, -1)
}
// ReplaceSpaces replaces all spaces with another string.
func ReplaceSpaces(s string, char string) string {
return strings.Replace(s, " ", char, -1)
}
var FilenameKeywordsRegexp = regexp.MustCompile("[\\p{L}]{3,}")
// FilenameWords returns a slice of words with at least 3 characters from a string ("ile", "france").
func FilenameWords(s string) (results []string) {
return FilenameKeywordsRegexp.FindAllString(s, -1)
}
// FilenameKeywords returns a slice of keywords without stopwords.
func FilenameKeywords(s string) (results []string) {
for _, w := range FilenameWords(s) {
w = strings.ToLower(w)
if _, ok := Stopwords[w]; ok == false {
results = append(results, w)
}
}
return results
}
// Keywords returns a slice of keywords without stopwords but including dashes.
func Keywords(s string) (results []string) {
for _, w := range Words(s) {
w = strings.ToLower(w)
if _, ok := Stopwords[w]; ok == false {
results = append(results, w)
}
}
return results
}
// UniqueWords sorts and filters a string slice for unique words.
func UniqueWords(words []string) (results []string) {
last := ""
SortCaseInsensitive(words)
for _, w := range words {
w = strings.ToLower(w)
if len(w) < 3 || w == last {
continue
}
last = w
results = append(results, w)
}
return results
}
// RemoveFromWords removes words from a string slice and returns the sorted result.
func RemoveFromWords(words []string, remove string) (results []string) {
remove = strings.ToLower(remove)
last := ""
SortCaseInsensitive(words)
for _, w := range words {
w = strings.ToLower(w)
if len(w) < 3 || w == last || strings.Contains(remove, w) {
continue
}
last = w
results = append(results, w)
}
return results
}
// UniqueKeywords returns a slice of unique and sorted keywords without stopwords.
func UniqueKeywords(s string) (results []string) {
last := ""
words := Keywords(s)
SortCaseInsensitive(words)
for _, w := range words {
w = strings.ToLower(w)
if len(w) < 3 || w == last {
continue
}
last = w
results = append(results, w)
}
return results
}
// Sorts string slice case insensitive.
func SortCaseInsensitive(words []string) {
sort.Slice(words, func(i, j int) bool { return strings.ToLower(words[i]) < strings.ToLower(words[j]) })
}