Metadata: Sanitize bad Unicode strings #2897

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer 2022-11-15 14:45:21 +01:00
parent 61b4be2c6f
commit ae130dc500
4 changed files with 50 additions and 13 deletions

View file

@ -9,15 +9,14 @@ import (
"strings" "strings"
"time" "time"
"github.com/photoprism/photoprism/pkg/video"
"github.com/photoprism/photoprism/pkg/projection"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/rnd"
"github.com/photoprism/photoprism/pkg/txt"
"github.com/tidwall/gjson" "github.com/tidwall/gjson"
"gopkg.in/photoprism/go-tz.v2/tz" "gopkg.in/photoprism/go-tz.v2/tz"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/projection"
"github.com/photoprism/photoprism/pkg/rnd"
"github.com/photoprism/photoprism/pkg/txt"
"github.com/photoprism/photoprism/pkg/video"
) )
const MimeVideoMP4 = "video/mp4" const MimeVideoMP4 = "video/mp4"
@ -47,7 +46,7 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
jsonValues := j.Map() jsonValues := j.Map()
for key, val := range jsonValues { for key, val := range jsonValues {
data.json[key] = val.String() data.json[key] = SanitizeString(val.String())
} }
if fileName, ok := data.json["FileName"]; ok && fileName != "" && originalName != "" && fileName != originalName { if fileName, ok := data.json["FileName"]; ok && fileName != "" && originalName != "" && fileName != originalName {
@ -134,22 +133,22 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
} }
case []string: case []string:
existing := fieldValue.Interface().([]string) existing := fieldValue.Interface().([]string)
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, strings.TrimSpace(jsonValue.String())))) fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, SanitizeString(jsonValue.String()))))
case Keywords: case Keywords:
existing := fieldValue.Interface().(Keywords) existing := fieldValue.Interface().(Keywords)
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, strings.TrimSpace(jsonValue.String())))) fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, SanitizeString(jsonValue.String()))))
case projection.Type: case projection.Type:
if !fieldValue.IsZero() { if !fieldValue.IsZero() {
continue continue
} }
fieldValue.Set(reflect.ValueOf(projection.Type(strings.TrimSpace(jsonValue.String())))) fieldValue.Set(reflect.ValueOf(projection.Type(SanitizeString(jsonValue.String()))))
case string: case string:
if !fieldValue.IsZero() { if !fieldValue.IsZero() {
continue continue
} }
fieldValue.SetString(strings.TrimSpace(jsonValue.String())) fieldValue.SetString(SanitizeString(jsonValue.String()))
case bool: case bool:
if !fieldValue.IsZero() { if !fieldValue.IsZero() {
continue continue

View file

@ -5,6 +5,7 @@ import (
"regexp" "regexp"
"strings" "strings"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/fs" "github.com/photoprism/photoprism/pkg/fs"
"github.com/photoprism/photoprism/pkg/txt" "github.com/photoprism/photoprism/pkg/txt"
) )
@ -64,7 +65,7 @@ func SanitizeString(s string) string {
s = strings.TrimSpace(s) s = strings.TrimSpace(s)
return strings.Replace(s, "\"", "", -1) return clean.Unicode(strings.Replace(s, "\"", "", -1))
} }
// SanitizeUID normalizes unique IDs found in XMP or Exif metadata. // SanitizeUID normalizes unique IDs found in XMP or Exif metadata.

21
pkg/clean/unicode.go Normal file
View file

@ -0,0 +1,21 @@
package clean
import "strings"
// Unicode strips every Unicode replacement character (U+FFFD) from s and
// returns the remainder.
//
// Bytes that are not valid UTF-8 decode to U+FFFD when a string is iterated
// rune by rune, so they are removed as well. All other runes are kept in
// their original order.
func Unicode(s string) string {
	// A negative result tells strings.Map to omit the rune from the output.
	keepValid := func(r rune) rune {
		if r == '\uFFFD' {
			return -1
		}
		return r
	}

	return strings.Map(keepValid, s)
}

16
pkg/clean/unicode_test.go Normal file
View file

@ -0,0 +1,16 @@
package clean
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestUnicode verifies that Unicode leaves well-formed text untouched and
// removes replacement characters as well as bytes that are not valid UTF-8.
func TestUnicode(t *testing.T) {
	t.Run("Valid", func(t *testing.T) {
		assert.Equal(t, "Naïve bonds and futures surge as inflation eases 🚀🚀🚀", Unicode("Naïve bonds and futures surge as inflation eases 🚀🚀🚀"))
	})
	t.Run("Empty", func(t *testing.T) {
		assert.Equal(t, "", Unicode(""))
	})
	// A literal U+FFFD in the input must be dropped wherever it occurs.
	t.Run("ReplacementChar", func(t *testing.T) {
		assert.Equal(t, "Naïve bonds", Unicode("Na\uFFFDïve bonds\uFFFD"))
	})
	// A byte that is not valid UTF-8 decodes to U+FFFD and must be dropped too.
	t.Run("InvalidBytes", func(t *testing.T) {
		assert.Equal(t, "ab", Unicode("a\xffb"))
	})
}