Metadata: Sanitize bad Unicode strings #2897

Signed-off-by: Michael Mayer <michael@photoprism.app>
This commit is contained in:
Michael Mayer 2022-11-15 14:45:21 +01:00
parent 61b4be2c6f
commit ae130dc500
4 changed files with 50 additions and 13 deletions

View file

@ -9,15 +9,14 @@ import (
"strings"
"time"
"github.com/photoprism/photoprism/pkg/video"
"github.com/photoprism/photoprism/pkg/projection"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/rnd"
"github.com/photoprism/photoprism/pkg/txt"
"github.com/tidwall/gjson"
"gopkg.in/photoprism/go-tz.v2/tz"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/projection"
"github.com/photoprism/photoprism/pkg/rnd"
"github.com/photoprism/photoprism/pkg/txt"
"github.com/photoprism/photoprism/pkg/video"
)
const MimeVideoMP4 = "video/mp4"
@ -47,7 +46,7 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
jsonValues := j.Map()
for key, val := range jsonValues {
data.json[key] = val.String()
data.json[key] = SanitizeString(val.String())
}
if fileName, ok := data.json["FileName"]; ok && fileName != "" && originalName != "" && fileName != originalName {
@ -134,22 +133,22 @@ func (data *Data) Exiftool(jsonData []byte, originalName string) (err error) {
}
case []string:
existing := fieldValue.Interface().([]string)
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, strings.TrimSpace(jsonValue.String()))))
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, SanitizeString(jsonValue.String()))))
case Keywords:
existing := fieldValue.Interface().(Keywords)
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, strings.TrimSpace(jsonValue.String()))))
fieldValue.Set(reflect.ValueOf(txt.AddToWords(existing, SanitizeString(jsonValue.String()))))
case projection.Type:
if !fieldValue.IsZero() {
continue
}
fieldValue.Set(reflect.ValueOf(projection.Type(strings.TrimSpace(jsonValue.String()))))
fieldValue.Set(reflect.ValueOf(projection.Type(SanitizeString(jsonValue.String()))))
case string:
if !fieldValue.IsZero() {
continue
}
fieldValue.SetString(strings.TrimSpace(jsonValue.String()))
fieldValue.SetString(SanitizeString(jsonValue.String()))
case bool:
if !fieldValue.IsZero() {
continue

View file

@ -5,6 +5,7 @@ import (
"regexp"
"strings"
"github.com/photoprism/photoprism/pkg/clean"
"github.com/photoprism/photoprism/pkg/fs"
"github.com/photoprism/photoprism/pkg/txt"
)
@ -64,7 +65,7 @@ func SanitizeString(s string) string {
s = strings.TrimSpace(s)
return strings.Replace(s, "\"", "", -1)
return clean.Unicode(strings.Replace(s, "\"", "", -1))
}
// SanitizeUID normalizes unique IDs found in XMP or Exif metadata.

21
pkg/clean/unicode.go Normal file
View file

@ -0,0 +1,21 @@
package clean
import "strings"
// Unicode returns s with every Unicode replacement character (U+FFFD) removed.
//
// Iterating over a Go string decodes each invalid UTF-8 byte as U+FFFD, so
// malformed byte sequences are dropped together with any literal replacement
// characters already present in the input.
func Unicode(s string) string {
	if len(s) == 0 {
		return ""
	}

	// A negative return value tells strings.Map to drop the rune.
	return strings.Map(func(r rune) rune {
		if r == '\uFFFD' {
			return -1
		}
		return r
	}, s)
}

16
pkg/clean/unicode_test.go Normal file
View file

@ -0,0 +1,16 @@
package clean
import (
"testing"
"github.com/stretchr/testify/assert"
)
// TestUnicode verifies that Unicode leaves valid text untouched and strips
// replacement characters as well as malformed UTF-8 bytes.
func TestUnicode(t *testing.T) {
	t.Run("Valid", func(t *testing.T) {
		assert.Equal(t, "Naïve bonds and futures surge as inflation eases 🚀🚀🚀", Unicode("Naïve bonds and futures surge as inflation eases 🚀🚀🚀"))
	})
	t.Run("Empty", func(t *testing.T) {
		assert.Equal(t, "", Unicode(""))
	})
	t.Run("ReplacementChar", func(t *testing.T) {
		// Literal U+FFFD runes must be removed, surrounding text kept.
		assert.Equal(t, "Naïve bonds", Unicode("Na\uFFFDïve bonds\uFFFD"))
	})
	t.Run("InvalidBytes", func(t *testing.T) {
		// Bytes that are not valid UTF-8 decode as U+FFFD and are dropped.
		assert.Equal(t, "Nave bonds", Unicode("Na\xefve bonds\xff"))
	})
}