Indexer: Improve titles, labels and performance

Signed-off-by: Michael Mayer <michael@liquidbytes.net>
This commit is contained in:
Michael Mayer 2019-12-12 16:31:55 +01:00
parent 4df887fffa
commit 645d02d782
7 changed files with 182 additions and 131 deletions

View file

@ -145,11 +145,11 @@ velvet:
hair slide: hair slide:
label: jewelry label: jewelry
threshold: 0.4 threshold: 0.6
shower curtain: shower curtain:
label: bathroom label: bathroom
threshold: 0.15 threshold: 0.6
windsor tie: windsor tie:
priority: -1 priority: -1
@ -770,18 +770,14 @@ jellyfish:
sea anemone: sea anemone:
categories: categories:
- animal - animal
- coral - water
brain coral: brain coral:
categories: label: nature
- animal
- coral
coral reef: coral reef:
label: nature
threshold: 0.6 threshold: 0.6
categories:
- ocean
- water
flatworm: flatworm:
categories: categories:
@ -2222,9 +2218,10 @@ computer keyboard:
- laptop - laptop
confectionery: confectionery:
label: shop
categorie: categorie:
- sweets - store
- food - commercial
container ship: container ship:
label: ship label: ship
@ -2477,10 +2474,6 @@ jeep:
categories: categories:
- car - car
jigsaw puzzle:
categories:
- game
ladle: ladle:
categories: categories:
- kitchen - kitchen
@ -2794,6 +2787,8 @@ power drill:
- tool - tool
prayer rug: prayer rug:
threshold: 0.6
priority: -1
categories: categories:
- religion - religion
- carpet - carpet
@ -3007,8 +3002,10 @@ submarine:
- boat - boat
suspension bridge: suspension bridge:
label: architecture
categories: categories:
- bridge - bridge
- building
swimming trunks: swimming trunks:
categories: categories:
@ -3419,7 +3416,8 @@ buckeye:
coral fungus: coral fungus:
categories: categories:
- coral - plant
- mushroom
agaric: agaric:
categories: categories:
@ -3537,3 +3535,15 @@ web site:
categories: categories:
- sign - sign
- screenshot - screenshot
crossword puzzle:
threshold: 0.6
priority: -1
categories:
- game
jigsaw puzzle:
threshold: 0.6
priority: -1
categories:
- game

View file

@ -22,6 +22,8 @@ type File struct {
FileType string `gorm:"type:varchar(32)"` FileType string `gorm:"type:varchar(32)"`
FileMime string `gorm:"type:varchar(64)"` FileMime string `gorm:"type:varchar(64)"`
FilePrimary bool FilePrimary bool
FileSidecar bool
FileVideo bool
FileMissing bool FileMissing bool
FileDuplicate bool FileDuplicate bool
FilePortrait bool FilePortrait bool

View file

@ -6,25 +6,33 @@ import (
_ "image/png" _ "image/png"
) )
type FileType string
const ( const (
// FileTypeOther is an unkown file format. // JPEG image file.
FileTypeOther = "unknown" FileTypeJpeg FileType = "jpg"
// FileTypeYaml is a yaml file format. // PNG image file.
FileTypeYaml = "yml" FileTypePng FileType = "png"
// FileTypeJpeg is a jpeg file format. // RAW image file.
FileTypeJpeg = "jpg" FileTypeRaw FileType = "raw"
// FileTypePng is a png file format. // High Efficiency Image File Format.
FileTypePng = "png" FileTypeHEIF FileType = "heif" // High Efficiency Image File Format
// FileTypeRaw is a raw file format. // Movie file.
FileTypeRaw = "raw" FileTypeMovie FileType = "mov"
// FileTypeXmp is an xmp file format. // Adobe XMP sidecar file (XML).
FileTypeXmp = "xmp" FileTypeXMP FileType = "xmp"
// FileTypeAae is an aae file format. // Apple sidecar file (XML).
FileTypeAae = "aae" FileTypeAAE FileType = "aae"
// FileTypeMovie is a movie file format. // XML metadata / config / sidecar file.
FileTypeMovie = "mov" FileTypeXML FileType = "xml"
// FileTypeHEIF High Efficiency Image File Format // YAML metadata / config / sidecar file.
FileTypeHEIF = "heif" // High Efficiency Image File Format FileTypeYaml FileType = "yml"
// Text config / sidecar file.
FileTypeText FileType = "txt"
// Markdown text sidecar file.
FileTypeMarkdown FileType = "md"
// Unknown file format.
FileTypeOther FileType = "unknown"
) )
const ( const (
@ -33,7 +41,7 @@ const (
) )
// FileExtensions lists all the available and supported image file formats. // FileExtensions lists all the available and supported image file formats.
var FileExtensions = map[string]string{ var FileExtensions = map[string]FileType{
".crw": FileTypeRaw, ".crw": FileTypeRaw,
".cr2": FileTypeRaw, ".cr2": FileTypeRaw,
".nef": FileTypeRaw, ".nef": FileTypeRaw,
@ -45,8 +53,8 @@ var FileExtensions = map[string]string{
".jpg": FileTypeJpeg, ".jpg": FileTypeJpeg,
".thm": FileTypeJpeg, ".thm": FileTypeJpeg,
".jpeg": FileTypeJpeg, ".jpeg": FileTypeJpeg,
".xmp": FileTypeXmp, ".xmp": FileTypeXMP,
".aae": FileTypeAae, ".aae": FileTypeAAE,
".heif": FileTypeHEIF, ".heif": FileTypeHEIF,
".heic": FileTypeHEIF, ".heic": FileTypeHEIF,
".3fr": FileTypeRaw, ".3fr": FileTypeRaw,

View file

@ -21,19 +21,18 @@ const (
type IndexResult string type IndexResult string
func (i *Indexer) indexMediaFile(mediaFile *MediaFile, o IndexerOptions) IndexResult { func (i *Indexer) indexMediaFile(m *MediaFile, o IndexerOptions) IndexResult {
var photo entity.Photo var photo entity.Photo
var file, primaryFile entity.File var file, primaryFile entity.File
var isPrimary = false
var exifData *Exif var exifData *Exif
var photoQuery, fileQuery *gorm.DB var photoQuery, fileQuery *gorm.DB
var keywords []string var keywords []string
labels := Labels{} labels := Labels{}
fileBase := mediaFile.Basename() fileBase := m.Basename()
filePath := mediaFile.RelativePath(i.originalsPath()) filePath := m.RelativePath(i.originalsPath())
fileName := mediaFile.RelativeFilename(i.originalsPath()) fileName := m.RelativeFilename(i.originalsPath())
fileHash := mediaFile.Hash() fileHash := m.Hash()
fileChanged := true fileChanged := true
fileExists := false fileExists := false
photoExists := false photoExists := false
@ -50,35 +49,47 @@ func (i *Indexer) indexMediaFile(mediaFile *MediaFile, o IndexerOptions) IndexRe
if !fileExists { if !fileExists {
photoQuery = i.db.Unscoped().First(&photo, "photo_path = ? AND photo_name = ?", filePath, fileBase) photoQuery = i.db.Unscoped().First(&photo, "photo_path = ? AND photo_name = ?", filePath, fileBase)
if photoQuery.Error != nil && mediaFile.HasTimeAndPlace() { if photoQuery.Error != nil && m.HasTimeAndPlace() {
exifData, _ = mediaFile.Exif() exifData, _ = m.Exif()
photoQuery = i.db.Unscoped().First(&photo, "photo_lat = ? AND photo_long = ? AND taken_at = ?", exifData.Lat, exifData.Long, exifData.TakenAt) photoQuery = i.db.Unscoped().First(&photo, "photo_lat = ? AND photo_long = ? AND taken_at = ?", exifData.Lat, exifData.Long, exifData.TakenAt)
} }
} else { } else {
photoQuery = i.db.Unscoped().First(&photo, "id = ?", file.PhotoID) photoQuery = i.db.Unscoped().First(&photo, "id = ?", file.PhotoID)
fileChanged = file.FileHash != fileHash fileChanged = file.FileHash != fileHash
isPrimary = file.FilePrimary
} }
photoExists = photoQuery.Error == nil photoExists = photoQuery.Error == nil
if !fileChanged && photoExists && !photo.TakenAt.IsZero() && o.SkipUnchanged() { if !fileChanged && photoExists && o.SkipUnchanged() {
return indexResultSkipped return indexResultSkipped
} }
if !file.FilePrimary {
if photoExists {
if q := i.db.Where("file_type = 'jpg' AND file_primary = 1 AND photo_id = ?", photo.ID).First(&primaryFile); q.Error != nil {
file.FilePrimary = m.IsJpeg()
}
} else {
file.FilePrimary = m.IsJpeg()
}
}
if file.FilePrimary {
primaryFile = file
}
photo.PhotoPath = filePath photo.PhotoPath = filePath
photo.PhotoName = fileBase photo.PhotoName = fileBase
if isPrimary || !photoExists || photo.TakenAt.IsZero() { if file.FilePrimary {
if jpeg, err := mediaFile.Jpeg(); err == nil {
if fileChanged || o.UpdateLabels || o.UpdateTitle { if fileChanged || o.UpdateLabels || o.UpdateTitle {
// Image classification labels // Image classification labels
labels = i.classifyImage(jpeg) labels = i.classifyImage(m)
} }
if fileChanged || o.UpdateExif { if fileChanged || o.UpdateExif {
// Read UpdateExif data // Read UpdateExif data
if exifData, err := jpeg.Exif(); err == nil { if exifData, err := m.Exif(); err == nil {
photo.PhotoLat = exifData.Lat photo.PhotoLat = exifData.Lat
photo.PhotoLong = exifData.Long photo.PhotoLong = exifData.Long
photo.TakenAt = exifData.TakenAt photo.TakenAt = exifData.TakenAt
@ -87,33 +98,31 @@ func (i *Indexer) indexMediaFile(mediaFile *MediaFile, o IndexerOptions) IndexRe
photo.PhotoAltitude = exifData.Altitude photo.PhotoAltitude = exifData.Altitude
photo.PhotoArtist = exifData.Artist photo.PhotoArtist = exifData.Artist
if exifData.UUID != "" { if len(exifData.UUID) > 15 {
log.Debugf("index: photo uuid \"%s\"", exifData.UUID) log.Debugf("index: file uuid \"%s\"", exifData.UUID)
photo.PhotoUUID = exifData.UUID
} else { file.FileUUID = exifData.UUID
log.Debug("index: no photo uuid in exif data")
} }
} }
} }
if fileChanged || o.UpdateCamera { if fileChanged || o.UpdateCamera {
// Set UpdateCamera, Lens, Focal Length and F Number // Set UpdateCamera, Lens, Focal Length and F Number
photo.Camera = entity.NewCamera(mediaFile.CameraModel(), mediaFile.CameraMake()).FirstOrCreate(i.db) photo.Camera = entity.NewCamera(m.CameraModel(), m.CameraMake()).FirstOrCreate(i.db)
photo.Lens = entity.NewLens(mediaFile.LensModel(), mediaFile.LensMake()).FirstOrCreate(i.db) photo.Lens = entity.NewLens(m.LensModel(), m.LensMake()).FirstOrCreate(i.db)
photo.PhotoFocalLength = mediaFile.FocalLength() photo.PhotoFocalLength = m.FocalLength()
photo.PhotoFNumber = mediaFile.FNumber() photo.PhotoFNumber = m.FNumber()
photo.PhotoIso = mediaFile.Iso() photo.PhotoIso = m.Iso()
photo.PhotoExposure = mediaFile.Exposure() photo.PhotoExposure = m.Exposure()
}
} }
if fileChanged || o.UpdateLocation || o.UpdateTitle { if fileChanged || o.UpdateKeywords || o.UpdateLocation || o.UpdateTitle {
keywords, labels = i.indexLocation(mediaFile, &photo, keywords, labels, fileChanged, o) keywords, labels = i.indexLocation(m, &photo, keywords, labels, fileChanged, o)
} }
if (fileChanged || o.UpdateTitle) && photo.PhotoTitle == "" { if (fileChanged || o.UpdateTitle) && photo.PhotoTitle == "" {
if len(labels) > 0 && labels[0].Priority >= -1 && labels[0].Uncertainty <= 85 && labels[0].Name != "" { if len(labels) > 0 && labels[0].Priority >= -1 && labels[0].Uncertainty <= 85 && labels[0].Name != "" {
photo.PhotoTitle = fmt.Sprintf("%s / %s", util.Title(labels[0].Name), mediaFile.DateCreated().Format("2006")) photo.PhotoTitle = fmt.Sprintf("%s / %s", util.Title(labels[0].Name), m.DateCreated().Format("2006"))
} else if !photo.TakenAtLocal.IsZero() { } else if !photo.TakenAtLocal.IsZero() {
var daytimeString string var daytimeString string
hour := photo.TakenAtLocal.Hour() hour := photo.TakenAtLocal.Hour()
@ -134,14 +143,11 @@ func (i *Indexer) indexMediaFile(mediaFile *MediaFile, o IndexerOptions) IndexRe
log.Infof("index: changed empty photo title to \"%s\"", photo.PhotoTitle) log.Infof("index: changed empty photo title to \"%s\"", photo.PhotoTitle)
} }
}
// This should never happen
if photo.TakenAt.IsZero() || photo.TakenAtLocal.IsZero() { if photo.TakenAt.IsZero() || photo.TakenAtLocal.IsZero() {
photo.TakenAt = mediaFile.DateCreated() photo.TakenAt = m.DateCreated()
photo.TakenAtLocal = photo.TakenAt photo.TakenAtLocal = photo.TakenAt
}
log.Warnf("index: %s has invalid date, set to \"%s\"", filepath.Base(mediaFile.Filename()), photo.TakenAt.Format("2006-01-02 15:04:05"))
} }
if photoExists { if photoExists {
@ -163,35 +169,23 @@ func (i *Indexer) indexMediaFile(mediaFile *MediaFile, o IndexerOptions) IndexRe
if len(labels) > 0 { if len(labels) > 0 {
log.Infof("index: adding labels %+v", labels) log.Infof("index: adding labels %+v", labels)
}
if fileChanged || o.UpdateLabels {
i.addLabels(photo.ID, labels) i.addLabels(photo.ID, labels)
} }
if result := i.db.Where("file_type = 'jpg' AND file_primary = 1 AND photo_id = ?", photo.ID).First(&primaryFile); result.Error != nil {
isPrimary = mediaFile.IsJpeg()
} else {
isPrimary = mediaFile.IsJpeg() && (fileName == primaryFile.FileName || fileHash == primaryFile.FileHash)
}
if (fileChanged || o.UpdateKeywords || o.UpdateTitle) && isPrimary {
photo.IndexKeywords(keywords, i.db)
}
file.PhotoID = photo.ID file.PhotoID = photo.ID
file.PhotoUUID = photo.PhotoUUID file.PhotoUUID = photo.PhotoUUID
file.FilePrimary = isPrimary file.FileSidecar = m.IsSidecar()
file.FileVideo = m.IsVideo()
file.FileMissing = false file.FileMissing = false
file.FileName = fileName file.FileName = fileName
file.FileHash = fileHash file.FileHash = fileHash
file.FileType = mediaFile.Type() file.FileType = string(m.Type())
file.FileMime = mediaFile.MimeType() file.FileMime = m.MimeType()
file.FileOrientation = mediaFile.Orientation() file.FileOrientation = m.Orientation()
if fileChanged || o.UpdateColors { if m.IsJpeg() && (fileChanged || o.UpdateColors) {
// Color information // Color information
if p, err := mediaFile.Colors(i.thumbnailsPath()); err == nil { if p, err := m.Colors(i.thumbnailsPath()); err == nil {
file.FileMainColor = p.MainColor.Name() file.FileMainColor = p.MainColor.Name()
file.FileColors = p.Colors.Hex() file.FileColors = p.Colors.Hex()
file.FileLuminance = p.Luminance.Hex() file.FileLuminance = p.Luminance.Hex()
@ -199,15 +193,20 @@ func (i *Indexer) indexMediaFile(mediaFile *MediaFile, o IndexerOptions) IndexRe
} }
} }
if fileChanged || o.UpdateSize { if m.IsJpeg() && (fileChanged || o.UpdateSize) {
if mediaFile.Width() > 0 && mediaFile.Height() > 0 { if m.Width() > 0 && m.Height() > 0 {
file.FileWidth = mediaFile.Width() file.FileWidth = m.Width()
file.FileHeight = mediaFile.Height() file.FileHeight = m.Height()
file.FileAspectRatio = mediaFile.AspectRatio() file.FileAspectRatio = m.AspectRatio()
file.FilePortrait = mediaFile.Width() < mediaFile.Height() file.FilePortrait = m.Width() < m.Height()
} }
} }
if file.FilePrimary && (fileChanged || o.UpdateKeywords || o.UpdateTitle) {
keywords = append(keywords, file.FileMainColor)
photo.IndexKeywords(keywords, i.db)
}
if fileQuery.Error == nil { if fileQuery.Error == nil {
i.db.Unscoped().Save(&file) i.db.Unscoped().Save(&file)
return indexResultUpdated return indexResultUpdated
@ -327,11 +326,13 @@ func (i *Indexer) indexLocation(mediaFile *MediaFile, photo *entity.Photo, keywo
labels = append(labels, NewLocationLabel(location.LocCountry, 0, -2)) labels = append(labels, NewLocationLabel(location.LocCountry, 0, -2))
} }
if location.LocCategory != "" { // TODO: Needs refactoring
if location.LocCategory != "" && location.LocCategory != "highway" && location.LocCategory != "tourism" {
labels = append(labels, NewLocationLabel(location.LocCategory, 0, -2)) labels = append(labels, NewLocationLabel(location.LocCategory, 0, -2))
} }
if location.LocType != "" { // TODO: Needs refactoring
if location.LocType != "" && location.LocType != "tertiary" && location.LocType != "attraction" {
labels = append(labels, NewLocationLabel(location.LocType, 0, -1)) labels = append(labels, NewLocationLabel(location.LocType, 0, -1))
} }

View file

@ -23,7 +23,7 @@ type MediaFile struct {
dateCreated time.Time dateCreated time.Time
timeZone string timeZone string
hash string hash string
fileType string fileType FileType
mimeType string mimeType string
perceptualHash string perceptualHash string
width int width int
@ -484,12 +484,12 @@ func (m *MediaFile) Copy(destinationFilename string) error {
return nil return nil
} }
// Extension returns the extension of a mediafile. // Extension returns the filename extension of this media file.
func (m *MediaFile) Extension() string { func (m *MediaFile) Extension() string {
return strings.ToLower(filepath.Ext(m.filename)) return strings.ToLower(filepath.Ext(m.filename))
} }
// IsJpeg return true if the given mediafile is of mimetype Jpeg. // IsJpeg returns true if this media file is a JPEG image.
func (m *MediaFile) IsJpeg() bool { func (m *MediaFile) IsJpeg() bool {
// Don't import/use existing thumbnail files (we create our own) // Don't import/use existing thumbnail files (we create our own)
if m.Extension() == ".thm" { if m.Extension() == ".thm" {
@ -500,30 +500,60 @@ func (m *MediaFile) IsJpeg() bool {
} }
// Type returns the type of the media file. // Type returns the type of the media file.
func (m *MediaFile) Type() string { func (m *MediaFile) Type() FileType {
return FileExtensions[m.Extension()] return FileExtensions[m.Extension()]
} }
// HasType checks whether a media file is of a given type. // HasType returns true if this media file is of a given type.
func (m *MediaFile) HasType(typeString string) bool { func (m *MediaFile) HasType(t FileType) bool {
if typeString == FileTypeJpeg { if t == FileTypeJpeg {
return m.IsJpeg() return m.IsJpeg()
} }
return m.Type() == typeString return m.Type() == t
} }
// IsRaw check whether the given media file a RAW file. // IsRaw returns true if this media file is a RAW file.
func (m *MediaFile) IsRaw() bool { func (m *MediaFile) IsRaw() bool {
return m.HasType(FileTypeRaw) return m.HasType(FileTypeRaw)
} }
// IsHEIF check if a given media file is a High Efficiency Image File Format file. // IsHEIF returns true if this media file is a High Efficiency Image File Format file.
func (m *MediaFile) IsHEIF() bool { func (m *MediaFile) IsHEIF() bool {
return m.HasType(FileTypeHEIF) return m.HasType(FileTypeHEIF)
} }
// IsPhoto checks if a media file is a photo / image. // IsSidecar returns true if this media file is a sidecar file (containing metadata).
func (m *MediaFile) IsSidecar() bool {
	// All file types that are treated as sidecars share a single case list:
	// XMP, AAE, XML, YAML, plain text and Markdown.
	switch m.Type() {
	case FileTypeXMP, FileTypeAAE, FileTypeXML, FileTypeYaml, FileTypeText, FileTypeMarkdown:
		return true
	}

	// Every other type (including unmapped extensions) is not a sidecar.
	return false
}
// IsVideo reports whether the type of this media file is FileTypeMovie,
// which is the only type treated as video here.
func (m *MediaFile) IsVideo() bool {
	return m.Type() == FileTypeMovie
}
// IsPhoto checks if this media file is a photo / image.
func (m *MediaFile) IsPhoto() bool { func (m *MediaFile) IsPhoto() bool {
return m.IsJpeg() || m.IsRaw() || m.IsHEIF() return m.IsJpeg() || m.IsRaw() || m.IsHEIF()
} }

View file

@ -147,7 +147,7 @@ func (m *MediaFile) Resample(path string, typeName string) (img image.Image, err
return imaging.Open(filename, imaging.AutoOrientation(true)) return imaging.Open(filename, imaging.AutoOrientation(true))
} }
func ResampleOptions(opts ...ResampleOption) (method ResampleOption, filter imaging.ResampleFilter, format string) { func ResampleOptions(opts ...ResampleOption) (method ResampleOption, filter imaging.ResampleFilter, format FileType) {
method = ResampleFit method = ResampleFit
filter = imaging.Lanczos filter = imaging.Lanczos
format = FileTypeJpeg format = FileTypeJpeg
@ -280,7 +280,7 @@ func CreateThumbnail(img image.Image, fileName string, width, height int, opts .
var saveOption imaging.EncodeOption var saveOption imaging.EncodeOption
if filepath.Ext(fileName) == "."+FileTypePng { if filepath.Ext(fileName) == "."+string(FileTypePng) {
saveOption = imaging.PNGCompressionLevel(png.DefaultCompression) saveOption = imaging.PNGCompressionLevel(png.DefaultCompression)
} else if width <= 150 && height <= 150 { } else if width <= 150 && height <= 150 {
saveOption = imaging.JPEGQuality(JpegQualitySmall) saveOption = imaging.JPEGQuality(JpegQualitySmall)

View file

@ -178,7 +178,7 @@ func (s *Repo) Photos(f form.PhotoSearch) (results []PhotoResult, err error) {
log.Infof("search: label \"%s\" not found, using fuzzy search", f.Query) log.Infof("search: label \"%s\" not found, using fuzzy search", f.Query)
q = q.Joins("LEFT JOIN labels ON photos_labels.label_id = labels.id"). q = q.Joins("LEFT JOIN labels ON photos_labels.label_id = labels.id").
Where("labels.label_name LIKE ? OR keywords.keyword LIKE ? OR files.file_main_color = ?", likeString, likeString, lowerString) Where("labels.label_name LIKE ? OR keywords.keyword LIKE ?", likeString, likeString)
} else { } else {
labelIds = append(labelIds, label.ID) labelIds = append(labelIds, label.ID)
@ -190,7 +190,7 @@ func (s *Repo) Photos(f form.PhotoSearch) (results []PhotoResult, err error) {
log.Infof("search: label \"%s\" includes %d categories", label.LabelName, len(labelIds)) log.Infof("search: label \"%s\" includes %d categories", label.LabelName, len(labelIds))
q = q.Where("photos_labels.label_id IN (?) OR keywords.keyword LIKE ? OR files.file_main_color = ?", labelIds, likeString, lowerString) q = q.Where("photos_labels.label_id IN (?) OR keywords.keyword LIKE ?", labelIds, likeString)
} }
} }