People: Add "faces audit" command #22

This commit is contained in:
Michael Mayer 2021-08-28 15:26:26 +02:00
parent 327cd20a50
commit 0503011f87
8 changed files with 195 additions and 3 deletions

View file

@ -23,6 +23,17 @@ var FacesCommand = cli.Command{
Usage: "Shows stats on face samples",
Action: facesStatsAction,
},
{
Name: "audit",
Usage: "Conducts a data integrity audit",
Flags: []cli.Flag{
cli.BoolFlag{
Name: "fix, f",
Usage: "issues will be fixed automatically",
},
},
Action: facesAuditAction,
},
{
Name: "reset",
Usage: "Resets recognized faces",
@ -65,7 +76,38 @@ func facesStatsAction(ctx *cli.Context) error {
w := service.Faces()
if err := w.Analyze(); err != nil {
if err := w.Stats(); err != nil {
return err
} else {
elapsed := time.Since(start)
log.Infof("completed in %s", elapsed)
}
conf.Shutdown()
return nil
}
// facesAuditAction runs a data integrity audit on faces and fixes detected issues if the fix flag is set.
func facesAuditAction(ctx *cli.Context) error {
start := time.Now()
conf := config.NewConfig(ctx)
service.SetConfig(conf)
_, cancel := context.WithCancel(context.Background())
defer cancel()
if err := conf.Init(); err != nil {
return err
}
conf.InitDb()
w := service.Faces()
if err := w.Audit(ctx.Bool("fix")); err != nil {
return err
} else {
elapsed := time.Since(start)

View file

@ -20,6 +20,15 @@ const (
SrcLocation = classify.SrcLocation
)
// SrcString returns the given source in a form suitable for log messages:
// SrcAuto is rendered as "auto", every other value is returned unchanged.
func SrcString(src string) string {
	switch src {
	case SrcAuto:
		return "auto"
	default:
		return src
	}
}
// SrcPriority maps source priorities.
var SrcPriority = Priorities{
SrcAuto: 1,

View file

@ -0,0 +1,79 @@
package photoprism
import (
"github.com/photoprism/photoprism/internal/entity"
"github.com/photoprism/photoprism/internal/face"
"github.com/photoprism/photoprism/internal/query"
"github.com/photoprism/photoprism/pkg/txt"
)
// Audit checks face clusters and subjects for inconsistencies and logs the findings.
//
// It logs the number of indexed subjects and of markers with invalid subject
// or face references, then compares every face with every face and logs each
// pair that matches although the two faces have different subject UIDs.
// If fix is true, ReportCollision is additionally called on the first face of
// such a pair and the outcome is logged.
//
// An error is returned if the invalid marker references or the faces cannot be
// queried; a failure to load the subject map is only logged.
func (w *Faces) Audit(fix bool) (err error) {
	invalidFaces, invalidSubj, err := query.MarkersWithInvalidReferences()

	if err != nil {
		return err
	}

	subjects, err := query.SubjectMap()

	if err != nil {
		// The subject map is only used for log output below, so carry on without it.
		log.Error(err)
	}

	log.Infof("%d subjects indexed", len(subjects))
	log.Infof("%d markers with invalid subjects", len(invalidSubj))
	log.Infof("%d markers with invalid faces", len(invalidFaces))

	faces, err := query.Faces(true, "")

	if err != nil {
		return err
	}

	conflicts := 0

	for _, f := range faces {
		for _, other := range faces {
			matched, dist := f.Match(entity.Embeddings{other.Embedding()})

			// Only matching faces that belong to different subjects are ambiguous.
			if !matched || f.SubjectUID == other.SubjectUID {
				continue
			}

			conflicts++

			log.Infof("%s is ambiguous at dist %f, Ø %f from %d samples, collision Ø %f", f.ID, dist, f.SampleRadius+face.ClusterRadius, f.Samples, f.CollisionRadius)

			if f.SubjectUID == "" {
				log.Infof("%s has no subject (%s)", f.ID, entity.SrcString(f.FaceSrc))
			} else {
				log.Infof("%s has subject %s (%s %s)", f.ID, txt.Quote(subjects[f.SubjectUID].SubjectName), f.SubjectUID, entity.SrcString(f.FaceSrc))
			}

			if other.SubjectUID == "" {
				log.Infof("%s has no subject (%s)", other.ID, entity.SrcString(other.FaceSrc))
			} else {
				log.Infof("%s has subject %s (%s %s)", other.ID, txt.Quote(subjects[other.SubjectUID].SubjectName), other.SubjectUID, entity.SrcString(other.FaceSrc))
			}

			// Without the fix flag, the audit is report-only.
			if !fix {
				continue
			}

			reported, reportErr := f.ReportCollision(entity.Embeddings{other.Embedding()})

			switch {
			case reportErr != nil:
				log.Error(reportErr)
			case reported:
				log.Infof("collision has been reported")
			default:
				log.Infof("collision has not been reported")
			}
		}
	}

	log.Infof("%d ambiguous faces clusters", conflicts)

	return nil
}

View file

@ -6,8 +6,8 @@ import (
"github.com/photoprism/photoprism/pkg/clusters"
)
// Analyze face embeddings.
func (w *Faces) Analyze() (err error) {
// Stats shows statistics on face embeddings.
func (w *Faces) Stats() (err error) {
if embeddings, err := query.Embeddings(true, false, 0); err != nil {
return err
} else if samples := len(embeddings); samples == 0 {

View file

@ -118,6 +118,26 @@ func RemoveInvalidMarkerReferences() (removed int64, err error) {
return removed, nil
}
// MarkersWithInvalidReferences finds markers with invalid references.
//
// faces contains the markers of type entity.MarkerFace whose non-empty face_id
// does not exist in the table of entity.Face; subjects contains the markers
// whose non-empty subject_uid does not exist in the table of entity.Subject.
//
// The function returns as soon as a query fails, so that the first error can't
// be overwritten by a later one, and no partial results accompany an error.
func MarkersWithInvalidReferences() (faces entity.Markers, subjects entity.Markers, err error) {
	// Find markers with invalid face IDs.
	if err = Db().
		Where("marker_type = ?", entity.MarkerFace).
		Where(fmt.Sprintf("face_id <> '' AND face_id NOT IN (SELECT id FROM %s)", entity.Face{}.TableName())).
		Find(&faces).Error; err != nil {
		return nil, nil, err
	}

	// Find markers with invalid subject UIDs.
	if err = Db().
		Where(fmt.Sprintf("subject_uid <> '' AND subject_uid NOT IN (SELECT subject_uid FROM %s)", entity.Subject{}.TableName())).
		Find(&subjects).Error; err != nil {
		return nil, nil, err
	}

	return faces, subjects, nil
}
// ResetFaceMarkerMatches removes automatically added subject and face references from the markers table.
func ResetFaceMarkerMatches() (removed int64, err error) {
res := Db().Model(&entity.Marker{}).

View file

@ -80,6 +80,15 @@ func TestRemoveInvalidMarkerReferences(t *testing.T) {
assert.GreaterOrEqual(t, affected, int64(1))
}
// TestMarkersWithInvalidReferences verifies that the lookup of markers with
// invalid face or subject references completes without an error.
func TestMarkersWithInvalidReferences(t *testing.T) {
	invalidFaces, invalidSubjects, err := MarkersWithInvalidReferences()

	assert.NoError(t, err)

	// Both result sets may be empty, depending on the fixtures.
	assert.GreaterOrEqual(t, len(invalidFaces), 0)
	assert.GreaterOrEqual(t, len(invalidSubjects), 0)
}
func TestCountUnmatchedFaceMarkers(t *testing.T) {
n, threshold := CountUnmatchedFaceMarkers()

View file

@ -18,6 +18,25 @@ func Subjects(limit, offset int) (result entity.Subjects, err error) {
return result, err
}
// SubjectMap returns the subjects whose subject_src differs from
// entity.SrcDefault as a map keyed by subject UID.
//
// If the query fails, the error is returned together with an empty, non-nil map.
func SubjectMap() (result map[string]entity.Subject, err error) {
	var found entity.Subjects

	result = make(map[string]entity.Subject)

	if err = Db().Where("subject_src <> ?", entity.SrcDefault).Find(&found).Error; err != nil {
		return result, err
	}

	// Index the query results by their UID.
	for i := range found {
		result[found[i].SubjectUID] = found[i]
	}

	return result, nil
}
// RemoveDanglingMarkerSubjects permanently deletes dangling marker subjects from the index.
func RemoveDanglingMarkerSubjects() (removed int64, err error) {
res := UnscopedDb().

View file

@ -22,6 +22,20 @@ func TestSubjects(t *testing.T) {
}
}
// TestSubjectMap verifies that SubjectMap returns at least one subject and
// that every map value is an entity.Subject.
func TestSubjectMap(t *testing.T) {
	subjects, err := SubjectMap()

	if err != nil {
		t.Fatal(err)
	}

	assert.GreaterOrEqual(t, len(subjects), 1)

	for uid := range subjects {
		assert.IsType(t, entity.Subject{}, subjects[uid])
	}
}
func TestRemoveDanglingMarkerSubjects(t *testing.T) {
affected, err := RemoveDanglingMarkerSubjects()