From 818019a7ec06acba41e2a59ca91e05264b6c073a Mon Sep 17 00:00:00 2001 From: Michael Mayer Date: Tue, 30 Apr 2019 13:17:01 +0200 Subject: [PATCH] Use nasnet mobile instead of inception v1 for image classification --- .dockerignore | 3 +- .gitignore | 3 +- Dockerfile | 2 +- Makefile | 2 +- assets/tensorflow/.gitignore | 2 - docker/development/Dockerfile | 4 +- docker/photoprism/Dockerfile | 2 +- go.mod | 2 +- go.sum | 2 + internal/photoprism/tensorflow.go | 136 ++++++++++++------------- internal/photoprism/tensorflow_test.go | 58 +++++++++-- scripts/download-assets.sh | 15 +++ scripts/download-inception.sh | 15 --- 13 files changed, 141 insertions(+), 105 deletions(-) delete mode 100644 assets/tensorflow/.gitignore create mode 100755 scripts/download-assets.sh delete mode 100755 scripts/download-inception.sh diff --git a/.dockerignore b/.dockerignore index 4fc04ba86..9b7d4b39c 100644 --- a/.dockerignore +++ b/.dockerignore @@ -5,10 +5,11 @@ /frontend/node_modules/* /assets/server/public/build/* /assets/testdata +/assets/tensorflow Dockerfile /photoprism docker-compose* /coverage.* .dockerignore .idea -.DS_Store \ No newline at end of file +.DS_Store diff --git a/.gitignore b/.gitignore index 2a4e12773..e5774d9b5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ /frontend/node_modules/* /frontend/tests/result.html /assets/testdata +/assets/tensorflow *.log # Binaries for programs and plugins @@ -39,4 +40,4 @@ Thumbs.db .c9revisions .settings .swp -.tmp \ No newline at end of file +.tmp diff --git a/Dockerfile b/Dockerfile index 8e16ccf5f..f80c79c10 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM photoprism/development:20190418 +FROM photoprism/development:20190430 # Set up project directory WORKDIR "/go/src/github.com/photoprism/photoprism" diff --git a/Makefile b/Makefile index b9add47eb..98c7713ba 100644 --- a/Makefile +++ b/Makefile @@ -43,7 +43,7 @@ test-coverage: clean: rm -f $(BINARY_NAME) download: - scripts/download-inception.sh + scripts/download-assets.sh deploy-photoprism: scripts/docker-build.sh photoprism $(DOCKER_TAG) scripts/docker-push.sh photoprism $(DOCKER_TAG) diff --git a/assets/tensorflow/.gitignore b/assets/tensorflow/.gitignore deleted file mode 100644 index c96a04f00..000000000 --- a/assets/tensorflow/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore \ No newline at end of file diff --git a/docker/development/Dockerfile b/docker/development/Dockerfile index f1f91aaf9..e2badf7f6 100644 --- a/docker/development/Dockerfile +++ b/docker/development/Dockerfile @@ -86,8 +86,8 @@ RUN mkdir -p "$GOPATH/src" "$GOPATH/bin" && chmod -R 777 "$GOPATH" # Download TensorFlow model and test files RUN rm -rf /tmp/* && mkdir -p /tmp/photoprism -RUN wget "https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip" -O /tmp/photoprism/inception.zip -RUN wget "https://www.dropbox.com/s/na9p9wwt98l7m5b/import.zip?dl=1" -O /tmp/photoprism/testdata.zip +RUN wget "https://dl.photoprism.org/tensorflow/nasnet.zip" -O /tmp/photoprism/nasnet.zip +RUN wget "https://dl.photoprism.org/fixtures/test.zip" -O /tmp/photoprism/testdata.zip # Install goimports RUN env GO111MODULE=off /usr/local/go/bin/go get golang.org/x/tools/cmd/goimports diff --git a/docker/photoprism/Dockerfile b/docker/photoprism/Dockerfile index 477fccc44..4f178541d 100644 --- a/docker/photoprism/Dockerfile +++ b/docker/photoprism/Dockerfile @@ -1,4 +1,4 @@ -FROM photoprism/development:20190418 as build +FROM photoprism/development:20190430 as build # Set up project directory WORKDIR "/go/src/github.com/photoprism/photoprism" diff --git a/go.mod b/go.mod index bd6dadaaf..be1bc0149 100644 --- a/go.mod +++ b/go.mod @@ -64,7 +64,7 @@ require ( github.com/spaolacci/murmur3 v0.0.0-20180118202830-f09979ecbc72 // indirect github.com/steakknife/hamming v0.0.0-20180906055917-c99c65617cd3 github.com/stretchr/testify v1.2.2 - github.com/tensorflow/tensorflow v1.12.0 + github.com/tensorflow/tensorflow v1.13.1 github.com/tmc/grpc-websocket-proxy v0.0.0-20171017195756-830351dc03c6 // indirect github.com/twinj/uuid v1.0.0 // indirect github.com/unrolled/render v0.0.0-20181210145518-4c664cb3ad2f // indirect diff --git a/go.sum b/go.sum index 1ddb785fc..fbdc5e314 100644 --- a/go.sum +++ b/go.sum @@ -224,6 +224,8 @@ github.com/stretchr/testify v1.2.2 h1:bSDNvY7ZPG5RlJ8otE/7V6gMiyenm9RtJ7IUVIAoJ1 github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/tensorflow/tensorflow v1.12.0 h1:fT4okrN4BkpgotWmDwS56wM6BdkRpTL0lLMzvkM+bLo= github.com/tensorflow/tensorflow v1.12.0/go.mod h1:itOSERT4trABok4UOoG+X4BoKds9F3rIsySdn+Lvu90= +github.com/tensorflow/tensorflow v1.13.1 h1:ygn0+ztXusm6RGVP4Od5IF+8h5sAgD5qbeTvqYyMnjo= +github.com/tensorflow/tensorflow v1.13.1/go.mod h1:itOSERT4trABok4UOoG+X4BoKds9F3rIsySdn+Lvu90= github.com/tmc/grpc-websocket-proxy v0.0.0-20171017195756-830351dc03c6 h1:lYIiVDtZnyTWlNwiAxLj0bbpTcx1BWCFhXjfsvmPdNc= github.com/tmc/grpc-websocket-proxy v0.0.0-20171017195756-830351dc03c6/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= github.com/twinj/uuid v0.0.0-20150629100731-70cac2bcd273/go.mod h1:mMgcE1RHFUFqe5AfiwlINXisXfDGro23fWdPUfOMjRY= diff --git a/internal/photoprism/tensorflow.go b/internal/photoprism/tensorflow.go index 096b51f70..e31521918 100644 --- a/internal/photoprism/tensorflow.go +++ b/internal/photoprism/tensorflow.go @@ -2,20 +2,24 @@ package photoprism import ( "bufio" + "bytes" "errors" + "image" "io/ioutil" "log" + "math" "os" "sort" + "github.com/disintegration/imaging" tf "github.com/tensorflow/tensorflow/tensorflow/go" - "github.com/tensorflow/tensorflow/tensorflow/go/op" ) // TensorFlow if a tensorflow wrapper given a graph, labels and a modelPath. type TensorFlow struct { modelPath string graph *tf.Graph + model *tf.SavedModel labels []string } @@ -30,6 +34,10 @@ type TensorFlowLabel struct { Probability float32 `json:"probability"` } +func (a *TensorFlowLabel) Percent() int { + return int(math.Round(float64(a.Probability * 100))) +} + // TensorFlowLabels is a slice of tensorflow labels. type TensorFlowLabels []TensorFlowLabel @@ -37,6 +45,12 @@ func (a TensorFlowLabels) Len() int { return len(a) } func (a TensorFlowLabels) Swap(i, j int) { a[i], a[j] = a[j], a[i] } func (a TensorFlowLabels) Less(i, j int) bool { return a[i].Probability > a[j].Probability } +func (t *TensorFlow) closeSession(s *tf.Session) { + if err := s.Close(); err != nil { + log.Print(err) + } +} + // GetImageTagsFromFile returns tags for a jpeg image file. func (t *TensorFlow) GetImageTagsFromFile(filename string) (result []TensorFlowLabel, err error) { imageBuffer, err := ioutil.ReadFile(filename) @@ -45,43 +59,41 @@ func (t *TensorFlow) GetImageTagsFromFile(filename string) (result []TensorFlowL return nil, err } - return t.GetImageTags(string(imageBuffer)) + return t.GetImageTags(imageBuffer) } // GetImageTags returns tags for a jpeg image string. -func (t *TensorFlow) GetImageTags(image string) (result []TensorFlowLabel, err error) { +func (t *TensorFlow) GetImageTags(img []byte) (result []TensorFlowLabel, err error) { if err := t.loadModel(); err != nil { return nil, err } // Make tensor - tensor, err := t.makeTensorFromImage(image, "jpeg") + tensor, err := t.makeTensorFromImage(img, "jpeg") if err != nil { return nil, errors.New("invalid image") } // Run inference - session, err := tf.NewSession(t.graph, nil) - if err != nil { - log.Fatal(err) - } - - defer session.Close() - - output, err := session.Run( + output, err := t.model.Session.Run( map[tf.Output]*tf.Tensor{ - t.graph.Operation("input").Output(0): tensor, + t.model.Graph.Operation("input_1").Output(0): tensor, }, []tf.Output{ - t.graph.Operation("output").Output(0), + t.model.Graph.Operation("predictions/Softmax").Output(0), }, nil) if err != nil { - return nil, errors.New("could not run inference") + return result, errors.New("could not run inference") } + if len(output) < 1 { + return result, errors.New("result is empty") + } + + // Return best labels return t.findBestLabels(output[0].Value().([][]float32)[0]), nil } @@ -92,22 +104,22 @@ func (t *TensorFlow) loadModel() error { return nil } - // Load inception model - model, err := ioutil.ReadFile(t.modelPath + "/inception/tensorflow_inception_graph.pb") + // Load model + model, err := tf.LoadSavedModel(t.modelPath + "/nasnet", []string{"photoprism"}, nil) + if err != nil { return err } - t.graph = tf.NewGraph() - if err := t.graph.Import(model, ""); err != nil { - return err - } + + t.model = model // Load labels - labelsFile, err := os.Open(t.modelPath + "/inception/imagenet_comp_graph_label_strings.txt") + labelsFile, err := os.Open(t.modelPath + "/nasnet/labels.txt") if err != nil { return err } defer labelsFile.Close() + scanner := bufio.NewScanner(labelsFile) // Labels are separated by newlines @@ -137,61 +149,39 @@ func (t *TensorFlow) findBestLabels(probabilities []float32) []TensorFlowLabel { return resultLabels[:5] } -func (t *TensorFlow) makeTensorFromImage(image string, imageFormat string) (*tf.Tensor, error) { - tensor, err := tf.NewTensor(image) +func (t *TensorFlow) makeTensorFromImage(image []byte, imageFormat string) (*tf.Tensor, error) { + img, err:= imaging.Decode(bytes.NewReader(image), imaging.AutoOrientation(true)) + if err != nil { return nil, err } - graph, input, output, err := t.makeTransformImageGraph(imageFormat) - if err != nil { - return nil, err - } - session, err := tf.NewSession(graph, nil) - if err != nil { - return nil, err - } - defer session.Close() - normalized, err := session.Run( - map[tf.Output]*tf.Tensor{input: tensor}, - []tf.Output{output}, - nil) - if err != nil { - return nil, err - } - return normalized[0], nil + + width, height := 224, 224 + + img = imaging.Fill(img, width, height, imaging.Center, imaging.CatmullRom) + + return imageToTensorTF(img, width, height) } -// Creates a graph to decode, resize and normalize an image -func (t *TensorFlow) makeTransformImageGraph(imageFormat string) ( - graph *tf.Graph, input, output tf.Output, err error) { - const ( - H, W = 224, 224 - Mean = float32(117) - Scale = float32(1) - ) - s := op.NewScope() - input = op.Placeholder(s, tf.String) - // Decode PNG or JPEG - var decode tf.Output - if imageFormat == "png" { - decode = op.DecodePng(s, input, op.DecodePngChannels(3)) - } else { - decode = op.DecodeJpeg(s, input, op.DecodeJpegChannels(3)) - } - // Div and Sub perform (value-Mean)/Scale for each pixel - output = op.Div(s, - op.Sub(s, - // Resize to 224x224 with bi-linear interpolation - op.ResizeBilinear(s, - // Create a batch containing a single image - op.ExpandDims(s, - // Use decoded pixel values - op.Cast(s, decode, tf.Float), - op.Const(s.SubScope("make_batch"), int32(0))), - op.Const(s.SubScope("size"), []int32{H, W})), - op.Const(s.SubScope("mean"), Mean)), - op.Const(s.SubScope("scale"), Scale)) - graph, err = s.Finalize() +func imageToTensorTF(img image.Image, imageHeight, imageWidth int) (*tf.Tensor, error) { + var tfImage [1][][][3]float32 - return graph, input, output, err + for j := 0; j < imageHeight; j++ { + tfImage[0] = append(tfImage[0], make([][3]float32, imageWidth)) + } + + for i := 0; i < imageWidth; i++ { + for j := 0; j < imageHeight; j++ { + r, g, b, _ := img.At(i, j).RGBA() + tfImage[0][j][i][0] = convertTF(r) + tfImage[0][j][i][1] = convertTF(g) + tfImage[0][j][i][2] = convertTF(b) + } + } + + return tf.NewTensor(tfImage) +} + +func convertTF(value uint32) float32 { + return (float32(value>>8) - float32(127.5)) / float32(127.5) } diff --git a/internal/photoprism/tensorflow_test.go b/internal/photoprism/tensorflow_test.go index 21b8a6996..5224a0e63 100644 --- a/internal/photoprism/tensorflow_test.go +++ b/internal/photoprism/tensorflow_test.go @@ -2,7 +2,6 @@ package photoprism import ( "io/ioutil" - "math" "testing" "github.com/photoprism/photoprism/internal/test" @@ -18,15 +17,24 @@ func TestTensorFlow_GetImageTagsFromFile(t *testing.T) { result, err := tensorFlow.GetImageTagsFromFile(conf.ImportPath() + "/iphone/IMG_6788.JPG") - assert.NotNil(t, result) assert.Nil(t, err) + + if err != nil { + t.Log(err.Error()) + t.Fail() + } + + assert.NotNil(t, result) assert.IsType(t, []TensorFlowLabel{}, result) assert.Equal(t, 5, len(result)) - assert.Equal(t, "tabby", result[0].Label) + t.Log(result) + + assert.Equal(t, "tabby, cat", result[0].Label) assert.Equal(t, "tiger cat", result[1].Label) - assert.Equal(t, float64(0.165), math.Round(float64(result[1].Probability)*1000)/1000) + assert.Equal(t, 63, result[0].Percent()) + assert.Equal(t, 16, result[1].Percent()) } func TestTensorFlow_GetImageTags(t *testing.T) { @@ -43,16 +51,52 @@ func TestTensorFlow_GetImageTags(t *testing.T) { if imageBuffer, err := ioutil.ReadFile(conf.ImportPath() + "/iphone/IMG_6788.JPG"); err != nil { t.Error(err) } else { - result, err := tensorFlow.GetImageTags(string(imageBuffer)) + result, err := tensorFlow.GetImageTags(imageBuffer) + + t.Log(result) assert.NotNil(t, result) + assert.Nil(t, err) assert.IsType(t, []TensorFlowLabel{}, result) assert.Equal(t, 5, len(result)) - assert.Equal(t, "tabby", result[0].Label) + assert.Equal(t, "tabby, cat", result[0].Label) assert.Equal(t, "tiger cat", result[1].Label) - assert.Equal(t, float64(0.165), math.Round(float64(result[1].Probability)*1000)/1000) + assert.Equal(t, 63, result[0].Percent()) + assert.Equal(t, 16, result[1].Percent()) + } +} + +func TestTensorFlow_GetImageTags_Dog(t *testing.T) { + if testing.Short() { + t.Skip("skipping test in short mode.") + } + + conf := test.NewConfig() + + conf.InitializeTestData(t) + + tensorFlow := NewTensorFlow(conf.TensorFlowModelPath()) + + if imageBuffer, err := ioutil.ReadFile(conf.ImportPath() + "/dog.jpg"); err != nil { + t.Error(err) + } else { + result, err := tensorFlow.GetImageTags(imageBuffer) + + t.Log(result) + + assert.NotNil(t, result) + + assert.Nil(t, err) + assert.IsType(t, []TensorFlowLabel{}, result) + assert.Equal(t, 5, len(result)) + + assert.Equal(t, "belt", result[0].Label) + assert.Equal(t, "basenji, dog", result[1].Label) + + assert.Equal(t, 13, result[1].Percent()) + assert.Equal(t, 13, result[1].Percent()) } } diff --git a/scripts/download-assets.sh b/scripts/download-assets.sh new file mode 100755 index 000000000..519407540 --- /dev/null +++ b/scripts/download-assets.sh @@ -0,0 +1,15 @@ +#!/usr/bin/env bash + +FILENAME="/tmp/photoprism/nasnet.zip" + +if [[ ! -e assets/tensorflow/nasnet/saved_model.pb ]]; then + if [[ ! -e ${FILENAME} ]]; then + mkdir -p /tmp/photoprism + wget "https://dl.photoprism.org/tensorflow/nasnet.zip" -O ${FILENAME} + fi + + mkdir -p assets/tensorflow + unzip ${FILENAME} -d assets/tensorflow +else + echo "TensorFlow model already downloaded." +fi diff --git a/scripts/download-inception.sh b/scripts/download-inception.sh deleted file mode 100755 index e605eb885..000000000 --- a/scripts/download-inception.sh +++ /dev/null @@ -1,15 +0,0 @@ -#!/usr/bin/env bash - -FILENAME="/tmp/photoprism/inception.zip" - -if [[ ! -e assets/tensorflow/inception/tensorflow_inception_graph.pb ]]; then - if [[ ! -e ${FILENAME} ]]; then - mkdir -p /tmp/photoprism - wget "https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip" -O ${FILENAME} - fi - - mkdir -p assets/tensorflow/inception - unzip ${FILENAME} -d assets/tensorflow/inception -else - echo "TensorFlow Inception V1 model already downloaded." -fi \ No newline at end of file