From 5eb6f32ff0a0252caab5fbc8e154912b4b5e4676 Mon Sep 17 00:00:00 2001 From: Infinoid Date: Mon, 3 Aug 2020 01:16:47 -0400 Subject: [PATCH] Switch to a more comprehensive mimetype detection library (#231) --- go.mod | 2 +- go.sum | 4 ++-- helpers/helpers.go | 17 ++++----------- helpers/helpers_test.go | 46 ++++++++++++++++++++++++++++++++++++++++- upload.go | 8 +++---- 5 files changed, 56 insertions(+), 21 deletions(-) diff --git a/go.mod b/go.mod index 50a48ab..f433699 100644 --- a/go.mod +++ b/go.mod @@ -8,6 +8,7 @@ require ( github.com/dchest/uniuri v0.0.0-20200228104902-7aecb25e1fe5 github.com/dustin/go-humanize v1.0.0 github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4 + github.com/gabriel-vasile/mimetype v1.1.1 github.com/microcosm-cc/bluemonday v1.0.2 github.com/minio/sha256-simd v0.1.1 github.com/russross/blackfriday v1.5.1 @@ -15,5 +16,4 @@ require ( github.com/zeebo/bencode v1.0.0 github.com/zenazn/goji v0.9.0 golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073 - gopkg.in/h2non/filetype.v1 v1.0.5 ) diff --git a/go.sum b/go.sum index 15a736c..99d63bf 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4 github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk= github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4 h1:GY1+t5Dr9OKADM64SYnQjw/w99HMYvQ0A8/JoUkxVmc= github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4/go.mod h1:T9YF2M40nIgbVgp3rreNmTged+9HrbNTIQf1PsaIiTA= +github.com/gabriel-vasile/mimetype v1.1.1 h1:qbN9MPuRf3bstHu9zkI9jDWNfH//9+9kHxr9oRBBBOA= +github.com/gabriel-vasile/mimetype v1.1.1/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To= github.com/go-check/check v0.0.0-20180628173108-788fd7840127 h1:0gkP6mzaMqkmpcJYCFOLkIBwI7xFExG03bbkOkCvUPI= github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98= github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg= @@ -68,8 +70,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= -gopkg.in/h2non/filetype.v1 v1.0.5 h1:CC1jjJjoEhNVbMhXYalmGBhOBK2V70Q1N850wt/98/Y= -gopkg.in/h2non/filetype.v1 v1.0.5/go.mod h1:M0yem4rwSX5lLVrkEuRRp2/NinFMD5vgJ4DlAhZcfNo= gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce h1:xcEWjVhvbDy+nHP67nPDDpbYrY+ILlfndk4bRioVHaU= gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA= gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw= diff --git a/helpers/helpers.go b/helpers/helpers.go index f51d998..f13e302 100644 --- a/helpers/helpers.go +++ b/helpers/helpers.go @@ -7,8 +7,8 @@ import ( "unicode" "github.com/andreimarcu/linx-server/backends" + "github.com/gabriel-vasile/mimetype" "github.com/minio/sha256-simd" - "gopkg.in/h2non/filetype.v1" ) func GenerateMetadata(r io.Reader) (m backends.Metadata, err error) { @@ -21,7 +21,7 @@ func GenerateMetadata(r io.Reader) (m backends.Metadata, err error) { // Get first 512 bytes for mimetype detection header := make([]byte, 512) - _, err = teeReader.Read(header) + headerlen, err := teeReader.Read(header) if err != nil { return } @@ -47,17 +47,8 @@ func GenerateMetadata(r io.Reader) (m backends.Metadata, err error) { // Use the bytes we extracted earlier and attempt to determine the file // type - kind, err := filetype.Match(header) - if err != nil { - m.Mimetype = "application/octet-stream" - return m, err - } else if kind.MIME.Value != "" { - m.Mimetype = kind.MIME.Value - } else if printable(header) { - m.Mimetype = "text/plain" - } else { - m.Mimetype = "application/octet-stream" - } + kind := mimetype.Detect(header[:headerlen]) + m.Mimetype = kind.String() return } diff --git a/helpers/helpers_test.go b/helpers/helpers_test.go index 800d0d2..d891173 100644 --- a/helpers/helpers_test.go +++ b/helpers/helpers_test.go @@ -1,8 +1,10 @@ package helpers import ( + "bytes" "strings" "testing" + "unicode/utf16" ) func TestGenerateMetadata(t *testing.T) { @@ -17,7 +19,7 @@ func TestGenerateMetadata(t *testing.T) { t.Fatalf("Sha256sum was %q instead of expected value of %q", m.Sha256sum, expectedSha256sum) } - expectedMimetype := "text/plain" + expectedMimetype := "text/plain; charset=utf-8" if m.Mimetype != expectedMimetype { t.Fatalf("Mimetype was %q instead of expected value of %q", m.Mimetype, expectedMimetype) } @@ -27,3 +29,45 @@ func TestGenerateMetadata(t *testing.T) { t.Fatalf("Size was %d instead of expected value of %d", m.Size, expectedSize) } } + +func TestTextCharsets(t *testing.T) { + // verify that different text encodings are detected and passed through + orig := "This is a text string" + utf16 := utf16.Encode([]rune(orig)) + utf16LE := make([]byte, len(utf16)*2+2) + utf16BE := make([]byte, len(utf16)*2+2) + utf8 := []byte(orig) + utf16LE[0] = 0xff + utf16LE[1] = 0xfe + utf16BE[0] = 0xfe + utf16BE[1] = 0xff + for i := 0; i < len(utf16); i++ { + lsb := utf16[i] & 0xff + msb := utf16[i] >> 8 + utf16LE[i*2+2] = byte(lsb) + utf16LE[i*2+3] = byte(msb) + utf16BE[i*2+2] = byte(msb) + utf16BE[i*2+3] = byte(lsb) + } + + testcases := []struct { + data []byte + extension string + mimetype string + }{ + {mimetype: "text/plain; charset=utf-8", data: utf8}, + {mimetype: "text/plain; charset=utf-16le", data: utf16LE}, + {mimetype: "text/plain; charset=utf-16be", data: utf16BE}, + } + + for i, testcase := range testcases { + r := bytes.NewReader(testcase.data) + m, err := GenerateMetadata(r) + if err != nil { + t.Fatalf("[%d] unexpected error return %v\n", i, err) + } + if m.Mimetype != testcase.mimetype { + t.Errorf("[%d] Expected mimetype '%s', got mimetype '%s'\n", i, testcase.mimetype, m.Mimetype) + } + } +} diff --git a/upload.go b/upload.go index 8526260..3cac122 100644 --- a/upload.go +++ b/upload.go @@ -18,8 +18,8 @@ import ( "github.com/andreimarcu/linx-server/backends" "github.com/andreimarcu/linx-server/expiry" "github.com/dchest/uniuri" + "github.com/gabriel-vasile/mimetype" "github.com/zenazn/goji/web" - "gopkg.in/h2non/filetype.v1" ) var FileTooLargeError = errors.New("File too large.") @@ -263,11 +263,11 @@ func processUpload(upReq UploadRequest) (upload Upload, err error) { header = header[:n] // Determine the type of file from header - kind, err := filetype.Match(header) - if err != nil || kind.Extension == "unknown" { + kind := mimetype.Detect(header) + if len(kind.Extension()) < 2 { extension = "file" } else { - extension = kind.Extension + extension = kind.Extension()[1:] // remove leading "." } }