Skip to content

Commit 5eb6f32

Browse files
authored
Switch to a more comprehensive mimetype detection library (#231)
1 parent 6ce2bd6 commit 5eb6f32

File tree

5 files changed: 56 additions and 21 deletions.

go.mod

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@ require (
88
github.com/dchest/uniuri v0.0.0-20200228104902-7aecb25e1fe5
99
github.com/dustin/go-humanize v1.0.0
1010
github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4
11+
github.com/gabriel-vasile/mimetype v1.1.1
1112
github.com/microcosm-cc/bluemonday v1.0.2
1213
github.com/minio/sha256-simd v0.1.1
1314
github.com/russross/blackfriday v1.5.1
1415
github.com/vharitonsky/iniflags v0.0.0-20180513140207-a33cd0b5f3de
1516
github.com/zeebo/bencode v1.0.0
1617
github.com/zenazn/goji v0.9.0
1718
golang.org/x/crypto v0.0.0-20200302210943-78000ba7a073
18-
gopkg.in/h2non/filetype.v1 v1.0.5
1919
)

go.sum

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ github.com/dustin/go-humanize v1.0.0 h1:VSnTsYCnlFHaM2/igO1h6X3HA71jcobQuxemgkq4
1515
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
1616
github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4 h1:GY1+t5Dr9OKADM64SYnQjw/w99HMYvQ0A8/JoUkxVmc=
1717
github.com/flosch/pongo2 v0.0.0-20190707114632-bbf5a6c351f4/go.mod h1:T9YF2M40nIgbVgp3rreNmTged+9HrbNTIQf1PsaIiTA=
18+
github.com/gabriel-vasile/mimetype v1.1.1 h1:qbN9MPuRf3bstHu9zkI9jDWNfH//9+9kHxr9oRBBBOA=
19+
github.com/gabriel-vasile/mimetype v1.1.1/go.mod h1:6CDPel/o/3/s4+bp6kIbsWATq8pmgOisOPG40CJa6To=
1820
github.com/go-check/check v0.0.0-20180628173108-788fd7840127 h1:0gkP6mzaMqkmpcJYCFOLkIBwI7xFExG03bbkOkCvUPI=
1921
github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98=
2022
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
@@ -68,8 +70,6 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
6870
golang.org/x/tools v0.0.0-20181221001348-537d06c36207/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
6971
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
7072
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
71-
gopkg.in/h2non/filetype.v1 v1.0.5 h1:CC1jjJjoEhNVbMhXYalmGBhOBK2V70Q1N850wt/98/Y=
72-
gopkg.in/h2non/filetype.v1 v1.0.5/go.mod h1:M0yem4rwSX5lLVrkEuRRp2/NinFMD5vgJ4DlAhZcfNo=
7373
gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce h1:xcEWjVhvbDy+nHP67nPDDpbYrY+ILlfndk4bRioVHaU=
7474
gopkg.in/mgo.v2 v2.0.0-20180705113604-9856a29383ce/go.mod h1:yeKp02qBN3iKW1OzL3MGk2IdtZzaj7SFntXj72NppTA=
7575
gopkg.in/yaml.v2 v2.2.2 h1:ZCJp+EgiOT7lHqUV2J862kp8Qj64Jo6az82+3Td9dZw=

helpers/helpers.go

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,8 @@ import (
77
"unicode"
88

99
"github.com/andreimarcu/linx-server/backends"
10+
"github.com/gabriel-vasile/mimetype"
1011
"github.com/minio/sha256-simd"
11-
"gopkg.in/h2non/filetype.v1"
1212
)
1313

1414
func GenerateMetadata(r io.Reader) (m backends.Metadata, err error) {
@@ -21,7 +21,7 @@ func GenerateMetadata(r io.Reader) (m backends.Metadata, err error) {
2121

2222
// Get first 512 bytes for mimetype detection
2323
header := make([]byte, 512)
24-
_, err = teeReader.Read(header)
24+
headerlen, err := teeReader.Read(header)
2525
if err != nil {
2626
return
2727
}
@@ -47,17 +47,8 @@ func GenerateMetadata(r io.Reader) (m backends.Metadata, err error) {
4747

4848
// Use the bytes we extracted earlier and attempt to determine the file
4949
// type
50-
kind, err := filetype.Match(header)
51-
if err != nil {
52-
m.Mimetype = "application/octet-stream"
53-
return m, err
54-
} else if kind.MIME.Value != "" {
55-
m.Mimetype = kind.MIME.Value
56-
} else if printable(header) {
57-
m.Mimetype = "text/plain"
58-
} else {
59-
m.Mimetype = "application/octet-stream"
60-
}
50+
kind := mimetype.Detect(header[:headerlen])
51+
m.Mimetype = kind.String()
6152

6253
return
6354
}

helpers/helpers_test.go

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,10 @@
11
package helpers
22

33
import (
4+
"bytes"
45
"strings"
56
"testing"
7+
"unicode/utf16"
68
)
79

810
func TestGenerateMetadata(t *testing.T) {
@@ -17,7 +19,7 @@ func TestGenerateMetadata(t *testing.T) {
1719
t.Fatalf("Sha256sum was %q instead of expected value of %q", m.Sha256sum, expectedSha256sum)
1820
}
1921

20-
expectedMimetype := "text/plain"
22+
expectedMimetype := "text/plain; charset=utf-8"
2123
if m.Mimetype != expectedMimetype {
2224
t.Fatalf("Mimetype was %q instead of expected value of %q", m.Mimetype, expectedMimetype)
2325
}
@@ -27,3 +29,45 @@ func TestGenerateMetadata(t *testing.T) {
2729
t.Fatalf("Size was %d instead of expected value of %d", m.Size, expectedSize)
2830
}
2931
}
32+
33+
func TestTextCharsets(t *testing.T) {
34+
// verify that different text encodings are detected and passed through
35+
orig := "This is a text string"
36+
utf16 := utf16.Encode([]rune(orig))
37+
utf16LE := make([]byte, len(utf16)*2+2)
38+
utf16BE := make([]byte, len(utf16)*2+2)
39+
utf8 := []byte(orig)
40+
utf16LE[0] = 0xff
41+
utf16LE[1] = 0xfe
42+
utf16BE[0] = 0xfe
43+
utf16BE[1] = 0xff
44+
for i := 0; i < len(utf16); i++ {
45+
lsb := utf16[i] & 0xff
46+
msb := utf16[i] >> 8
47+
utf16LE[i*2+2] = byte(lsb)
48+
utf16LE[i*2+3] = byte(msb)
49+
utf16BE[i*2+2] = byte(msb)
50+
utf16BE[i*2+3] = byte(lsb)
51+
}
52+
53+
testcases := []struct {
54+
data []byte
55+
extension string
56+
mimetype string
57+
}{
58+
{mimetype: "text/plain; charset=utf-8", data: utf8},
59+
{mimetype: "text/plain; charset=utf-16le", data: utf16LE},
60+
{mimetype: "text/plain; charset=utf-16be", data: utf16BE},
61+
}
62+
63+
for i, testcase := range testcases {
64+
r := bytes.NewReader(testcase.data)
65+
m, err := GenerateMetadata(r)
66+
if err != nil {
67+
t.Fatalf("[%d] unexpected error return %v\n", i, err)
68+
}
69+
if m.Mimetype != testcase.mimetype {
70+
t.Errorf("[%d] Expected mimetype '%s', got mimetype '%s'\n", i, testcase.mimetype, m.Mimetype)
71+
}
72+
}
73+
}

upload.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,8 @@ import (
1818
"github.com/andreimarcu/linx-server/backends"
1919
"github.com/andreimarcu/linx-server/expiry"
2020
"github.com/dchest/uniuri"
21+
"github.com/gabriel-vasile/mimetype"
2122
"github.com/zenazn/goji/web"
22-
"gopkg.in/h2non/filetype.v1"
2323
)
2424

2525
var FileTooLargeError = errors.New("File too large.")
@@ -263,11 +263,11 @@ func processUpload(upReq UploadRequest) (upload Upload, err error) {
263263
header = header[:n]
264264

265265
// Determine the type of file from header
266-
kind, err := filetype.Match(header)
267-
if err != nil || kind.Extension == "unknown" {
266+
kind := mimetype.Detect(header)
267+
if len(kind.Extension()) < 2 {
268268
extension = "file"
269269
} else {
270-
extension = kind.Extension
270+
extension = kind.Extension()[1:] // remove leading "."
271271
}
272272
}
273273

0 commit comments

Comments
 (0)