Skip to content

Commit eda267d

Browse files
committed
refactor DetectContentTypeFromReader
1 parent 81bd73c commit eda267d

File tree

3 files changed

+22
-40
lines changed

3 files changed

+22
-40
lines changed

modules/git/blob.go

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -22,17 +22,22 @@ func (b *Blob) Name() string {
2222
return b.name
2323
}
2424

25-
// GetBlobContent Gets the limited content of the blob as raw text
26-
func (b *Blob) GetBlobContent(limit int64) (string, error) {
25+
// GetBlobBytes gets the limited content of the blob as raw bytes
26+
func (b *Blob) GetBlobBytes(limit int64) ([]byte, error) {
2727
if limit <= 0 {
28-
return "", nil
28+
return nil, nil
2929
}
3030
dataRc, err := b.DataAsync()
3131
if err != nil {
32-
return "", err
32+
return nil, err
3333
}
3434
defer dataRc.Close()
35-
buf, err := util.ReadWithLimit(dataRc, int(limit))
35+
return util.ReadWithLimit(dataRc, int(limit))
36+
}
37+
38+
// GetBlobContent Gets the limited content of the blob as raw text
39+
func (b *Blob) GetBlobContent(limit int64) (string, error) {
40+
buf, err := b.GetBlobBytes(limit)
3641
return string(buf), err
3742
}
3843

@@ -99,11 +104,9 @@ loop:
99104

100105
// GuessContentType guesses the content type of the blob.
101106
func (b *Blob) GuessContentType() (typesniffer.SniffedType, error) {
102-
r, err := b.DataAsync()
107+
buf, err := b.GetBlobBytes(typesniffer.SniffContentSize)
103108
if err != nil {
104109
return typesniffer.SniffedType{}, err
105110
}
106-
defer r.Close()
107-
108-
return typesniffer.DetectContentTypeFromReader(r)
111+
return typesniffer.DetectContentType(buf), nil
109112
}

modules/typesniffer/typesniffer.go

Lines changed: 9 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -6,19 +6,14 @@ package typesniffer
66
import (
77
"bytes"
88
"encoding/binary"
9-
"fmt"
10-
"io"
119
"net/http"
1210
"regexp"
1311
"slices"
1412
"strings"
1513
"sync"
16-
17-
"code.gitea.io/gitea/modules/util"
1814
)
1915

20-
// Use at most this many bytes to determine Content Type.
21-
const sniffLen = 1024
16+
const SniffContentSize = 1024
2217

2318
const (
2419
MimeTypeImageSvg = "image/svg+xml"
@@ -42,7 +37,7 @@ type SniffedType struct {
4237
contentType string
4338
}
4439

45-
// IsText detects if the content format is plain text.
40+
// IsText detects if the content format belongs to the text family, including text/plain, text/html, text/css, etc.
4641
func (ct SniffedType) IsText() bool {
4742
return strings.Contains(ct.contentType, "text/")
4843
}
@@ -66,12 +61,12 @@ func (ct SniffedType) IsPDF() bool {
6661
return strings.Contains(ct.contentType, "application/pdf")
6762
}
6863

69-
// IsVideo detects if data is an video format
64+
// IsVideo detects if data is a video format
7065
func (ct SniffedType) IsVideo() bool {
7166
return strings.Contains(ct.contentType, "video/")
7267
}
7368

74-
// IsAudio detects if data is an video format
69+
// IsAudio detects if data is an audio format
7570
func (ct SniffedType) IsAudio() bool {
7671
return strings.Contains(ct.contentType, "audio/")
7772
}
@@ -87,10 +82,6 @@ func (ct SniffedType) IsBrowsableBinaryType() bool {
8782
return ct.IsImage() || ct.IsSvgImage() || ct.IsPDF() || ct.IsVideo() || ct.IsAudio()
8883
}
8984

90-
func (ct SniffedType) IsApplicationOctetStream() bool {
91-
return ct.contentType == "application/octet-stream"
92-
}
93-
9485
// GetMimeType returns the mime type
9586
func (ct SniffedType) GetMimeType() string {
9687
return strings.SplitN(ct.contentType, ";", 2)[0]
@@ -116,16 +107,16 @@ func detectFileTypeBox(data []byte) (brands []string, found bool) {
116107
return brands, true
117108
}
118109

119-
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/unknown if input is empty.
110+
// DetectContentType extends http.DetectContentType with more content types. Defaults to text/plain if input is empty.
120111
func DetectContentType(data []byte) SniffedType {
121112
if len(data) == 0 {
122-
return SniffedType{"text/unknown"}
113+
return SniffedType{"text/plain"}
123114
}
124115

125116
ct := http.DetectContentType(data)
126117

127-
if len(data) > sniffLen {
128-
data = data[:sniffLen]
118+
if len(data) > SniffContentSize {
119+
data = data[:SniffContentSize]
129120
}
130121

131122
vars := globalVars()
@@ -143,7 +134,7 @@ func DetectContentType(data []byte) SniffedType {
143134

144135
if strings.HasPrefix(ct, "audio/") && bytes.HasPrefix(data, []byte("ID3")) {
145136
// The MP3 detection is quite inaccurate, any content with "ID3" prefix will result in "audio/mpeg".
146-
// So remove the "ID3" prefix and detect again, if result is text, then it must be text content.
137+
// So remove the "ID3" prefix and detect again, then if the result is "text", it must be text content.
147138
// This works especially because audio files contain many unprintable/invalid characters like `0x00`
148139
ct2 := http.DetectContentType(data[3:])
149140
if strings.HasPrefix(ct2, "text/") {
@@ -169,15 +160,3 @@ func DetectContentType(data []byte) SniffedType {
169160
}
170161
return SniffedType{ct}
171162
}
172-
173-
// DetectContentTypeFromReader guesses the content type contained in the reader.
174-
func DetectContentTypeFromReader(r io.Reader) (SniffedType, error) {
175-
buf := make([]byte, sniffLen)
176-
n, err := util.ReadAtMost(r, buf)
177-
if err != nil {
178-
return SniffedType{}, fmt.Errorf("DetectContentTypeFromReader io error: %w", err)
179-
}
180-
buf = buf[:n]
181-
182-
return DetectContentType(buf), nil
183-
}

modules/typesniffer/typesniffer_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ func TestDetectContentTypeLongerThanSniffLen(t *testing.T) {
1717
// Pre-condition: Shorter than SniffContentSize detects SVG.
1818
assert.Equal(t, "image/svg+xml", DetectContentType([]byte(`<!-- Comment --><svg></svg>`)).contentType)
1919
// Longer than SniffContentSize detects something else.
20-
assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", sniffLen)+` --><svg></svg>`)).contentType)
20+
assert.NotEqual(t, "image/svg+xml", DetectContentType([]byte(`<!-- `+strings.Repeat("x", SniffContentSize)+` --><svg></svg>`)).contentType)
2121
}
2222

2323
func TestIsTextFile(t *testing.T) {

0 commit comments

Comments
 (0)