Skip to content

Commit 54abd0a

Browse files
committed
fix(parser): handle feeds with leading whitespace that exceeds buffer size
1 parent 5eab475 commit 54abd0a

File tree

2 files changed

+88
-6
lines changed

2 files changed

+88
-6
lines changed

internal/reader/parser/format.go

Lines changed: 35 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,9 +4,9 @@
44
package parser // import "miniflux.app/v2/internal/reader/parser"
55

66
import (
7-
"bytes"
87
"encoding/xml"
98
"io"
9+
"unicode"
1010

1111
rxml "miniflux.app/v2/internal/reader/xml"
1212
)
@@ -22,11 +22,7 @@ const (
2222

2323
// DetectFeedFormat tries to guess the feed format from input data.
2424
func DetectFeedFormat(r io.ReadSeeker) (string, string) {
25-
var dataArray = [32]byte{}
26-
data := dataArray[:]
27-
r.Read(data)
28-
29-
if bytes.HasPrefix(bytes.TrimSpace(data), []byte("{")) {
25+
if isJSON, err := detectJSONFormat(r); err == nil && isJSON {
3026
return FormatJSON, ""
3127
}
3228

@@ -58,3 +54,36 @@ func DetectFeedFormat(r io.ReadSeeker) (string, string) {
5854

5955
return FormatUnknown, ""
6056
}
57+
58+
// detectJSONFormat reports whether the first non-whitespace byte produced by r
// is '{', which is taken as the marker of a JSON document.
//
// The reader is consumed in small chunks until a non-whitespace byte is seen
// or the reader is exhausted, so any amount of leading whitespace is
// tolerated. Each byte is classified on its own with unicode.IsSpace.
//
// It returns (false, nil) when the input is empty or contains only
// whitespace, and (false, err) when the reader fails with a non-EOF error
// before any non-whitespace byte has been seen. The read position of r is not
// restored; callers that need to re-read the data must seek themselves.
func detectJSONFormat(r io.ReadSeeker) (bool, error) {
	const bufferSize = 32
	buffer := make([]byte, bufferSize)

	for {
		n, err := r.Read(buffer)

		// Inspect whatever was read before looking at err: a reader is allowed
		// to return data together with an error (including io.EOF).
		for _, ch := range buffer[:n] {
			if unicode.IsSpace(rune(ch)) {
				continue
			}
			// The first non-whitespace byte decides the outcome.
			return ch == '{', nil
		}

		// A short read does not imply end of input (io.Reader may return fewer
		// bytes than requested at any time), so only the error value is used
		// to decide when to stop.
		switch {
		case err == io.EOF:
			return false, nil // Nothing but whitespace (or nothing at all).
		case err != nil:
			return false, err
		}
	}
}

internal/reader/parser/format_test.go

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,3 +77,56 @@ func TestDetectUnknown(t *testing.T) {
7777
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
7878
}
7979
}
80+
81+
func TestDetectJSONWithLargeLeadingWhitespace(t *testing.T) {
82+
leadingWhitespace := strings.Repeat(" ", 10000)
83+
data := leadingWhitespace + `{
84+
"version" : "https://jsonfeed.org/version/1",
85+
"title" : "Example with lots of leading whitespace"
86+
}`
87+
format, _ := DetectFeedFormat(strings.NewReader(data))
88+
89+
if format != FormatJSON {
90+
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
91+
}
92+
}
93+
94+
func TestDetectJSONWithMixedWhitespace(t *testing.T) {
95+
leadingWhitespace := strings.Repeat("\n\t ", 10000)
96+
data := leadingWhitespace + `{
97+
"version" : "https://jsonfeed.org/version/1",
98+
"title" : "Example with mixed whitespace"
99+
}`
100+
format, _ := DetectFeedFormat(strings.NewReader(data))
101+
102+
if format != FormatJSON {
103+
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
104+
}
105+
}
106+
107+
func TestDetectOnlyWhitespace(t *testing.T) {
108+
data := strings.Repeat(" \t\n\r", 10000)
109+
format, _ := DetectFeedFormat(strings.NewReader(data))
110+
111+
if format != FormatUnknown {
112+
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatUnknown)
113+
}
114+
}
115+
116+
func TestDetectJSONSmallerThanBuffer(t *testing.T) {
117+
data := `{"version":"1"}` // This is only 15 bytes, well below the 32-byte buffer
118+
format, _ := DetectFeedFormat(strings.NewReader(data))
119+
120+
if format != FormatJSON {
121+
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
122+
}
123+
}
124+
125+
func TestDetectJSONWithWhitespaceSmallerThanBuffer(t *testing.T) {
126+
data := ` {"title":"test"} `
127+
format, _ := DetectFeedFormat(strings.NewReader(data))
128+
129+
if format != FormatJSON {
130+
t.Errorf(`Wrong format detected: %q instead of %q`, format, FormatJSON)
131+
}
132+
}

0 commit comments

Comments
 (0)