Skip to content

Commit 633dd1d

Browse files
committed
encoding/json: fix Decoder.InputOffset regression in goexperiment.jsonv2
The Decoder.InputOffset method was always ambiguous about the exact offset returned since anything between the end of the previous token to the start of the next token could be a valid result. Empirically, it seems that the behavior was to report the end of the previous token unless Decoder.More is called, in which case it reports the start of the next token. This is an odd semantic since a relatively side-effect free method like More is not quite so side-effect free. However, our goal is to preserve historical v1 semantic when possible regardless of whether it made sense. Note that jsontext.Decoder.InputOffset consistently always reports the end of the previous token. Users can explicitly choose the exact position they want by inspecting the UnreadBuffer. Fixes #75468 Change-Id: I1e946e83c9d29dfc09f2913ff8d6b2b80632f292 Reviewed-on: https://go-review.googlesource.com/c/go/+/703856 Reviewed-by: Damien Neil <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]> Reviewed-by: Junyang Shao <[email protected]>
1 parent 8ad27fb commit 633dd1d

File tree

3 files changed

+149
-1
lines changed

3 files changed

+149
-1
lines changed

src/encoding/json/stream_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -557,3 +557,67 @@ func TestTokenTruncation(t *testing.T) {
557557
}
558558
}
559559
}
560+
561+
func TestDecoderInputOffset(t *testing.T) {
562+
const input = ` [
563+
[ ] , [ "one" ] , [ "one" , "two" ] ,
564+
{ } , { "alpha" : "bravo" } , { "alpha" : "bravo" , "fizz" : "buzz" }
565+
] `
566+
wantOffsets := []int64{
567+
0, 1, 2, 5, 6, 7, 8, 9, 12, 13, 18, 19, 20, 21, 24, 25, 30, 31,
568+
38, 39, 40, 41, 46, 47, 48, 49, 52, 53, 60, 61, 70, 71, 72, 73,
569+
76, 77, 84, 85, 94, 95, 103, 104, 112, 113, 114, 116, 117, 117,
570+
117, 117,
571+
}
572+
wantMores := []bool{
573+
true, true, false, true, true, false, true, true, true, false,
574+
true, false, true, true, true, false, true, true, true, true,
575+
true, false, false, false, false,
576+
}
577+
578+
d := NewDecoder(strings.NewReader(input))
579+
checkOffset := func() {
580+
t.Helper()
581+
got := d.InputOffset()
582+
if len(wantOffsets) == 0 {
583+
t.Fatalf("InputOffset = %d, want nil", got)
584+
}
585+
want := wantOffsets[0]
586+
if got != want {
587+
t.Fatalf("InputOffset = %d, want %d", got, want)
588+
}
589+
wantOffsets = wantOffsets[1:]
590+
}
591+
checkMore := func() {
592+
t.Helper()
593+
got := d.More()
594+
if len(wantMores) == 0 {
595+
t.Fatalf("More = %v, want nil", got)
596+
}
597+
want := wantMores[0]
598+
if got != want {
599+
t.Fatalf("More = %v, want %v", got, want)
600+
}
601+
wantMores = wantMores[1:]
602+
}
603+
checkOffset()
604+
checkMore()
605+
checkOffset()
606+
for {
607+
if _, err := d.Token(); err == io.EOF {
608+
break
609+
} else if err != nil {
610+
t.Fatalf("Token error: %v", err)
611+
}
612+
checkOffset()
613+
checkMore()
614+
checkOffset()
615+
}
616+
checkOffset()
617+
checkMore()
618+
checkOffset()
619+
620+
if len(wantOffsets)+len(wantMores) > 0 {
621+
t.Fatal("unconsumed testdata")
622+
}
623+
}

src/encoding/json/v2_stream.go

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@ type Decoder struct {
2020
dec *jsontext.Decoder
2121
opts jsonv2.Options
2222
err error
23+
24+
// hadPeeked reports whether [Decoder.More] was called.
25+
// It is reset by [Decoder.Decode] and [Decoder.Token].
26+
hadPeeked bool
2327
}
2428

2529
// NewDecoder returns a new decoder that reads from r.
@@ -76,6 +80,7 @@ func (dec *Decoder) Decode(v any) error {
7680
}
7781
return dec.err
7882
}
83+
dec.hadPeeked = false
7984
return jsonv2.Unmarshal(b, v, dec.opts)
8085
}
8186

@@ -206,6 +211,7 @@ func (dec *Decoder) Token() (Token, error) {
206211
}
207212
return nil, transformSyntacticError(err)
208213
}
214+
dec.hadPeeked = false
209215
switch k := tok.Kind(); k {
210216
case 'n':
211217
return nil, nil
@@ -230,6 +236,7 @@ func (dec *Decoder) Token() (Token, error) {
230236
// More reports whether there is another element in the
231237
// current array or object being parsed.
232238
func (dec *Decoder) More() bool {
239+
dec.hadPeeked = true
233240
k := dec.dec.PeekKind()
234241
return k > 0 && k != ']' && k != '}'
235242
}
@@ -238,5 +245,18 @@ func (dec *Decoder) More() bool {
238245
// The offset gives the location of the end of the most recently returned token
239246
// and the beginning of the next token.
240247
func (dec *Decoder) InputOffset() int64 {
241-
return dec.dec.InputOffset()
248+
offset := dec.dec.InputOffset()
249+
if dec.hadPeeked {
250+
// Historically, InputOffset reported the location of
251+
// the end of the most recently returned token
252+
// unless [Decoder.More] is called, in which case, it reported
253+
// the beginning of the next token.
254+
unreadBuffer := dec.dec.UnreadBuffer()
255+
trailingTokens := bytes.TrimLeft(unreadBuffer, " \n\r\t")
256+
if len(trailingTokens) > 0 {
257+
leadingWhitespace := len(unreadBuffer) - len(trailingTokens)
258+
offset += int64(leadingWhitespace)
259+
}
260+
}
261+
return offset
242262
}

src/encoding/json/v2_stream_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -537,3 +537,67 @@ func TestTokenTruncation(t *testing.T) {
537537
}
538538
}
539539
}
540+
541+
func TestDecoderInputOffset(t *testing.T) {
542+
const input = ` [
543+
[ ] , [ "one" ] , [ "one" , "two" ] ,
544+
{ } , { "alpha" : "bravo" } , { "alpha" : "bravo" , "fizz" : "buzz" }
545+
] `
546+
wantOffsets := []int64{
547+
0, 1, 2, 5, 6, 7, 8, 9, 12, 13, 18, 19, 20, 21, 24, 25, 30, 31,
548+
38, 39, 40, 41, 46, 47, 48, 49, 52, 53, 60, 61, 70, 71, 72, 73,
549+
76, 77, 84, 85, 94, 95, 103, 104, 112, 113, 114, 116, 117, 117,
550+
117, 117,
551+
}
552+
wantMores := []bool{
553+
true, true, false, true, true, false, true, true, true, false,
554+
true, false, true, true, true, false, true, true, true, true,
555+
true, false, false, false, false,
556+
}
557+
558+
d := NewDecoder(strings.NewReader(input))
559+
checkOffset := func() {
560+
t.Helper()
561+
got := d.InputOffset()
562+
if len(wantOffsets) == 0 {
563+
t.Fatalf("InputOffset = %d, want nil", got)
564+
}
565+
want := wantOffsets[0]
566+
if got != want {
567+
t.Fatalf("InputOffset = %d, want %d", got, want)
568+
}
569+
wantOffsets = wantOffsets[1:]
570+
}
571+
checkMore := func() {
572+
t.Helper()
573+
got := d.More()
574+
if len(wantMores) == 0 {
575+
t.Fatalf("More = %v, want nil", got)
576+
}
577+
want := wantMores[0]
578+
if got != want {
579+
t.Fatalf("More = %v, want %v", got, want)
580+
}
581+
wantMores = wantMores[1:]
582+
}
583+
checkOffset()
584+
checkMore()
585+
checkOffset()
586+
for {
587+
if _, err := d.Token(); err == io.EOF {
588+
break
589+
} else if err != nil {
590+
t.Fatalf("Token error: %v", err)
591+
}
592+
checkOffset()
593+
checkMore()
594+
checkOffset()
595+
}
596+
checkOffset()
597+
checkMore()
598+
checkOffset()
599+
600+
if len(wantOffsets)+len(wantMores) > 0 {
601+
t.Fatal("unconsumed testdata")
602+
}
603+
}

0 commit comments

Comments
 (0)