Skip to content

Commit f5983d5

Browse files
authored
proto: make invalid UTF-8 errors non-fatal (#660)
The current logic currently treats RequredNotSetError as a non-fatal error such that it continues to proceed with its normal execution, but returns that non-fatal error at the end. We now make it such that invalid UTF-8 is also a distinguishable error that is treated as non-fatal by the execution logic. In the rare event that both an RequiredNotSet error and InvalidUTF8 error is encountered, the first one encountered is returned. In the process of making this change, we also fix a number of cases where RequiredNotSet was treated as fatal, when it should not have been non-fatal (notably in the logic for extensions, message sets, and maps). This change deliberately does not provide an API make it easy to distinguish invalid UTF-8 errors as this is not a normal behavior we want users to depend on. We can always expose additional API for this in the future. Users can test for invalid UTF-8 with: re, ok := err.(interface{ InvalidUTF8() bool }) isInvalidUTF8 := ok && re.InvalidUTF8()
1 parent 560bdb6 commit f5983d5

File tree

5 files changed

+169
-94
lines changed

5 files changed

+169
-94
lines changed

proto/all_test.go

Lines changed: 27 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2256,26 +2256,32 @@ func TestInvalidUTF8(t *testing.T) {
22562256
label string
22572257
proto2 Message
22582258
proto3 Message
2259+
want []byte
22592260
}{{
22602261
label: "Scalar",
22612262
proto2: &TestUTF8{Scalar: String(invalidUTF8)},
22622263
proto3: &pb3.TestUTF8{Scalar: invalidUTF8},
2264+
want: []byte{0x0a, 0x07, 0xde, 0xad, 0xbe, 0xef, 0x80, 0x00, 0xff},
22632265
}, {
22642266
label: "Vector",
22652267
proto2: &TestUTF8{Vector: []string{invalidUTF8}},
22662268
proto3: &pb3.TestUTF8{Vector: []string{invalidUTF8}},
2269+
want: []byte{0x12, 0x07, 0xde, 0xad, 0xbe, 0xef, 0x80, 0x00, 0xff},
22672270
}, {
22682271
label: "Oneof",
22692272
proto2: &TestUTF8{Oneof: &TestUTF8_Field{invalidUTF8}},
22702273
proto3: &pb3.TestUTF8{Oneof: &pb3.TestUTF8_Field{invalidUTF8}},
2274+
want: []byte{0x1a, 0x07, 0xde, 0xad, 0xbe, 0xef, 0x80, 0x00, 0xff},
22712275
}, {
22722276
label: "MapKey",
22732277
proto2: &TestUTF8{MapKey: map[string]int64{invalidUTF8: 0}},
22742278
proto3: &pb3.TestUTF8{MapKey: map[string]int64{invalidUTF8: 0}},
2279+
want: []byte{0x22, 0x0b, 0x0a, 0x07, 0xde, 0xad, 0xbe, 0xef, 0x80, 0x00, 0xff, 0x10, 0x00},
22752280
}, {
22762281
label: "MapValue",
22772282
proto2: &TestUTF8{MapValue: map[int64]string{0: invalidUTF8}},
22782283
proto3: &pb3.TestUTF8{MapValue: map[int64]string{0: invalidUTF8}},
2284+
want: []byte{0x2a, 0x0b, 0x08, 0x00, 0x12, 0x07, 0xde, 0xad, 0xbe, 0xef, 0x80, 0x00, 0xff},
22792285
}}
22802286

22812287
for _, tt := range tests {
@@ -2284,22 +2290,37 @@ func TestInvalidUTF8(t *testing.T) {
22842290
if err != nil {
22852291
t.Errorf("Marshal(proto2.%s) = %v, want nil", tt.label, err)
22862292
}
2287-
tt.proto2.Reset()
2288-
err = Unmarshal(b, tt.proto2)
2289-
if err != nil {
2293+
if !bytes.Equal(b, tt.want) {
2294+
t.Errorf("Marshal(proto2.%s) = %x, want %x", tt.label, b, tt.want)
2295+
}
2296+
2297+
m := Clone(tt.proto2)
2298+
m.Reset()
2299+
if err = Unmarshal(tt.want, m); err != nil {
22902300
t.Errorf("Unmarshal(proto2.%s) = %v, want nil", tt.label, err)
22912301
}
2302+
if !Equal(m, tt.proto2) {
2303+
t.Errorf("proto2.%s: output mismatch:\ngot %v\nwant %v", tt.label, m, tt.proto2)
2304+
}
22922305

22932306
// Proto3 should validate UTF-8.
2294-
_, err = Marshal(tt.proto3)
2307+
b, err = Marshal(tt.proto3)
22952308
if err == nil {
22962309
t.Errorf("Marshal(proto3.%s) = %v, want non-nil", tt.label, err)
22972310
}
2298-
tt.proto3.Reset()
2299-
err = Unmarshal(b, tt.proto3)
2311+
if !bytes.Equal(b, tt.want) {
2312+
t.Errorf("Marshal(proto3.%s) = %x, want %x", tt.label, b, tt.want)
2313+
}
2314+
2315+
m = Clone(tt.proto3)
2316+
m.Reset()
2317+
err = Unmarshal(tt.want, m)
23002318
if err == nil {
23012319
t.Errorf("Unmarshal(proto3.%s) = %v, want non-nil", tt.label, err)
23022320
}
2321+
if !Equal(m, tt.proto3) {
2322+
t.Errorf("proto3.%s: output mismatch:\ngot %v\nwant %v", tt.label, m, tt.proto2)
2323+
}
23032324
}
23042325
}
23052326

proto/encode.go

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -37,24 +37,9 @@ package proto
3737

3838
import (
3939
"errors"
40-
"fmt"
4140
"reflect"
4241
)
4342

44-
// RequiredNotSetError is an error type returned by either Marshal or Unmarshal.
45-
// Marshal reports this when a required field is not initialized.
46-
// Unmarshal reports this when a required field is missing from the wire data.
47-
type RequiredNotSetError struct {
48-
field string
49-
}
50-
51-
func (e *RequiredNotSetError) Error() string {
52-
if e.field == "" {
53-
return fmt.Sprintf("proto: required field not set")
54-
}
55-
return fmt.Sprintf("proto: required field %q not set", e.field)
56-
}
57-
5843
var (
5944
// errRepeatedHasNil is the error returned if Marshal is called with
6045
// a struct with a repeated field containing a nil element.

proto/lib.go

Lines changed: 60 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,6 @@ package proto
265265

266266
import (
267267
"encoding/json"
268-
"errors"
269268
"fmt"
270269
"log"
271270
"reflect"
@@ -274,7 +273,66 @@ import (
274273
"sync"
275274
)
276275

277-
var errInvalidUTF8 = errors.New("proto: invalid UTF-8 string")
276+
// RequiredNotSetError is an error type returned by either Marshal or Unmarshal.
277+
// Marshal reports this when a required field is not initialized.
278+
// Unmarshal reports this when a required field is missing from the wire data.
279+
type RequiredNotSetError struct{ field string }
280+
281+
func (e *RequiredNotSetError) Error() string {
282+
if e.field == "" {
283+
return fmt.Sprintf("proto: required field not set")
284+
}
285+
return fmt.Sprintf("proto: required field %q not set", e.field)
286+
}
287+
func (e *RequiredNotSetError) RequiredNotSet() bool {
288+
return true
289+
}
290+
291+
type invalidUTF8Error struct{ field string }
292+
293+
func (e *invalidUTF8Error) Error() string {
294+
if e.field == "" {
295+
return "proto: invalid UTF-8 detected"
296+
}
297+
return fmt.Sprintf("proto: field %q contains invalid UTF-8", e.field)
298+
}
299+
func (e *invalidUTF8Error) InvalidUTF8() bool {
300+
return true
301+
}
302+
303+
// errInvalidUTF8 is a sentinel error to identify fields with invalid UTF-8.
304+
// This error should not be exposed to the external API as such errors should
305+
// be recreated with the field information.
306+
var errInvalidUTF8 = &invalidUTF8Error{}
307+
308+
// isNonFatal reports whether the error is either a RequiredNotSet error
309+
// or a InvalidUTF8 error.
310+
func isNonFatal(err error) bool {
311+
if re, ok := err.(interface{ RequiredNotSet() bool }); ok && re.RequiredNotSet() {
312+
return true
313+
}
314+
if re, ok := err.(interface{ InvalidUTF8() bool }); ok && re.InvalidUTF8() {
315+
return true
316+
}
317+
return false
318+
}
319+
320+
type nonFatal struct{ E error }
321+
322+
// Merge merges err into nf and reports whether it was successful.
323+
// Otherwise it returns false for any fatal non-nil errors.
324+
func (nf *nonFatal) Merge(err error) (ok bool) {
325+
if err == nil {
326+
return true // not an error
327+
}
328+
if !isNonFatal(err) {
329+
return false // fatal error
330+
}
331+
if nf.E == nil {
332+
nf.E = err // store first instance of non-fatal error
333+
}
334+
return true
335+
}
278336

279337
// Message is implemented by generated protocol buffer messages.
280338
type Message interface {

0 commit comments

Comments
 (0)