Skip to content

Commit c532754

Browse files
committed
manifest,record: disambiguate unexpected EOFs
Disambiguate an unexpected EOF in the record envelope (eg, missing chunks) from an unexpected EOF within the body of the record. An unexpected EOF of the envelope may occur if the process crashes while appending a version edit to the manifest file. An unexpected EOF within the record indicates corruption. Previously, some version edit code paths re-mapped an unexpected EOF while parsing the record envelope into a corruption error. In this case, recovery failed improperly when it should've ignored the incomplete version edit. In the other direction, some code paths would return io.ErrUnexpectedEOF while decoding the version edit structure within a valid envelope record. In this case, the caller interpreted the ErrUnexpectedEOF as a sudden end to the record envelope, and recovery succeeded improperly when it should've aborted with a corruption error.
1 parent 57c482f commit c532754

File tree

7 files changed

+231
-133
lines changed

7 files changed

+231
-133
lines changed

internal/manifest/version_edit.go

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ import (
2525
// TODO(peter): describe the MANIFEST file format, independently of the C++
2626
// project.
2727

28-
var errCorruptManifest = base.CorruptionErrorf("pebble: corrupt manifest")
29-
3028
type byteReader interface {
3129
io.ByteReader
3230
io.Reader
@@ -551,7 +549,7 @@ func (v *VersionEdit) Decode(r io.Reader) error {
551549
return base.CorruptionErrorf("column families are not supported")
552550

553551
default:
554-
return errCorruptManifest
552+
return base.CorruptionErrorf("MANIFEST: unknown tag: %d", tag)
555553
}
556554
}
557555
return nil
@@ -847,7 +845,7 @@ func (d versionEditDecoder) readBytes() ([]byte, error) {
847845
_, err = io.ReadFull(d, s)
848846
if err != nil {
849847
if err == io.ErrUnexpectedEOF {
850-
return nil, errCorruptManifest
848+
return nil, base.CorruptionErrorf("pebble: corrupt manifest: failed to read %d bytes", n)
851849
}
852850
return nil, err
853851
}
@@ -860,7 +858,7 @@ func (d versionEditDecoder) readLevel() (int, error) {
860858
return 0, err
861859
}
862860
if u >= NumLevels {
863-
return 0, errCorruptManifest
861+
return 0, base.CorruptionErrorf("pebble: corrupt manifest: level %d >= %d", u, NumLevels)
864862
}
865863
return int(u), nil
866864
}
@@ -876,8 +874,8 @@ func (d versionEditDecoder) readFileNum() (base.FileNum, error) {
876874
func (d versionEditDecoder) readUvarint() (uint64, error) {
877875
u, err := binary.ReadUvarint(d)
878876
if err != nil {
879-
if err == io.EOF {
880-
return 0, errCorruptManifest
877+
if err == io.EOF || err == io.ErrUnexpectedEOF {
878+
return 0, base.CorruptionErrorf("pebble: corrupt manifest: failed to read uvarint")
881879
}
882880
return 0, err
883881
}

open.go

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -950,20 +950,29 @@ func (d *DB) replayWAL(
950950
// surfaces record.ErrInvalidChunk or record.ErrZeroedChunk. These
951951
// errors are always indicative of corruption and data loss.
952952
//
953-
// Otherwise, the reader surfaces io.ErrUnexpectedEOF indicating that
954-
// the WAL terminated uncleanly and ambiguously. If the WAL is the
955-
// most recent logical WAL, the caller passes in (strictWALTail=false),
956-
// indicating we should tolerate the unclean ending. If the WAL is an
957-
// older WAL, the caller passes in (strictWALTail=true), indicating that
958-
// the WAL should have been closed cleanly, and we should interpret
959-
// the `io.ErrUnexpectedEOF` as corruption and stop recovery.
953+
// Otherwise, the reader surfaces record.ErrUnexpectedEOF indicating
954+
// that the WAL terminated uncleanly and ambiguously. If the WAL is
955+
// the most recent logical WAL, the caller passes in
956+
// (strictWALTail=false), indicating we should tolerate the unclean
957+
// ending. If the WAL is an older WAL, the caller passes in
958+
// (strictWALTail=true), indicating that the WAL should have been
959+
// closed cleanly, and we should interpret the
960+
// `record.ErrUnexpectedEOF` as corruption and stop recovery.
960961
if errors.Is(err, io.EOF) {
961962
break
962-
} else if errors.Is(err, io.ErrUnexpectedEOF) && !strictWALTail {
963+
} else if errors.Is(err, record.ErrUnexpectedEOF) && !strictWALTail {
963964
break
964-
} else if errors.Is(err, record.ErrInvalidChunk) || errors.Is(err, record.ErrZeroedChunk) {
965-
// If a read-ahead returns one of these errors, they should be marked with corruption.
966-
// Other I/O related errors should not be marked with corruption and simply returned.
965+
} else if (errors.Is(err, record.ErrUnexpectedEOF) && strictWALTail) ||
966+
errors.Is(err, record.ErrInvalidChunk) || errors.Is(err, record.ErrZeroedChunk) {
967+
// If a read-ahead returns record.ErrInvalidChunk or
968+
// record.ErrZeroedChunk, then there's definitively corruption.
969+
//
970+
// If strictWALTail=true, then record.ErrUnexpectedEOF should
971+
// also be considered corruption because the strictWALTail
972+
// indicates we expect a clean end to the WAL.
973+
//
974+
// Other I/O related errors should not be marked with corruption
975+
// and simply returned.
967976
err = errors.Mark(err, ErrCorruption)
968977
}
969978

record/record.go

Lines changed: 28 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -215,13 +215,28 @@ var (
215215
// header, length, or checksum. This usually occurs when a log is recycled,
216216
// but can also occur due to corruption.
217217
ErrInvalidChunk = errors.New("pebble/record: invalid chunk")
218+
219+
// ErrUnexpectedEOF is returned if a log file ends unexpectedly. It
220+
// indicates the unexpected end of the log file or an in-progress record
221+
// envelope itself. ErrUnexpectedEOF may be returned by Reader when it
222+
// encounters an invalid chunk but observes no evidence that the invalid
223+
// chunk is caused by corruption (i.e., no future chunk indicates that
224+
// offset should be valid and durably synced.)
225+
//
226+
// This error is defined separately from io.ErrUnexpectedEOF to disambiguate
227+
// this case from from the case of an unexpected end of the record's payload
228+
// while decoding at a higher-level (eg, version edit decoding). If a
229+
// higher-level decoding routine returns record.ErrUnexpectedEOF, it
230+
// unambiguously indicates that the log file itself ended unexpectedly. The
231+
// record.Reader will never return io.ErrUnexpectedEOF, just record.ErrUnexpectedEOF.
232+
ErrUnexpectedEOF = errors.New("pebble/record: unexpected EOF")
218233
)
219234

220235
// IsInvalidRecord returns true if the error matches one of the error types
221236
// returned for invalid records. These are treated in a way similar to io.EOF
222237
// in recovery code.
223238
func IsInvalidRecord(err error) bool {
224-
return err == ErrZeroedChunk || err == ErrInvalidChunk || err == io.ErrUnexpectedEOF
239+
return err == ErrZeroedChunk || err == ErrInvalidChunk || err == ErrUnexpectedEOF
225240
}
226241

227242
// Reader reads records from an underlying io.Reader.
@@ -269,7 +284,7 @@ func NewReader(r io.Reader, logNum base.DiskFileNum) *Reader {
269284
logNum: uint32(logNum),
270285
blockNum: -1,
271286
// invalidOffset is initialized as MaxUint64 so that reading ahead
272-
// with the old chunk wire formats results in io.ErrUnexpectedEOF.
287+
// with the old chunk wire formats results in ErrUnexpectedEOF.
273288
invalidOffset: math.MaxUint64,
274289
}
275290
}
@@ -396,7 +411,7 @@ func (r *Reader) nextChunk(wantFirst bool) error {
396411
if err != nil && err != io.ErrUnexpectedEOF {
397412
if err == io.EOF && !wantFirst {
398413
r.invalidOffset = uint64(r.blockNum)*blockSize + uint64(r.begin)
399-
return io.ErrUnexpectedEOF
414+
return ErrUnexpectedEOF
400415
}
401416
return err
402417
}
@@ -454,20 +469,19 @@ func (r *Reader) readAheadForCorruption() error {
454469
}
455470

456471
if errors.Is(err, io.EOF) {
457-
// io.ErrUnexpectedEOF is returned instead of
458-
// io.EOF because io library functions clear
459-
// an error when it is io.EOF. io.ErrUnexpectedEOF
460-
// is returned so that the error is not cleared
461-
// when the io library makes calls to Reader.Read().
472+
// ErrUnexpectedEOF is returned instead of io.EOF because io library
473+
// functions clear an error when it is io.EOF. ErrUnexpectedEOF is
474+
// returned so that the error is not cleared when the io library
475+
// makes calls to Reader.Read().
462476
//
463-
// Since no sync offset was found to indicate that the
464-
// invalid chunk should have been valid, the chunk represents
465-
// an abrupt, unclean termination of the logical log. This
466-
// abrupt end of file represented by io.ErrUnexpectedEOF.
477+
// Since no sync offset was found to indicate that the invalid chunk
478+
// should have been valid, the chunk represents an abrupt, unclean
479+
// termination of the logical log. This abrupt end of file
480+
// represented by ErrUnexpectedEOF.
467481
if r.loggerForTesting != nil {
468-
r.loggerForTesting.logf("\tEncountered io.EOF; returning io.ErrUnexpectedEOF since no sync offset found.\n")
482+
r.loggerForTesting.logf("\tEncountered io.EOF; returning ErrUnexpectedEOF since no sync offset found.\n")
469483
}
470-
return io.ErrUnexpectedEOF
484+
return ErrUnexpectedEOF
471485
}
472486
// The last block of a log can be less than 32KiB, which is
473487
// the length of r.buf. Thus, we should still parse the data in

record/record_test.go

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -439,7 +439,7 @@ func TestCorruptBlock(t *testing.T) {
439439
if err == nil {
440440
t.Fatal("Expected a checksum mismatch error, got nil")
441441
}
442-
if err != io.ErrUnexpectedEOF {
442+
if !errors.Is(err, ErrUnexpectedEOF) {
443443
t.Fatalf("Unexpected error returned: %v", err)
444444
}
445445
}
@@ -584,7 +584,7 @@ func TestSeekRecordEndOfFile(t *testing.T) {
584584
if err == nil {
585585
t.Fatalf("Seek past the end of a file didn't cause an error")
586586
}
587-
if err != io.ErrUnexpectedEOF {
587+
if err != ErrUnexpectedEOF {
588588
t.Fatalf("Seeking past EOF raised unexpected error: %v", err)
589589
}
590590
}
@@ -673,8 +673,8 @@ func TestInvalidLogNum(t *testing.T) {
673673

674674
{
675675
r := NewReader(bytes.NewReader(buf.Bytes()), 2)
676-
if _, err := r.Next(); err != io.ErrUnexpectedEOF {
677-
t.Fatalf("expected %s, but found %s\n", io.ErrUnexpectedEOF, err)
676+
if _, err := r.Next(); !errors.Is(err, ErrUnexpectedEOF) {
677+
t.Fatalf("expected %s, but found %s\n", ErrUnexpectedEOF, err)
678678
}
679679
}
680680
}
@@ -759,15 +759,15 @@ func TestRecycleLog(t *testing.T) {
759759
rr, err := r.Next()
760760
if err != nil {
761761
// If we limited output then an EOF, zeroed, or invalid chunk is expected.
762-
if limitedBuf.limit < 0 && (err == io.EOF || err == io.ErrUnexpectedEOF || err == ErrZeroedChunk || err == ErrInvalidChunk) {
762+
if limitedBuf.limit < 0 && (err == io.EOF || IsInvalidRecord(err)) {
763763
break
764764
}
765765
t.Fatalf("%d/%d: %v", i, j, err)
766766
}
767767
x, err := io.ReadAll(rr)
768768
if err != nil {
769769
// If we limited output then an EOF, zeroed, or invalid chunk is expected.
770-
if limitedBuf.limit < 0 && (err == io.EOF || err == io.ErrUnexpectedEOF || err == ErrZeroedChunk || err == ErrInvalidChunk) {
770+
if limitedBuf.limit < 0 && (err == io.EOF || IsInvalidRecord(err)) {
771771
break
772772
}
773773
t.Fatalf("%d/%d: %v", i, j, err)
@@ -776,7 +776,7 @@ func TestRecycleLog(t *testing.T) {
776776
t.Fatalf("%d/%d: expected record %d, but found %d", i, j, sizes[j], len(x))
777777
}
778778
}
779-
if _, err := r.Next(); err != io.EOF && err != io.ErrUnexpectedEOF && err != ErrZeroedChunk && err != ErrInvalidChunk {
779+
if _, err := r.Next(); err != io.EOF && err != ErrUnexpectedEOF && err != ErrZeroedChunk && err != ErrInvalidChunk {
780780
t.Fatalf("%d: expected EOF, but found %v", i, err)
781781
}
782782
}
@@ -797,7 +797,7 @@ func TestTruncatedLog(t *testing.T) {
797797
rr, err := r.Next()
798798
require.NoError(t, err)
799799
_, err = io.ReadAll(rr)
800-
require.EqualValues(t, err, io.ErrUnexpectedEOF)
800+
require.EqualValues(t, err, ErrUnexpectedEOF)
801801
}
802802

803803
func TestRecycleLogWithPartialBlock(t *testing.T) {
@@ -901,7 +901,7 @@ func TestRecycleLogWithPartialRecord(t *testing.T) {
901901
require.NoError(t, err)
902902

903903
_, err = io.ReadAll(rr)
904-
require.Equal(t, err, io.ErrUnexpectedEOF)
904+
require.True(t, errors.Is(err, ErrUnexpectedEOF))
905905
}
906906

907907
type readerLogger struct {

0 commit comments

Comments
 (0)