Skip to content

Commit c4c8760

Browse files
committed
implement CidFromReader
And reuse two CidFromBytes tests for it, which include both CIDv0 and CIDv1 cases as inputs, as well as some inputs that should error. Fixes #126.
1 parent 8e9280d commit c4c8760

File tree

2 files changed

+226
-41
lines changed

2 files changed

+226
-41
lines changed

cid.go

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,3 +680,139 @@ func CidFromBytes(data []byte) (int, Cid, error) {
680680

681681
return l, Cid{string(data[0:l])}, nil
682682
}
683+
684+
func toBufByteReader(r io.Reader, dst []byte) *bufByteReader {
685+
// If the reader already implements ByteReader, use it directly.
686+
// Otherwise, use a fallback that does 1-byte Reads.
687+
if br, ok := r.(io.ByteReader); ok {
688+
return &bufByteReader{direct: br, dst: dst}
689+
}
690+
return &bufByteReader{fallback: r, dst: dst}
691+
}
692+
693+
// bufByteReader is an io.ByteReader that records each byte it returns.
// toBufByteReader sets exactly one of direct and fallback.
type bufByteReader struct {
	// direct is the source when the underlying reader implements io.ByteReader.
	direct io.ByteReader
	// fallback is the source otherwise; it is read one byte at a time.
	fallback io.Reader

	// dst accumulates every byte successfully returned by ReadByte.
	dst []byte
}
699+
700+
func (r *bufByteReader) ReadByte() (byte, error) {
701+
// The underlying reader has ReadByte; use it.
702+
if br := r.direct; br != nil {
703+
b, err := br.ReadByte()
704+
if err != nil {
705+
return 0, err
706+
}
707+
r.dst = append(r.dst, b)
708+
return b, nil
709+
}
710+
711+
// Fall back to a one-byte Read.
712+
// TODO: consider reading straight into dst,
713+
// once we have benchmarks and if they prove that to be faster.
714+
var p [1]byte
715+
if _, err := io.ReadFull(r.fallback, p[:]); err != nil {
716+
return 0, err
717+
}
718+
r.dst = append(r.dst, p[0])
719+
return p[0], nil
720+
}
721+
722+
// CidFromReader reads a precise number of bytes for a CID from a given reader.
723+
// It returns the number of bytes read, the CID, and any error encountered.
724+
// The number of bytes read is accurate even if a non-nil error is returned.
725+
//
726+
// It's recommended to supply a reader that buffers and implements io.ByteReader,
727+
// as CidFromReader has to do many single-byte reads to decode varints.
728+
// If the argument only implements io.Reader, single-byte Read calls are used instead.
729+
func CidFromReader(r io.Reader) (int, Cid, error) {
730+
// 64 bytes is enough for any CIDv0,
731+
// and it's enough for most CIDv1s in practice.
732+
// If the digest is too long, we'll allocate more.
733+
br := toBufByteReader(r, make([]byte, 0, 64))
734+
735+
// We read the first varint, to tell if this is a CIDv0 or a CIDv1.
736+
// The varint package wants a io.ByteReader, so we must wrap our io.Reader.
737+
vers, err := varint.ReadUvarint(br)
738+
if err != nil {
739+
return len(br.dst), Undef, err
740+
}
741+
742+
// If we have a CIDv0, read the rest of the bytes and cast the buffer.
743+
if vers == mh.SHA2_256 {
744+
if n, err := io.ReadFull(r, br.dst[1:34]); err != nil {
745+
return len(br.dst) + n, Undef, err
746+
}
747+
748+
br.dst = br.dst[:34]
749+
h, err := mh.Cast(br.dst)
750+
if err != nil {
751+
return len(br.dst), Undef, err
752+
}
753+
754+
return len(br.dst), Cid{string(h)}, nil
755+
}
756+
757+
if vers != 1 {
758+
return len(br.dst), Undef, fmt.Errorf("expected 1 as the cid version number, got: %d", vers)
759+
}
760+
761+
// CID block encoding multicodec.
762+
_, err = varint.ReadUvarint(br)
763+
if err != nil {
764+
return len(br.dst), Undef, err
765+
}
766+
767+
// We could replace most of the code below with go-multihash's ReadMultihash.
768+
// Note that it would save code, but prevent reusing buffers.
769+
// Plus, we already have a ByteReader now.
770+
mhStart := len(br.dst)
771+
772+
// Multihash hash function code.
773+
_, err = varint.ReadUvarint(br)
774+
if err != nil {
775+
return len(br.dst), Undef, err
776+
}
777+
778+
// Multihash digest length.
779+
mhl, err := varint.ReadUvarint(br)
780+
if err != nil {
781+
return len(br.dst), Undef, err
782+
}
783+
784+
// Refuse to make large allocations to prevent OOMs due to bugs.
785+
const maxDigestAlloc = 32 << 20 // 32MiB
786+
if mhl > maxDigestAlloc {
787+
return len(br.dst), Undef, fmt.Errorf("refusing to allocate %d bytes for a digest", mhl)
788+
}
789+
790+
// Fine to convert mhl to int, given maxDigestAlloc.
791+
prefixLength := len(br.dst)
792+
cidLength := prefixLength + int(mhl)
793+
if cidLength > cap(br.dst) {
794+
// If the multihash digest doesn't fit in our initial 64 bytes,
795+
// efficiently extend the slice via append+make.
796+
br.dst = append(br.dst, make([]byte, cidLength-cap(br.dst))...)
797+
} else {
798+
// The multihash digest fits inside our buffer,
799+
// so just extend its capacity.
800+
br.dst = br.dst[:cidLength]
801+
}
802+
803+
if n, err := io.ReadFull(r, br.dst[prefixLength:cidLength]); err != nil {
804+
// We can't use len(br.dst) here,
805+
// as we've only read n bytes past prefixLength.
806+
return prefixLength + n, Undef, err
807+
}
808+
809+
// This simply ensures the multihash is valid.
810+
// TODO: consider removing this bit, as it's probably redundant;
811+
// for now, it helps ensure consistency with CidFromBytes.
812+
_, _, err = mh.MHFromBytes(br.dst[mhStart:])
813+
if err != nil {
814+
return len(br.dst), Undef, err
815+
}
816+
817+
return len(br.dst), Cid{string(br.dst)}, nil
818+
}

cid_test.go

Lines changed: 90 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,12 @@ import (
44
"bytes"
55
"encoding/json"
66
"fmt"
7+
"io"
78
"math/rand"
89
"reflect"
910
"strings"
1011
"testing"
12+
"testing/iotest"
1113

1214
mbase "github.com/multiformats/go-multibase"
1315
mh "github.com/multiformats/go-multihash"
@@ -692,51 +694,98 @@ func TestReadCidsFromBuffer(t *testing.T) {
692694
if cur != len(buf) {
693695
t.Fatal("had trailing bytes")
694696
}
695-
}
696697

697-
func TestBadCidFromBytes(t *testing.T) {
698-
l, c, err := CidFromBytes([]byte{mh.SHA2_256, 32, 0x00})
699-
if err == nil {
700-
t.Fatal("expected not-enough-bytes for V0 CidFromBytes")
701-
}
702-
if l != 0 {
703-
t.Fatal("expected length=0 from bad CidFromBytes")
704-
}
705-
if c != Undef {
706-
t.Fatal("expected Undef CID from bad CidFromBytes")
707-
}
698+
// The same, but now with CidFromReader.
699+
// In multiple forms, to catch more io interface bugs.
700+
for _, r := range []io.Reader{
701+
// implements io.ByteReader
702+
bytes.NewReader(buf),
708703

709-
c, err = Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm")
710-
if err != nil {
711-
t.Fatal(err)
712-
}
713-
byts := make([]byte, c.ByteLen())
714-
copy(byts, c.Bytes())
715-
byts[1] = 0x80 // bad codec varint
716-
byts[2] = 0x00
717-
l, c, err = CidFromBytes(byts)
718-
if err == nil {
719-
t.Fatal("expected not-enough-bytes for V1 CidFromBytes")
720-
}
721-
if l != 0 {
722-
t.Fatal("expected length=0 from bad CidFromBytes")
723-
}
724-
if c != Undef {
725-
t.Fatal("expected Undef CID from bad CidFromBytes")
704+
// tiny reads, no io.ByteReader
705+
iotest.OneByteReader(bytes.NewReader(buf)),
706+
} {
707+
cur = 0
708+
for _, expc := range cids {
709+
n, c, err := CidFromReader(r)
710+
if err != nil {
711+
t.Fatal(err)
712+
}
713+
if c != expc {
714+
t.Fatal("cids mismatched")
715+
}
716+
cur += n
717+
}
718+
if cur != len(buf) {
719+
t.Fatal("had trailing bytes")
720+
}
726721
}
722+
}
727723

728-
copy(byts, c.Bytes())
729-
byts[2] = 0x80 // bad multihash varint
730-
byts[3] = 0x00
731-
l, c, err = CidFromBytes(byts)
732-
if err == nil {
733-
t.Fatal("expected not-enough-bytes for V1 CidFromBytes")
734-
}
735-
if l != 0 {
736-
t.Fatal("expected length=0 from bad CidFromBytes")
737-
}
738-
if c != Undef {
739-
t.Fatal("expected Undef CID from bad CidFromBytes")
724+
func TestBadCidInput(t *testing.T) {
725+
for _, name := range []string{
726+
"FromBytes",
727+
"FromReader",
728+
} {
729+
t.Run(name, func(t *testing.T) {
730+
usingReader := name == "FromReader"
731+
732+
fromBytes := CidFromBytes
733+
if usingReader {
734+
fromBytes = func(data []byte) (int, Cid, error) {
735+
return CidFromReader(bytes.NewReader(data))
736+
}
737+
}
738+
739+
l, c, err := fromBytes([]byte{mh.SHA2_256, 32, 0x00})
740+
if err == nil {
741+
t.Fatal("expected not-enough-bytes for V0 CID")
742+
}
743+
if !usingReader && l != 0 {
744+
t.Fatal("expected length==0 from bad CID")
745+
} else if usingReader && l == 0 {
746+
t.Fatal("expected length!=0 from bad CID")
747+
}
748+
if c != Undef {
749+
t.Fatal("expected Undef CID from bad CID")
750+
}
751+
752+
c, err = Decode("bafkreie5qrjvaw64n4tjm6hbnm7fnqvcssfed4whsjqxzslbd3jwhsk3mm")
753+
if err != nil {
754+
t.Fatal(err)
755+
}
756+
byts := make([]byte, c.ByteLen())
757+
copy(byts, c.Bytes())
758+
byts[1] = 0x80 // bad codec varint
759+
byts[2] = 0x00
760+
l, c, err = fromBytes(byts)
761+
if err == nil {
762+
t.Fatal("expected not-enough-bytes for V1 CID")
763+
}
764+
if !usingReader && l != 0 {
765+
t.Fatal("expected length==0 from bad CID")
766+
} else if usingReader && l == 0 {
767+
t.Fatal("expected length!=0 from bad CID")
768+
}
769+
if c != Undef {
770+
t.Fatal("expected Undef CID from bad CID")
771+
}
772+
773+
copy(byts, c.Bytes())
774+
byts[2] = 0x80 // bad multihash varint
775+
byts[3] = 0x00
776+
l, c, err = fromBytes(byts)
777+
if err == nil {
778+
t.Fatal("expected not-enough-bytes for V1 CID")
779+
}
780+
if !usingReader && l != 0 {
781+
t.Fatal("expected length==0 from bad CID")
782+
} else if usingReader && l == 0 {
783+
t.Fatal("expected length!=0 from bad CID")
784+
}
785+
if c != Undef {
786+
t.Fatal("expected Undef CID from bad CidFromBytes")
787+
}
788+
})
740789
}
741790
}
742791

0 commit comments

Comments
 (0)