@@ -680,3 +680,139 @@ func CidFromBytes(data []byte) (int, Cid, error) {
680
680
681
681
return l , Cid {string (data [0 :l ])}, nil
682
682
}
683
+
684
+ func toBufByteReader (r io.Reader , dst []byte ) * bufByteReader {
685
+ // If the reader already implements ByteReader, use it directly.
686
+ // Otherwise, use a fallback that does 1-byte Reads.
687
+ if br , ok := r .(io.ByteReader ); ok {
688
+ return & bufByteReader {direct : br , dst : dst }
689
+ }
690
+ return & bufByteReader {fallback : r , dst : dst }
691
+ }
692
+
693
// bufByteReader is an io.ByteReader that keeps a copy of every byte it
// successfully returns by appending it to dst.
type bufByteReader struct {
	// direct, when non-nil, is the source whose ReadByte is called.
	direct io.ByteReader

	// fallback is read one byte at a time whenever direct is nil.
	fallback io.Reader

	// dst accumulates the bytes returned by ReadByte so far.
	dst []byte
}

// ReadByte returns the next byte from the underlying source and records it
// in r.dst. On error it returns a zero byte and leaves r.dst untouched.
func (r *bufByteReader) ReadByte() (byte, error) {
	var (
		b   byte
		err error
	)
	if r.direct != nil {
		// The underlying reader has its own ReadByte; delegate to it.
		b, err = r.direct.ReadByte()
	} else {
		// No ReadByte available, so perform a single-byte Read instead.
		// TODO: consider reading straight into dst,
		// once we have benchmarks and if they prove that to be faster.
		var single [1]byte
		_, err = io.ReadFull(r.fallback, single[:])
		b = single[0]
	}
	if err != nil {
		return 0, err
	}

	r.dst = append(r.dst, b)
	return b, nil
}
721
+
722
+ // CidFromReader reads a precise number of bytes for a CID from a given reader.
723
+ // It returns the number of bytes read, the CID, and any error encountered.
724
+ // The number of bytes read is accurate even if a non-nil error is returned.
725
+ //
726
+ // It's recommended to supply a reader that buffers and implements io.ByteReader,
727
+ // as CidFromReader has to do many single-byte reads to decode varints.
728
+ // If the argument only implements io.Reader, single-byte Read calls are used instead.
729
+ func CidFromReader (r io.Reader ) (int , Cid , error ) {
730
+ // 64 bytes is enough for any CIDv0,
731
+ // and it's enough for most CIDv1s in practice.
732
+ // If the digest is too long, we'll allocate more.
733
+ br := toBufByteReader (r , make ([]byte , 0 , 64 ))
734
+
735
+ // We read the first varint, to tell if this is a CIDv0 or a CIDv1.
736
+ // The varint package wants a io.ByteReader, so we must wrap our io.Reader.
737
+ vers , err := varint .ReadUvarint (br )
738
+ if err != nil {
739
+ return len (br .dst ), Undef , err
740
+ }
741
+
742
+ // If we have a CIDv0, read the rest of the bytes and cast the buffer.
743
+ if vers == mh .SHA2_256 {
744
+ if n , err := io .ReadFull (r , br .dst [1 :34 ]); err != nil {
745
+ return len (br .dst ) + n , Undef , err
746
+ }
747
+
748
+ br .dst = br .dst [:34 ]
749
+ h , err := mh .Cast (br .dst )
750
+ if err != nil {
751
+ return len (br .dst ), Undef , err
752
+ }
753
+
754
+ return len (br .dst ), Cid {string (h )}, nil
755
+ }
756
+
757
+ if vers != 1 {
758
+ return len (br .dst ), Undef , fmt .Errorf ("expected 1 as the cid version number, got: %d" , vers )
759
+ }
760
+
761
+ // CID block encoding multicodec.
762
+ _ , err = varint .ReadUvarint (br )
763
+ if err != nil {
764
+ return len (br .dst ), Undef , err
765
+ }
766
+
767
+ // We could replace most of the code below with go-multihash's ReadMultihash.
768
+ // Note that it would save code, but prevent reusing buffers.
769
+ // Plus, we already have a ByteReader now.
770
+ mhStart := len (br .dst )
771
+
772
+ // Multihash hash function code.
773
+ _ , err = varint .ReadUvarint (br )
774
+ if err != nil {
775
+ return len (br .dst ), Undef , err
776
+ }
777
+
778
+ // Multihash digest length.
779
+ mhl , err := varint .ReadUvarint (br )
780
+ if err != nil {
781
+ return len (br .dst ), Undef , err
782
+ }
783
+
784
+ // Refuse to make large allocations to prevent OOMs due to bugs.
785
+ const maxDigestAlloc = 32 << 20 // 32MiB
786
+ if mhl > maxDigestAlloc {
787
+ return len (br .dst ), Undef , fmt .Errorf ("refusing to allocate %d bytes for a digest" , mhl )
788
+ }
789
+
790
+ // Fine to convert mhl to int, given maxDigestAlloc.
791
+ prefixLength := len (br .dst )
792
+ cidLength := prefixLength + int (mhl )
793
+ if cidLength > cap (br .dst ) {
794
+ // If the multihash digest doesn't fit in our initial 64 bytes,
795
+ // efficiently extend the slice via append+make.
796
+ br .dst = append (br .dst , make ([]byte , cidLength - cap (br .dst ))... )
797
+ } else {
798
+ // The multihash digest fits inside our buffer,
799
+ // so just extend its capacity.
800
+ br .dst = br .dst [:cidLength ]
801
+ }
802
+
803
+ if n , err := io .ReadFull (r , br .dst [prefixLength :cidLength ]); err != nil {
804
+ // We can't use len(br.dst) here,
805
+ // as we've only read n bytes past prefixLength.
806
+ return prefixLength + n , Undef , err
807
+ }
808
+
809
+ // This simply ensures the multihash is valid.
810
+ // TODO: consider removing this bit, as it's probably redundant;
811
+ // for now, it helps ensure consistency with CidFromBytes.
812
+ _ , _ , err = mh .MHFromBytes (br .dst [mhStart :])
813
+ if err != nil {
814
+ return len (br .dst ), Undef , err
815
+ }
816
+
817
+ return len (br .dst ), Cid {string (br .dst )}, nil
818
+ }
0 commit comments