Skip to content

Commit 5cdbb51

Browse files
authored
BREAKING CHANGE: change Pointer CBOR representation to a kinded union (#60)
From: type Pointer union { &Node "0" Bucket "1" } representation keyed i.e. {"0": CID} or {"1": [KV...]} To: type Pointer union { &Node link Bucket list } representation kinded i.e. CID or [KV...] Also removes redundant refmt tags Closes: https://github.com/ipfs/go-hamt-ipld/issues/53
1 parent 569969d commit 5cdbb51

File tree

3 files changed

+65
-146
lines changed

3 files changed

+65
-146
lines changed

hamt.go

Lines changed: 13 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -60,18 +60,15 @@ var ErrMalformedHamt = fmt.Errorf("HAMT node was malformed")
6060
// array. Indexes `[1]` and `[2]` are not present, but index `[3]` is at
6161
// the second position of our Pointers array.
6262
//
63-
// (Note: the `refmt` tags are ignored by cbor-gen which will generate an
64-
// array type rather than map.)
65-
//
6663
// The IPLD Schema representation of this data structure is as follows:
6764
//
6865
// type Node struct {
6966
// bitfield Bytes
7067
// pointers [Pointer]
7168
// } representation tuple
7269
type Node struct {
73-
Bitfield *big.Int `refmt:"bf"`
74-
Pointers []*Pointer `refmt:"p"`
70+
Bitfield *big.Int
71+
Pointers []*Pointer
7572

7673
bitWidth int
7774
hash func([]byte) []byte
@@ -82,10 +79,11 @@ type Node struct {
8279

8380
// Pointer is an element in a HAMT node's Pointers array, encoded as an IPLD
8481
// tuple in DAG-CBOR of shape:
85-
// {"0": CID} or {"1": [KV...]}
86-
// Where a map with a single key of "0" contains a Link, where a map with a
87-
// single key of "1" contains a KV bucket. The map may contain only one of
88-
// these two possible keys.
82+
// CID or [KV...]
83+
// i.e. it is represented as a "kinded union" where a Link is a pointer to a
84+
// child node, while an array is a bucket of elements local to this node. A
85+
// Pointer must represent exactly one of these two states and cannot be both
86+
// (or neither).
8987
//
9088
// There are between 1 and 2^bitWidth of these Pointers in any HAMT node.
9189
//
@@ -94,20 +92,17 @@ type Node struct {
9492
// the bucket is replaced with a link to a newly created HAMT node which will
9593
// contain the `bucketSize+1` elements in its own Pointers array.
9694
//
97-
// (Note: the `refmt` tags are ignored by cbor-gen which will generate an
98-
// array type rather than map.)
99-
//
10095
// The IPLD Schema representation of this data structure is as follows:
10196
//
10297
// type Pointer union {
103-
// &Node "0"
104-
// Bucket "1"
105-
// } representation keyed
98+
// &Node link
99+
// Bucket list
100+
// } representation kinded
106101
//
107102
// type Bucket [KV]
108103
type Pointer struct {
109-
KVs []*KV `refmt:"v,omitempty"`
110-
Link cid.Cid `refmt:"l,omitempty"`
104+
KVs []*KV
105+
Link cid.Cid
111106

112107
// cache is a pointer to an in-memory Node, which may or may not be
113108
// present, and corresponds to the Link field, which also may or may not
@@ -389,6 +384,7 @@ func loadNode(
389384
isLink := ch.isShard()
390385
isBucket := ch.KVs != nil
391386
if !((isLink && !isBucket) || (!isLink && isBucket)) {
387+
// Pointer#UnmarshalCBOR shouldn't allow this
392388
return nil, ErrMalformedHamt
393389
}
394390
if isLink && ch.Link.Type() != cid.DagCBOR { // not dag-cbor

hamt_test.go

Lines changed: 19 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -980,10 +980,9 @@ func TestMalformedHamt(t *testing.T) {
980980
bcat(b(0x40+1), b(kv.key)), // bytes(1) "\x??"
981981
bcat(b(0x40+1), b(kv.value)))) // bytes(1) "\x??"
982982
}
983-
return bcat(b(0xa0+1), // map(1)
984-
bcat(b(0x60+1), b(0x31)), // string(1) "1"
985-
bcat(b(0x80+byte(len(kvs))), // array(?)
986-
en)) // bucket contents
983+
return bcat(
984+
b(0x80+byte(len(kvs))), // array(?)
985+
en) // bucket contents
987986
}
988987

989988
// most minimal HAMT node with one k/v entry, sanity check we can load this
@@ -1040,65 +1039,25 @@ func TestMalformedHamt(t *testing.T) {
10401039
t.Fatal("Should have returned ErrMalformedHamt for mismatch bitfield count")
10411040
}
10421041

1043-
// test mixed link & bucket
1044-
1045-
// this node contains 2 elements, the first is a plain entry with one bucket
1046-
// and with a single key of 0x0100, the second element is a link to a child
1047-
// node which happens to be the same CID as this node will be stored in.
1048-
// However, this second entry has both a CID and a bucket in the same
1049-
// element, which is not allowed. Without checks for exactly one of these
1050-
// two things then a lookup for key 0x0100 would navigate through this
1051-
// node and back again as its own child to the first element.
1052-
store(
1053-
bcat(b(0x80+2), // array(2)
1054-
bcat(b(0x40+1), b(0x03)), // bytes(1) "\x03" (bitmap)
1055-
bcat(b(0x80+2), // array(2)
1056-
bcat(b(0xa0+1), // map(1)
1057-
bcat(b(0x60+1), b(0x31)), // string(1) "1"
1058-
bcat(b(0x80+1), // array(1)
1059-
bcat(b(0x80+2), // array(2)
1060-
bcat(b(0x40+2), []byte{0x01, 0x00}), // bytes(2) "\x0100"
1061-
bcat(b(0x40+1), b(0xff))))), // bytes(1) "\xff"
1062-
bcat(b(0xa0+2), // map(2)
1063-
bcat(b(0x60+1), b(0x30)), // string(1) "0"
1064-
bcat(b(0xd8), b(0x2a), // tag(42)
1065-
b(0x58), b(0x27), // bytes(39)
1066-
cidBytes), // cid
1067-
bcat(b(0x60+1), b(0x31)), // string(1) "1"
1068-
bcat(b(0x80+1), // array(1)
1069-
bcat(b(0x80+2), // array(2)
1070-
bcat(b(0x40+1), b(0x01)), // bytes(1) "\x00"
1071-
bcat(b(0x40+1), b(0xfe)))))))) // bytes(1) "\xff
1072-
1073-
n, err = LoadNode(ctx, cs, bcid, UseTreeBitWidth(8), UseHashFunction(identityHash))
1074-
if err == nil || n != nil || err.Error() != "Pointers should be a single element map" {
1075-
// no ErrMalformedHamt here possible because of cbor-gen wrapping
1076-
t.Fatal("Should have returned error for bad Pointer cbor")
1077-
}
1078-
10791042
// test that pointers with links use the DAG-CBOR multicodec
10801043
// sanity check minimal node pointing to a child node
10811044
store(
10821045
bcat(b(0x80+2), // array(2)
10831046
bcat(b(0x40+1), b(0x01)), // bytes(1) "\x01" (bitmap)
10841047
bcat(b(0x80+1), // array(1)
1085-
bcat(b(0xa0+1), // map(1)
1086-
bcat(b(0x60+1), b(0x30)), // string(1) "0"
1087-
bcat(b(0xd8), b(0x2a), // tag(42)
1088-
b(0x58), b(0x27), // bytes(39)
1089-
cidBytes))))) // cid
1048+
bcat(b(0xd8), b(0x2a), // tag(42)
1049+
b(0x58), b(0x27), // bytes(39)
1050+
cidBytes)))) // cid
10901051
load()
10911052

10921053
// node pointing to a non-dag-cbor node
10931054
store(
10941055
bcat(b(0x80+2), // array(2)
10951056
bcat(b(0x40+1), b(0x01)), // bytes(1) "\x01" (bitmap)
10961057
bcat(b(0x80+1), // array(1)
1097-
bcat(b(0xa0+1), // map(1)
1098-
bcat(b(0x60+1), b(0x30)), // string(1) "0"
1099-
bcat(b(0xd8), b(0x2a), // tag(42)
1100-
b(0x58), b(0x27), // bytes(39)
1101-
badCidBytes))))) // cid
1058+
bcat(b(0xd8), b(0x2a), // tag(42)
1059+
b(0x58), b(0x27), // bytes(39)
1060+
badCidBytes)))) // cid
11021061
n, err = LoadNode(ctx, cs, bcid, UseTreeBitWidth(8), UseHashFunction(identityHash))
11031062
if err != ErrMalformedHamt || n != nil {
11041063
t.Fatal("Should have returned ErrMalformedHamt for bad child link codec")
@@ -1178,11 +1137,9 @@ func TestMalformedHamt(t *testing.T) {
11781137
bcat(b(0x80+2), // array(2)
11791138
bcat(b(0x40+1), b(0x01)), // bytes(1) "\x01" (bitmap)
11801139
bcat(b(0x80+1), // array(1)
1181-
bcat(b(0xa0+1), // map(1)
1182-
bcat(b(0x60+1), b(0x30)), // string(1) "0"
1183-
bcat(b(0xd8), b(0x2a), // tag(42)
1184-
b(0x58), b(0x27), // bytes(39)
1185-
ccidBytes))))) // cid
1140+
bcat(b(0xd8), b(0x2a), // tag(42)
1141+
b(0x58), b(0x27), // bytes(39)
1142+
ccidBytes)))) // cid
11861143

11871144
vg, err := load().FindRaw(ctx, string([]byte{0x00, 0x01}))
11881145
// without validation of the child block, this would return an ErrNotFound
@@ -1221,17 +1178,13 @@ func TestMalformedHamt(t *testing.T) {
12211178
bcat(b(0x80+2), // array(2)
12221179
bcat(b(0x40+1), b(0x03)), // bytes(1) "\x03" (bitmap)
12231180
bcat(b(0x80+2), // array(2)
1224-
bcat(b(0xa0+1), // map(1)
1225-
bcat(b(0x60+1), b(0x31)), // string(1) "1"
1226-
bcat(b(0x80+1), // array(1)
1227-
bcat(b(0x80+2), // array(2)
1228-
bcat(b(0x40+2), []byte{0x00, 0x01}), // bytes(2) "\x0001"
1229-
bcat(b(0x40+1), b(0xff))))), // bytes(1) "\xff"
1230-
bcat(b(0xa0+1), // map(1)
1231-
bcat(b(0x60+1), b(0x30)), // string(1) "0"
1232-
bcat(b(0xd8), b(0x2a), // tag(42)
1233-
b(0x58), b(0x27), // bytes(39)
1234-
ccidBytes))))) // cid
1181+
bcat(b(0x80+1), // array(1)
1182+
bcat(b(0x80+2), // array(2)
1183+
bcat(b(0x40+2), []byte{0x00, 0x01}), // bytes(2) "\x0001"
1184+
bcat(b(0x40+1), b(0xff)))), // bytes(1) "\xff"
1185+
bcat(b(0xd8), b(0x2a), // tag(42)
1186+
b(0x58), b(0x27), // bytes(39)
1187+
ccidBytes)))) // cid
12351188

12361189
vg, err = load().FindRaw(ctx, string([]byte{0x00, 0x01}))
12371190
// without validation of the child block, this would return an ErrNotFound

pointer_cbor.go

Lines changed: 33 additions & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ import (
88
cbg "github.com/whyrusleeping/cbor-gen"
99
)
1010

11-
var keyZero = []byte("0")
12-
var keyOne = []byte("1")
11+
// implemented as a kinded union - a "Pointer" is either a Link (child node) or
12+
// an Array (bucket)
1313

1414
func (t *Pointer) MarshalCBOR(w io.Writer) error {
1515
if t.Link != cid.Undef && len(t.KVs) > 0 {
@@ -18,36 +18,11 @@ func (t *Pointer) MarshalCBOR(w io.Writer) error {
1818

1919
scratch := make([]byte, 9)
2020

21-
if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajMap, 1); err != nil {
22-
return err
23-
}
24-
2521
if t.Link != cid.Undef {
26-
// key for links is "0"
27-
// Refmt (and the general IPLD data model currently) can't deal
28-
// with non string keys. So we have this weird restriction right now
29-
// hoping to be able to use integer keys soon
30-
if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, 1); err != nil {
31-
return err
32-
}
33-
34-
if _, err := w.Write(keyZero); err != nil {
35-
return err
36-
}
37-
3822
if err := cbg.WriteCidBuf(scratch, w, t.Link); err != nil {
3923
return err
4024
}
4125
} else {
42-
// key for KVs is "1"
43-
if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajTextString, 1); err != nil {
44-
return err
45-
}
46-
47-
if _, err := w.Write(keyOne); err != nil {
48-
return err
49-
}
50-
5126
if err := cbg.WriteMajorTypeHeaderBuf(scratch, w, cbg.MajArray, uint64(len(t.KVs))); err != nil {
5227
return err
5328
}
@@ -69,51 +44,29 @@ func (t *Pointer) UnmarshalCBOR(br io.Reader) error {
6944
if err != nil {
7045
return err
7146
}
72-
if maj != cbg.MajMap {
73-
return fmt.Errorf("cbor input should be of map")
74-
}
75-
76-
if extra != 1 {
77-
return fmt.Errorf("Pointers should be a single element map")
78-
}
79-
80-
maj, val, err := cbg.CborReadHeaderBuf(br, scratch)
81-
if err != nil {
82-
return err
83-
}
8447

85-
if maj != cbg.MajTextString {
86-
return fmt.Errorf("expected text string key")
87-
}
88-
89-
if val != 1 {
90-
return fmt.Errorf("map keys in pointers must be a single byte long")
91-
}
92-
93-
if _, err := io.ReadAtLeast(br, scratch[:1], 1); err != nil {
94-
return err
95-
}
48+
if maj == cbg.MajTag {
49+
if extra != 42 {
50+
return fmt.Errorf("expected tag 42 for child node link")
51+
}
9652

97-
switch scratch[0] {
98-
case '0':
99-
c, err := cbg.ReadCid(br)
53+
ba, err := cbg.ReadByteArray(br, 512)
10054
if err != nil {
10155
return err
10256
}
103-
t.Link = c
104-
return nil
105-
case '1':
106-
maj, length, err := cbg.CborReadHeaderBuf(br, scratch)
57+
58+
c, err := bufToCid(ba)
10759
if err != nil {
10860
return err
10961
}
11062

111-
if maj != cbg.MajArray {
112-
return fmt.Errorf("expected an array of KVs in cbor input")
113-
}
63+
t.Link = c
64+
return nil
65+
} else if maj == cbg.MajArray {
66+
length := extra
11467

11568
if length > 32 {
116-
return fmt.Errorf("KV array in cbor input for pointer was too long")
69+
return fmt.Errorf("KV array in CBOR input for pointer was too long")
11770
}
11871

11972
t.KVs = make([]*KV, length)
@@ -127,7 +80,24 @@ func (t *Pointer) UnmarshalCBOR(br io.Reader) error {
12780
}
12881

12982
return nil
130-
default:
131-
return fmt.Errorf("invalid pointer map key in cbor input: %d", val)
83+
} else {
84+
return fmt.Errorf("expected CBOR child node link or array")
85+
}
86+
}
87+
88+
// from https://github.com/whyrusleeping/cbor-gen/blob/211df3b9e24c6e0d0c338b440e6ab4ab298505b2/utils.go#L530
89+
func bufToCid(buf []byte) (cid.Cid, error) {
90+
if len(buf) == 0 {
91+
return cid.Undef, fmt.Errorf("undefined CID")
13292
}
93+
94+
if len(buf) < 2 {
95+
return cid.Undef, fmt.Errorf("DAG-CBOR serialized CIDs must have at least two bytes")
96+
}
97+
98+
if buf[0] != 0 {
99+
return cid.Undef, fmt.Errorf("DAG-CBOR serialized CIDs must have binary multibase")
100+
}
101+
102+
return cid.Cast(buf[1:])
133103
}

0 commit comments

Comments
 (0)