@@ -110,16 +110,9 @@ type Pointer struct {
110
110
Link cid.Cid `refmt:"l,omitempty"`
111
111
112
112
// cached node to avoid too many serialization operations
113
- // TODO(rvagg): we should check that this is actually used optimally. Flush()
114
- // performs a save of all of the cached nodes, but both Copy() and loadChild()
115
- // will set them. In the case of loadChild() we're not expecting a mutation so
116
- // a save is likely going to mean we incur unnecessary serialization when
117
- // we've simply inspected the tree. Copy() will only set a cached form if
118
- // it already exists on the source. It's unclear exactly what Flush() is good
119
- // for in its current form. Users may also need an advisory about memory
120
- // usage of large graphs since they don't have control over this outside of
121
- // Flush().
122
113
cache * Node
114
+ // dirty flag to indicate that the cached node needs to be flushed
115
+ dirty bool
123
116
}
124
117
125
118
// KV represents leaf storage within a HAMT node. A Pointer may hold up to
@@ -451,7 +444,7 @@ func (n *Node) checkSize(ctx context.Context) (uint64, error) {
451
444
// child pointer exists in cached form.
452
445
func (n * Node ) Flush (ctx context.Context ) error {
453
446
for _ , p := range n .Pointers {
454
- if p .cache != nil {
447
+ if p .cache != nil && p . dirty {
455
448
if err := p .cache .Flush (ctx ); err != nil {
456
449
return err
457
450
}
@@ -461,7 +454,7 @@ func (n *Node) Flush(ctx context.Context) error {
461
454
return err
462
455
}
463
456
464
- p .cache = nil
457
+ p .dirty = false
465
458
p .Link = c
466
459
}
467
460
}
@@ -613,6 +606,8 @@ func (n *Node) modifyValue(ctx context.Context, hv *hashBits, k []byte, v *cbg.D
613
606
return err
614
607
}
615
608
609
+ child .dirty = true
610
+
616
611
// CHAMP optimization, ensure the HAMT retains its canonical form for the
617
612
// current data it contains. This may involve collapsing child nodes if
618
613
// they no longer contain enough elements to justify their stand-alone
@@ -659,13 +654,6 @@ func (n *Node) modifyValue(ctx context.Context, hv *hashBits, k []byte, v *cbg.D
659
654
if len (child .KVs ) >= bucketSize {
660
655
// bucket is full, create a child node (shard) with all existing bucket
661
656
// elements plus the new one and set it in the place of the bucket
662
- // TODO(rvagg): this all of the modifyValue() calls are going to result
663
- // in a store.Put(), this could be improved by allowing NewNode() to take
664
- // the bulk set of elements, or modifying modifyValue() for the case
665
- // where we know for sure that the elements will go into buckets and
666
- // not cause an overflow - i.e. we just need to take each element, hash it
667
- // and consume the correct number of bytes off the digest and figure out
668
- // where it should be in the new node.
669
657
sub := NewNode (n .store )
670
658
sub .bitWidth = n .bitWidth
671
659
sub .hash = n .hash
@@ -675,18 +663,13 @@ func (n *Node) modifyValue(ctx context.Context, hv *hashBits, k []byte, v *cbg.D
675
663
}
676
664
677
665
for _ , p := range child .KVs {
678
- chhv := & hashBits {b : n .hash ([] byte ( p .Key ) ), consumed : hv .consumed }
666
+ chhv := & hashBits {b : n .hash (p .Key ), consumed : hv .consumed }
679
667
if err := sub .modifyValue (ctx , chhv , p .Key , p .Value ); err != nil {
680
668
return err
681
669
}
682
670
}
683
671
684
- c , err := n .store .Put (ctx , sub )
685
- if err != nil {
686
- return err
687
- }
688
-
689
- return n .setPointer (cindex , & Pointer {Link : c })
672
+ return n .setPointer (cindex , & Pointer {cache : sub , dirty : true })
690
673
}
691
674
692
675
// otherwise insert the new element into the array in order, the ordering is
@@ -768,6 +751,7 @@ func (n *Node) Copy() *Node {
768
751
pp := & Pointer {}
769
752
if p .cache != nil {
770
753
pp .cache = p .cache .Copy ()
754
+ pp .dirty = p .dirty
771
755
}
772
756
pp .Link = p .Link
773
757
if p .KVs != nil {
@@ -785,7 +769,7 @@ func (n *Node) Copy() *Node {
785
769
// Pointers elements can either contain a bucket of local elements or be a
786
770
// link to a child node. In the case of a link, isShard() returns true.
787
771
func (p * Pointer ) isShard () bool {
788
- return p .Link .Defined ()
772
+ return p .cache != nil || p . Link .Defined ()
789
773
}
790
774
791
775
// ForEach recursively calls function f on each k / val pair found in the HAMT.
0 commit comments