Commit a4177d2
Introduce event stats on mock blockstore.
These are both reported as metrics, and also actively logged if any repeated puts at all are detected. (It is necessary to do both, because in the per-entry metric, repeated puts could otherwise end up rounded down to zero, and I'd in fact like to see whether the number is nonzero at all.)

I also wanted to try to make some use of these numbers within the addAndRemoveKeys function... however, at the moment, that goes over like a lead balloon. Immediately after the first Set operations, and before the Flush, there may already be some Put operations! (In practice, I saw both 0 and 1 in the tests that already call the addAndRemoveKeys test helper function; just enough variance to make asserting on it not fly.)

The reason for these Puts-without-Flush is that `modifyValue` does a `store.Put`... but only in the path where a KV array is full and it creates a new subshard: it then puts the new subshard (and not, somewhat surprisingly, itself). I'm not sure I entirely grok the high-level reasoning behind this. These Puts are a tad difficult to predict, short of evaluating the full HAMT algorithm itself.

These Puts-without-Flush also explain why the graphs in BenchmarkFill-blocks-per-entry-vs-scale.svg were so uncorrelated, as remarked on in the previous commit: the number of blocks generated is largely determined by how many changes accumulate before Puts start to occur; and if things aren't *actually* being buffered up until Flush is called, then of course we get plenty of Put operations no matter what.

This may indicate a problem: if someone was expecting batching to really work for garbage-avoidance reasons, it... doesn't look like it really does. (It's possible this hasn't actually been evident in any of the workflows people are using this HAMT for, though; if the workload is doing various point changes, the effectiveness of this batching doesn't matter very much.)

The checkSize method gains some docs, because in the first writing of this I reported these blockstore event stats *after* checkSize... and yeah, oof, that's a measurement error alright. (You'll see many, many, many duplicate puts if you do things in that order.)
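
To make the subshard point concrete, the overflow path looks roughly like the sketch below. This is a paraphrase with simplified and partly hypothetical names (the real logic lives in `modifyValue` in hamt.go), not the committed code; the point is only that the new subshard gets written immediately, while the parent stays in memory until Flush.

// Paraphrased sketch of the overflow branch in modifyValue (names simplified,
// partly hypothetical): when a bucket's KV array is already full, its entries
// spill into a new child shard, and that child is Put to the store right away.
if len(child.KVs) >= arrayWidth {
	sub := NewNode(n.store)
	// ...re-insert the existing KVs plus the new entry into sub here...
	c, err := n.store.Put(ctx, sub) // <-- the Put that can show up before any Flush
	if err != nil {
		return err
	}
	child.Link = c  // the parent only records the link in memory;
	child.KVs = nil // the parent itself is not written until Flush is called
}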
1 parent f9a26f1 commit a4177d2

3 files changed: +39 additions, -10 deletions
hamt.go

Lines changed: 10 additions & 4 deletions
@@ -358,10 +358,16 @@ func LoadNode(ctx context.Context, cs cbor.IpldStore, c cid.Cid, options ...Opti
 	return &out, nil
 }
 
-// Calculate the total _byte weight_ of the HAMT by fetching each node
-// from the IpldStore and adding its raw byte size to the total. This
-// operation will exhaustively load every node of the HAMT so should not
-// be used lightly.
+// checkSize computes the total serialized size of the entire HAMT.
+// It both puts and loads blocks as necessary to do this
+// (using the Put operation and a paired Get to discover the serial size,
+// and the load to move recursively as necessary).
+//
+// This is an expensive operation and should only be used in testing and analysis.
+//
+// Note that checkSize *does* actually *use the blockstore*: therefore it
+// will affect get and put counts (and makes no attempt to avoid duplicate puts!);
+// be aware of this if you are measuring those event counts.
 func (n *Node) checkSize(ctx context.Context) (uint64, error) {
 	c, err := n.store.Put(ctx, n)
 	if err != nil {
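
Given that caveat, any reading of the event counters in a test or benchmark has to happen before calling checkSize. A minimal sketch of the ordering, with variable names borrowed from the benchmark changes below:

// Sketch: snapshot the counters first, then let checkSize walk (and re-Put)
// every node of the HAMT. Doing it in the opposite order counts checkSize's
// own traffic, including one duplicate put per already-stored node.
statsBefore := blockstore.stats // blockstoreStats is a plain value struct, so this copies it
binarySize, err := n.checkSize(ctx)
if err != nil {
	b.Fatal(err)
}
b.Logf("gets=%d puts=%d dupPuts=%d; serialized size=%d bytes",
	statsBefore.evtcntGet, statsBefore.evtcntPut, statsBefore.evtcntPutDup, binarySize)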

hamt_bench_test.go

Lines changed: 15 additions & 4 deletions
@@ -125,13 +125,18 @@ func BenchmarkFill(b *testing.B) {
 				b.Fatal(err)
 			}
 			b.StopTimer()
+			if i < 3 {
+				//b.Logf("block size histogram: %v\n", blockstore.getBlockSizesHistogram())
+			}
+			if blockstore.stats.evtcntPutDup > 0 {
+				b.Logf("on round N=%d: blockstore stats: %#v\n", b.N, blockstore.stats) // note: must refer to this before doing `n.checkSize`; that function has many effects.
+			}
+			b.ReportMetric(float64(blockstore.stats.evtcntGet)/float64(t.kcount*1000), "getEvts/entry")
+			b.ReportMetric(float64(blockstore.stats.evtcntPut)/float64(t.kcount*1000), "putEvts/entry")
 			b.ReportMetric(float64(len(blockstore.data))/float64(t.kcount*1000), "blocks/entry")
 			binarySize, _ := n.checkSize(context.Background())
 			b.ReportMetric(float64(binarySize)/float64(t.kcount*1000), "bytes(hamtAccnt)/entry")
 			b.ReportMetric(float64(blockstore.totalBlockSizes())/float64(t.kcount*1000), "bytes(blockstoreAccnt)/entry")
-			if i < 3 {
-				//b.Logf("block size histogram: %v\n", blockstore.getBlockSizesHistogram())
-			}
 			b.StartTimer()
 		}
 	})
@@ -177,6 +182,7 @@ func doBenchmarkSetSuite(b *testing.B, flushPer bool) {
 			}
 			initalBlockstoreSize := len(blockstore.data)
 			b.ResetTimer()
+			blockstore.stats = blockstoreStats{}
 			// Additional inserts:
 			b.ReportAllocs()
 			for j := 0; j < 1000; j++ {
@@ -195,10 +201,15 @@ func doBenchmarkSetSuite(b *testing.B, flushPer bool) {
 				}
 			}
 			b.StopTimer()
-			b.ReportMetric(float64(len(blockstore.data)-initalBlockstoreSize)/float64(1000), "addntlBlocks/addntlEntry")
 			if i < 3 {
 				// b.Logf("block size histogram: %v\n", blockstore.getBlockSizesHistogram())
 			}
+			if blockstore.stats.evtcntPutDup > 0 {
+				b.Logf("on round N=%d: blockstore stats: %#v\n", b.N, blockstore.stats)
+			}
+			b.ReportMetric(float64(blockstore.stats.evtcntGet)/float64(t.kcount*1000), "getEvts/entry")
+			b.ReportMetric(float64(blockstore.stats.evtcntPut)/float64(t.kcount*1000), "putEvts/entry")
+			b.ReportMetric(float64(len(blockstore.data)-initalBlockstoreSize)/float64(1000), "addntlBlocks/addntlEntry")
 			b.StartTimer()
 		}
 	})
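
For a sense of scale on the "rounded down to zero" concern from the commit message: a handful of duplicate puts divided by the per-entry denominator used here is a very small ratio, easy to lose in the reported metric, which is why the absolute count is also logged whenever it is nonzero. The numbers below are hypothetical, just to show the arithmetic.

package main

import "fmt"

func main() {
	// Hypothetical figures: a benchmark case with kcount = 100 (i.e. 100*1000
	// entries) that happened to see 2 duplicate puts.
	const kcount = 100
	const entries = kcount * 1000
	const dupPuts = 2

	perEntry := float64(dupPuts) / float64(entries)
	fmt.Printf("dupPutEvts/entry = %g\n", perEntry) // 2e-05: nonzero, but easy to read as zero
}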

hamt_test.go

Lines changed: 14 additions & 2 deletions
@@ -19,14 +19,16 @@ import (
 )
 
 type mockBlocks struct {
-	data map[cid.Cid]block.Block
+	data  map[cid.Cid]block.Block
+	stats blockstoreStats
 }
 
 func newMockBlocks() *mockBlocks {
-	return &mockBlocks{make(map[cid.Cid]block.Block)}
+	return &mockBlocks{make(map[cid.Cid]block.Block), blockstoreStats{}}
 }
 
 func (mb *mockBlocks) Get(c cid.Cid) (block.Block, error) {
+	mb.stats.evtcntGet++
 	d, ok := mb.data[c]
 	if ok {
 		return d, nil
@@ -35,10 +37,20 @@ func (mb *mockBlocks) Get(c cid.Cid) (block.Block, error) {
 }
 
 func (mb *mockBlocks) Put(b block.Block) error {
+	mb.stats.evtcntPut++
+	if _, exists := mb.data[b.Cid()]; exists {
+		mb.stats.evtcntPutDup++
+	}
 	mb.data[b.Cid()] = b
 	return nil
 }
 
+type blockstoreStats struct {
+	evtcntGet    int
+	evtcntPut    int
+	evtcntPutDup int
+}
+
 func (mb *mockBlocks) totalBlockSizes() int {
 	sum := 0
 	for _, v := range mb.data {
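
For completeness, the kind of check I attempted in addAndRemoveKeys (hypothetical; it is not part of this commit) looked roughly like the fragment below. It is too flaky to keep: before the first Flush, the Put count is sometimes 0 and sometimes 1, depending on whether any bucket has already overflowed into a new subshard.

// Hypothetical fragment, imagined inside addAndRemoveKeys right after the
// initial Set calls and before the first Flush; mb is the *mockBlocks behind
// the store and t is the *testing.T. Abandoned because the observed value
// varies between 0 and 1.
if mb.stats.evtcntPut != 0 {
	t.Errorf("expected no Puts before Flush, but the blockstore saw %d", mb.stats.evtcntPut)
}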
