Commit a4177d2
Introduce event stats on mock blockstore.
These are both reported as metrics, and also actively logged if any repeated puts at all are detected. (It is necessary to do both, because in the per-entry metric, repeated puts could otherwise end up rounded down to zero, and I'd in fact like to see whether the number is nonzero at all.)

I also wanted to try to make some use of these numbers within the addAndRemoveKeys function... however, at the moment, that goes over like a lead balloon. Immediately after the first Set operations, and before the Flush, there may already be some Put operations! (In practice, I saw both 0 and 1 in the tests that already call the addAndRemoveKeys test helper function; just enough variance to make asserting on it not fly.)

The reason for these Puts-without-Flush is that `modifyValue` does a `store.Put`... but only in the path where a KV array is full and it creates a new subshard: it then puts the new subshard (and not, somewhat surprisingly, itself). I'm not sure I entirely grok the high-level reasoning behind this. These Puts are a tad difficult to predict, short of evaluating the full HAMT algorithm itself.

These Puts-without-Flush also explain why the graphs in BenchmarkFill-blocks-per-entry-vs-scale.svg were so uncorrelated, as remarked on in the previous commit: the number of blocks generated is largely determined by how many changes accumulate before Puts start to occur; and if things aren't *actually* being buffered up until Flush is called, then of course we get plenty of Put operations no matter what.

This may indicate a problem: if someone was expecting batching to really work for garbage-avoidance reasons, it... doesn't look like it really does. (It's possible this hasn't actually been evident in any of the workflows people are using this HAMT for, though; if the workload is doing various point changes, the effectiveness of this batching doesn't matter very much.)

The checkSize method gains some docs, because in the first writing of this I reported these blockstore event stats *after* checkSize... and yeah, oof, that's a measurement error alright. (You'll see many, many, many duplicate puts if you do things in that order.)
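
To make the subshard point concrete, the overflow path looks roughly like the sketch below. This is a paraphrase with simplified and partly hypothetical names (the real logic lives in `modifyValue` in hamt.go), not the committed code; the point is only that the new subshard gets written immediately, while the parent stays in memory until Flush.

// Paraphrased sketch of the overflow branch in modifyValue (names simplified,
// partly hypothetical): when a bucket's KV array is already full, its entries
// spill into a new child shard, and that child is Put to the store right away.
if len(child.KVs) >= arrayWidth {
	sub := NewNode(n.store)
	// ...re-insert the existing KVs plus the new entry into sub here...
	c, err := n.store.Put(ctx, sub) // <-- the Put that can show up before any Flush
	if err != nil {
		return err
	}
	child.Link = c  // the parent only records the link in memory;
	child.KVs = nil // the parent itself is not written until Flush is called
}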
1 parent f9a26f1 commit a4177d2

3 files changed: +39 additions, -10 deletions
hamt.go

Lines changed: 10 additions & 4 deletions
@@ -358,10 +358,16 @@ func LoadNode(ctx context.Context, cs cbor.IpldStore, c cid.Cid, options ...Opti
 	return &out, nil
 }
 
-// Calculate the total _byte weight_ of the HAMT by fetching each node
-// from the IpldStore and adding its raw byte size to the total. This
-// operation will exhaustively load every node of the HAMT so should not
-// be used lightly.
+// checkSize computes the total serialized size of the entire HAMT.
+// It both puts and loads blocks as necessary to do this
+// (using the Put operation and a paired Get to discover the serial size,
+// and the load to move recursively as necessary).
+//
+// This is an expensive operation and should only be used in testing and analysis.
+//
+// Note that checkSize *does* actually *use the blockstore*: therefore it
+// will affect get and put counts (and makes no attempt to avoid duplicate puts!);
+// be aware of this if you are measuring those event counts.
 func (n *Node) checkSize(ctx context.Context) (uint64, error) {
 	c, err := n.store.Put(ctx, n)
 	if err != nil {
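
Given that caveat, any reading of the event counters in a test or benchmark has to happen before calling checkSize. A minimal sketch of the ordering, with variable names borrowed from the benchmark changes below:

// Sketch: snapshot the counters first, then let checkSize walk (and re-Put)
// every node of the HAMT. Doing it in the opposite order counts checkSize's
// own traffic, including one duplicate put per already-stored node.
statsBefore := blockstore.stats // blockstoreStats is a plain value struct, so this copies it
binarySize, err := n.checkSize(ctx)
if err != nil {
	b.Fatal(err)
}
b.Logf("gets=%d puts=%d dupPuts=%d; serialized size=%d bytes",
	statsBefore.evtcntGet, statsBefore.evtcntPut, statsBefore.evtcntPutDup, binarySize)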

hamt_bench_test.go

Lines changed: 15 additions & 4 deletions
@@ -125,13 +125,18 @@ func BenchmarkFill(b *testing.B) {
 				b.Fatal(err)
 			}
 			b.StopTimer()
+			if i < 3 {
+				//b.Logf("block size histogram: %v\n", blockstore.getBlockSizesHistogram())
+			}
+			if blockstore.stats.evtcntPutDup > 0 {
+				b.Logf("on round N=%d: blockstore stats: %#v\n", b.N, blockstore.stats) // note: must refer to this before doing `n.checkSize`; that function has many effects.
+			}
+			b.ReportMetric(float64(blockstore.stats.evtcntGet)/float64(t.kcount*1000), "getEvts/entry")
+			b.ReportMetric(float64(blockstore.stats.evtcntPut)/float64(t.kcount*1000), "putEvts/entry")
 			b.ReportMetric(float64(len(blockstore.data))/float64(t.kcount*1000), "blocks/entry")
 			binarySize, _ := n.checkSize(context.Background())
 			b.ReportMetric(float64(binarySize)/float64(t.kcount*1000), "bytes(hamtAccnt)/entry")
 			b.ReportMetric(float64(blockstore.totalBlockSizes())/float64(t.kcount*1000), "bytes(blockstoreAccnt)/entry")
-			if i < 3 {
-				//b.Logf("block size histogram: %v\n", blockstore.getBlockSizesHistogram())
-			}
 			b.StartTimer()
 		}
 	})
@@ -177,6 +182,7 @@ func doBenchmarkSetSuite(b *testing.B, flushPer bool) {
 			}
 			initalBlockstoreSize := len(blockstore.data)
 			b.ResetTimer()
+			blockstore.stats = blockstoreStats{}
 			// Additional inserts:
 			b.ReportAllocs()
 			for j := 0; j < 1000; j++ {
@@ -195,10 +201,15 @@ func doBenchmarkSetSuite(b *testing.B, flushPer bool) {
 				}
 			}
 			b.StopTimer()
-			b.ReportMetric(float64(len(blockstore.data)-initalBlockstoreSize)/float64(1000), "addntlBlocks/addntlEntry")
 			if i < 3 {
 				// b.Logf("block size histogram: %v\n", blockstore.getBlockSizesHistogram())
 			}
+			if blockstore.stats.evtcntPutDup > 0 {
+				b.Logf("on round N=%d: blockstore stats: %#v\n", b.N, blockstore.stats)
+			}
+			b.ReportMetric(float64(blockstore.stats.evtcntGet)/float64(t.kcount*1000), "getEvts/entry")
+			b.ReportMetric(float64(blockstore.stats.evtcntPut)/float64(t.kcount*1000), "putEvts/entry")
+			b.ReportMetric(float64(len(blockstore.data)-initalBlockstoreSize)/float64(1000), "addntlBlocks/addntlEntry")
 			b.StartTimer()
 		}
 	})
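
For a sense of scale on the "rounded down to zero" concern from the commit message: a handful of duplicate puts divided by the per-entry denominator used here is a very small ratio, easy to lose in the reported metric, which is why the absolute count is also logged whenever it is nonzero. The numbers below are hypothetical, just to show the arithmetic.

package main

import "fmt"

func main() {
	// Hypothetical figures: a benchmark case with kcount = 100 (i.e. 100*1000
	// entries) that happened to see 2 duplicate puts.
	const kcount = 100
	const entries = kcount * 1000
	const dupPuts = 2

	perEntry := float64(dupPuts) / float64(entries)
	fmt.Printf("dupPutEvts/entry = %g\n", perEntry) // 2e-05: nonzero, but easy to read as zero
}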

hamt_test.go

Lines changed: 14 additions & 2 deletions
@@ -19,14 +19,16 @@ import (
 )
 
 type mockBlocks struct {
-	data map[cid.Cid]block.Block
+	data  map[cid.Cid]block.Block
+	stats blockstoreStats
 }
 
 func newMockBlocks() *mockBlocks {
-	return &mockBlocks{make(map[cid.Cid]block.Block)}
+	return &mockBlocks{make(map[cid.Cid]block.Block), blockstoreStats{}}
 }
 
 func (mb *mockBlocks) Get(c cid.Cid) (block.Block, error) {
+	mb.stats.evtcntGet++
 	d, ok := mb.data[c]
 	if ok {
 		return d, nil
@@ -35,10 +37,20 @@ func (mb *mockBlocks) Get(c cid.Cid) (block.Block, error) {
 }
 
 func (mb *mockBlocks) Put(b block.Block) error {
+	mb.stats.evtcntPut++
+	if _, exists := mb.data[b.Cid()]; exists {
+		mb.stats.evtcntPutDup++
+	}
 	mb.data[b.Cid()] = b
 	return nil
 }
 
+type blockstoreStats struct {
+	evtcntGet    int
+	evtcntPut    int
+	evtcntPutDup int
+}
+
 func (mb *mockBlocks) totalBlockSizes() int {
 	sum := 0
 	for _, v := range mb.data {
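
For completeness, the kind of check I attempted in addAndRemoveKeys (hypothetical; it is not part of this commit) looked roughly like the fragment below. It is too flaky to keep: before the first Flush, the Put count is sometimes 0 and sometimes 1, depending on whether any bucket has already overflowed into a new subshard.

// Hypothetical fragment, imagined inside addAndRemoveKeys right after the
// initial Set calls and before the first Flush; mb is the *mockBlocks behind
// the store and t is the *testing.T. Abandoned because the observed value
// varies between 0 and 1.
if mb.stats.evtcntPut != 0 {
	t.Errorf("expected no Puts before Flush, but the blockstore saw %d", mb.stats.evtcntPut)
}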
