Commit 5b4fdce

Merge pull request #62 from filecoin-project/addntl-benchmarks
Additional benchmarks
2 parents a45e38f + a4c6bbe commit 5b4fdce

4 files changed: 243 additions & 30 deletions

.travis.yml

Lines changed: 1 addition & 1 deletion
@@ -4,7 +4,7 @@ os:
 language: go
 
 go:
-  - 1.11.x
+  - 1.13.x
 
 env:
   global:

hamt.go

Lines changed: 10 additions & 4 deletions
@@ -405,10 +405,16 @@ func loadNode(
     return &out, nil
 }
 
-// Calculate the total _byte weight_ of the HAMT by fetching each node
-// from the IpldStore and adding its raw byte size to the total. This
-// operation will exhaustively load every node of the HAMT so should not
-// be used lightly.
+// checkSize computes the total serialized size of the entire HAMT.
+// It both puts and loads blocks as necessary to do this
+// (using the Put operation and a paired Get to discover the serial size,
+// and the load to move recursively as necessary).
+//
+// This is an expensive operation and should only be used in testing and analysis.
+//
+// Note that checkSize *does* actually *use the blockstore*: therefore it
+// will affect get and put counts (and makes no attempt to avoid duplicate puts!);
+// be aware of this if you are measuring those event counts.
 func (n *Node) checkSize(ctx context.Context) (uint64, error) {
     c, err := n.store.Put(ctx, n)
     if err != nil {
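A minimal sketch of the usage pattern this comment describes, with a hypothetical test name, assuming it sits alongside hamt_test.go in the same package (so the unexported checkSize and the test helpers newMockBlocks and rander are in scope):

func TestCheckSizeSmoke(t *testing.T) {
    ctx := context.Background()
    blockstore := newMockBlocks()
    n := NewNode(cbor.NewCborStore(blockstore), UseTreeBitWidth(8))
    r := rander{rand.New(rand.NewSource(42))}
    for i := 0; i < 1000; i++ {
        if err := n.Set(ctx, r.randString(), r.randValue()); err != nil {
            t.Fatal(err)
        }
    }
    if err := n.Flush(ctx); err != nil {
        t.Fatal(err)
    }
    // Snapshot the blockstore counters first: checkSize Puts (and Gets) every
    // node while walking the tree, so it perturbs the very stats being measured.
    statsBeforeCheck := blockstore.stats
    size, err := n.checkSize(ctx)
    if err != nil {
        t.Fatal(err)
    }
    t.Logf("serialized size: %d bytes; blockstore stats before checkSize: %#v", size, statsBeforeCheck)
}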

hamt_bench_test.go

Lines changed: 157 additions & 23 deletions
@@ -51,53 +51,184 @@ func BenchmarkSerializeNode(b *testing.B) {
 }
 
 type benchSetCase struct {
-    count    int
+    kcount   int
     bitwidth int
 }
 
-func BenchmarkSet(b *testing.B) {
-    kCounts := []int{1, 10, 100}
-    bitwidths := []int{5, 8}
+var benchSetCaseTable []benchSetCase
 
-    var table []benchSetCase
+func init() {
+    kCounts := []int{
+        1,
+        5,
+        10,
+        50,
+        100,
+        500,
+        1000, // aka 1M
+        //10000, // aka 10M -- you'll need a lot of RAM for this. Also, some patience.
+    }
+    bitwidths := []int{
+        3,
+        4,
+        5,
+        6,
+        7,
+        8,
+    }
+    // bucketsize-aka-arraywidth? maybe someday.
     for _, c := range kCounts {
-
         for _, bw := range bitwidths {
-            table = append(table, benchSetCase{count: c * 1000, bitwidth: bw})
+            benchSetCaseTable = append(benchSetCaseTable, benchSetCase{kcount: c, bitwidth: bw})
         }
+    }
+}
 
+// The benchmark results can be graphed. Here are some reasonable selections:
+/*
+benchdraw --filter=BenchmarkFill --plot=line --x=n "--y=blocks/entry" < sample > BenchmarkFill-blocks-per-entry-vs-scale.svg
+benchdraw --filter=BenchmarkFill --plot=line --x=n "--y=bytes(blockstoreAccnt)/entry" < sample > BenchmarkFill-totalBytes-per-entry-vs-scale.svg
+benchdraw --filter=BenchmarkSetBulk --plot=line --x=n "--y=addntlBlocks/addntlEntry" < sample > BenchmarkSetBulk-addntlBlocks-per-addntlEntry-vs-scale.svg
+benchdraw --filter=BenchmarkSetIndividual --plot=line --x=n "--y=addntlBlocks/addntlEntry" < sample > BenchmarkSetIndividual-addntlBlocks-per-addntlEntry-vs-scale.svg
+benchdraw --filter=BenchmarkFind --plot=line --x=n "--y=ns/op" < sample > BenchmarkFind-speed-vs-scale.svg
+benchdraw --filter=BenchmarkFind --plot=line --x=n "--y=getEvts/find" < sample > BenchmarkFind-getEvts-vs-scale.svg
+*/
+// (The 'benchdraw' command alluded to here is https://github.com/cep21/benchdraw .)
+
+// Histograms of block sizes can be logged from some of the following functions, but are commented out.
+// The main thing to check for in those is whether there are any exceptionally small blocks being produced:
+// less than 64 bytes is a bit concerning because we assume there's some overhead per block in most operations (even if the exact amount may vary situationally).
+// We do see some of these small blocks with small bitwidth parameters (e.g. 3), but almost none with larger bitwidth parameters.
+
+// BenchmarkFill creates a large HAMT, and measures how long it takes to generate all of this many entries;
+// the number of entries is varied in sub-benchmarks, denoted by their "n=" label component.
+// Flush is done once for the entire structure, meaning the number of blocks generated per entry can be much fewer than 1.
+//
+// The number of blocks saved to the blockstore per entry is reported, as is the total content size in bytes.
+// The nanoseconds-per-op report on this function is not very useful, because the size of an "op" varies with "n" between benchmarks.
+//
+// See "BenchmarkSet*" for a probe of how long it takes to set additional entries in an already-large hamt
+// (this gives more interesting and useful nanoseconds-per-op indicators).
+func BenchmarkFill(b *testing.B) {
+    for _, t := range benchSetCaseTable {
+        b.Run(fmt.Sprintf("n=%dk/bitwidth=%d", t.kcount, t.bitwidth), func(b *testing.B) {
+            for i := 0; i < b.N; i++ {
+                r := rander{rand.New(rand.NewSource(int64(i)))}
+                blockstore := newMockBlocks()
+                n := NewNode(cbor.NewCborStore(blockstore), UseTreeBitWidth(t.bitwidth))
+                //b.ResetTimer()
+                for j := 0; j < t.kcount*1000; j++ {
+                    if err := n.Set(context.Background(), r.randString(), r.randValue()); err != nil {
+                        b.Fatal(err)
+                    }
+                }
+                if err := n.Flush(context.Background()); err != nil {
+                    b.Fatal(err)
+                }
+                b.StopTimer()
+                if i < 3 {
+                    //b.Logf("block size histogram: %v\n", blockstore.getBlockSizesHistogram())
+                }
+                if blockstore.stats.evtcntPutDup > 0 {
+                    b.Logf("on round N=%d: blockstore stats: %#v\n", b.N, blockstore.stats) // note: must refer to this before doing `n.checkSize`; that function has many effects.
+                }
+                b.ReportMetric(float64(blockstore.stats.evtcntGet)/float64(t.kcount*1000), "getEvts/entry")
+                b.ReportMetric(float64(blockstore.stats.evtcntPut)/float64(t.kcount*1000), "putEvts/entry")
+                b.ReportMetric(float64(len(blockstore.data))/float64(t.kcount*1000), "blocks/entry")
+                binarySize, _ := n.checkSize(context.Background())
+                b.ReportMetric(float64(binarySize)/float64(t.kcount*1000), "bytes(hamtAccnt)/entry")
+                b.ReportMetric(float64(blockstore.totalBlockSizes())/float64(t.kcount*1000), "bytes(blockstoreAccnt)/entry")
+                b.StartTimer()
+            }
+        })
     }
-    r := rander{rand.New(rand.NewSource(int64(42)))}
-    for _, t := range table {
-        b.Run(fmt.Sprintf("%d/%d", t.count, t.bitwidth), func(b *testing.B) {
-            ctx := context.Background()
-            n := NewNode(cbor.NewCborStore(newMockBlocks()), UseTreeBitWidth(t.bitwidth))
-            b.ResetTimer()
+}
+
+// BenchmarkSetBulk creates a large HAMT, then resets the timer, and does another 1000 inserts,
+// measuring the time taken for this second batch of inserts.
+// Flushing happens once after all 1000 inserts.
+//
+// The number of *additional* blocks per entry is reported.
+// This number is usually less than one, because the bulk flush means changes might be amortized.
+func BenchmarkSetBulk(b *testing.B) {
+    doBenchmarkSetSuite(b, false)
+}
+
+// BenchmarkSetIndividual is the same as BenchmarkSetBulk, but flushes more.
+// Flush happens per insert.
+//
+// The number of *additional* blocks per entry is reported.
+// Since we flush each insert individually, this number should be at least 1 --
+// however, since we choose random keys, it can still turn out lower if keys happen to collide.
+// (The Set method does not make it possible to adjust our denominator to compensate for this: it does not yield previous values nor indicators of prior presence.)
+func BenchmarkSetIndividual(b *testing.B) {
+    doBenchmarkSetSuite(b, true)
+}
+
+func doBenchmarkSetSuite(b *testing.B, flushPer bool) {
+    for _, t := range benchSetCaseTable {
+        b.Run(fmt.Sprintf("n=%dk/bitwidth=%d", t.kcount, t.bitwidth), func(b *testing.B) {
             for i := 0; i < b.N; i++ {
-                for j := 0; j < t.count; j++ {
-                    if err := n.Set(ctx, r.randString(), r.randValue()); err != nil {
+                r := rander{rand.New(rand.NewSource(int64(i)))}
+                blockstore := newMockBlocks()
+                n := NewNode(cbor.NewCborStore(blockstore), UseTreeBitWidth(t.bitwidth))
+                // Initial fill:
+                for j := 0; j < t.kcount*1000; j++ {
+                    if err := n.Set(context.Background(), r.randString(), r.randValue()); err != nil {
                         b.Fatal(err)
                     }
                 }
+                if err := n.Flush(context.Background()); err != nil {
+                    b.Fatal(err)
+                }
+                initialBlockstoreSize := len(blockstore.data)
+                b.ResetTimer()
+                blockstore.stats = blockstoreStats{}
+                // Additional inserts:
+                b.ReportAllocs()
+                for j := 0; j < 1000; j++ {
+                    if err := n.Set(context.Background(), r.randString(), r.randValue()); err != nil {
+                        b.Fatal(err)
+                    }
+                    if flushPer {
+                        if err := n.Flush(context.Background()); err != nil {
+                            b.Fatal(err)
+                        }
+                    }
+                }
+                if !flushPer {
+                    if err := n.Flush(context.Background()); err != nil {
+                        b.Fatal(err)
+                    }
+                }
+                b.StopTimer()
+                if i < 3 {
+                    // b.Logf("block size histogram: %v\n", blockstore.getBlockSizesHistogram())
+                }
+                if blockstore.stats.evtcntPutDup > 0 {
+                    b.Logf("on round N=%d: blockstore stats: %#v\n", b.N, blockstore.stats)
+                }
+                b.ReportMetric(float64(blockstore.stats.evtcntGet)/float64(t.kcount*1000), "getEvts/entry")
+                b.ReportMetric(float64(blockstore.stats.evtcntPut)/float64(t.kcount*1000), "putEvts/entry")
+                b.ReportMetric(float64(len(blockstore.data)-initialBlockstoreSize)/float64(1000), "addntlBlocks/addntlEntry")
+                b.StartTimer()
             }
         })
     }
 }
 
 func BenchmarkFind(b *testing.B) {
-    b.Run("find-10k", doBenchmarkEntriesCount(10000, 8))
-    b.Run("find-100k", doBenchmarkEntriesCount(100000, 8))
-    b.Run("find-1m", doBenchmarkEntriesCount(1000000, 8))
-    b.Run("find-10k-bitwidth-5", doBenchmarkEntriesCount(10000, 5))
-    b.Run("find-100k-bitwidth-5", doBenchmarkEntriesCount(100000, 5))
-    b.Run("find-1m-bitwidth-5", doBenchmarkEntriesCount(1000000, 5))
-
+    for _, t := range benchSetCaseTable {
+        b.Run(fmt.Sprintf("n=%dk/bitwidth=%d", t.kcount, t.bitwidth),
+            doBenchmarkEntriesCount(t.kcount*1000, t.bitwidth))
+    }
 }
 
 func doBenchmarkEntriesCount(num int, bitWidth int) func(b *testing.B) {
     r := rander{rand.New(rand.NewSource(int64(num)))}
     return func(b *testing.B) {
-        cs := cbor.NewCborStore(newMockBlocks())
+        blockstore := newMockBlocks()
+        cs := cbor.NewCborStore(blockstore)
         n := NewNode(cs, UseTreeBitWidth(bitWidth))
 
         var keys []string
@@ -119,6 +250,7 @@ func doBenchmarkEntriesCount(num int, bitWidth int) func(b *testing.B) {
         }
 
         runtime.GC()
+        blockstore.stats = blockstoreStats{}
         b.ResetTimer()
         b.ReportAllocs()
 
@@ -132,5 +264,7 @@ func doBenchmarkEntriesCount(num int, bitWidth int) func(b *testing.B) {
                 b.Fatal(err)
             }
         }
+        b.ReportMetric(float64(blockstore.stats.evtcntGet)/float64(b.N), "getEvts/find")
+        b.ReportMetric(float64(blockstore.stats.evtcntPut)/float64(b.N), "putEvts/find") // surely this is zero, but for completeness.
     }
 }
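The custom metrics above (getEvts/entry, blocks/entry, getEvts/find, and so on) are emitted with testing.B.ReportMetric, which exists only in Go 1.13 and newer -- presumably the reason this commit also bumps the Go version in .travis.yml. They show up as extra columns in the normal `go test -bench` output (which is presumably what gets redirected into the "sample" file the benchdraw invocations above consume), and they can also be read programmatically via testing.BenchmarkResult.Extra. A minimal sketch, with a hypothetical test name, assuming the same package so doBenchmarkEntriesCount is in scope:

func TestFindMetricsProgrammatically(t *testing.T) {
    // testing.Benchmark runs the function much like `go test -bench` would (ramping b.N)
    // and returns the result, including any ReportMetric values in the Extra map.
    res := testing.Benchmark(doBenchmarkEntriesCount(10000, 8))
    t.Logf("ns/op: %d", res.NsPerOp())
    t.Logf("getEvts/find: %v", res.Extra["getEvts/find"])
    t.Logf("putEvts/find: %v", res.Extra["putEvts/find"])
}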

hamt_test.go

Lines changed: 75 additions & 2 deletions
@@ -7,6 +7,7 @@ import (
     "encoding/hex"
     "fmt"
     "math/rand"
+    "strconv"
     "strings"
     "testing"
     "time"
@@ -18,14 +19,16 @@ import (
 )
 
 type mockBlocks struct {
-    data map[cid.Cid]block.Block
+    data  map[cid.Cid]block.Block
+    stats blockstoreStats
 }
 
 func newMockBlocks() *mockBlocks {
-    return &mockBlocks{make(map[cid.Cid]block.Block)}
+    return &mockBlocks{make(map[cid.Cid]block.Block), blockstoreStats{}}
 }
 
 func (mb *mockBlocks) Get(c cid.Cid) (block.Block, error) {
+    mb.stats.evtcntGet++
     d, ok := mb.data[c]
     if ok {
         return d, nil
@@ -34,10 +37,80 @@ func (mb *mockBlocks) Get(c cid.Cid) (block.Block, error) {
 }
 
 func (mb *mockBlocks) Put(b block.Block) error {
+    mb.stats.evtcntPut++
+    if _, exists := mb.data[b.Cid()]; exists {
+        mb.stats.evtcntPutDup++
+    }
     mb.data[b.Cid()] = b
     return nil
 }
 
+type blockstoreStats struct {
+    evtcntGet    int
+    evtcntPut    int
+    evtcntPutDup int
+}
+
+func (mb *mockBlocks) totalBlockSizes() int {
+    sum := 0
+    for _, v := range mb.data {
+        sum += len(v.RawData())
+    }
+    return sum
+}
+
+type blockSizesHistogram [12]int
+
+func (mb *mockBlocks) getBlockSizesHistogram() (h blockSizesHistogram) {
+    for _, v := range mb.data {
+        l := len(v.RawData())
+        switch {
+        case l <= 2<<2: // 8
+            h[0]++
+        case l <= 2<<3: // 16
+            h[1]++
+        case l <= 2<<4: // 32
+            h[2]++
+        case l <= 2<<5: // 64
+            h[3]++
+        case l <= 2<<6: // 128
+            h[4]++
+        case l <= 2<<7: // 256
+            h[5]++
+        case l <= 2<<8: // 512
+            h[6]++
+        case l <= 2<<9: // 1024
+            h[7]++
+        case l <= 2<<10: // 2048
+            h[8]++
+        case l <= 2<<11: // 4096
+            h[9]++
+        case l <= 2<<12: // 8192
+            h[10]++
+        default:
+            h[11]++
+        }
+    }
+    return
+}
+
+func (h blockSizesHistogram) String() string {
+    v := "["
+    v += "<=" + strconv.Itoa(2<<2) + ":" + strconv.Itoa(h[0]) + ", "
+    v += "<=" + strconv.Itoa(2<<3) + ":" + strconv.Itoa(h[1]) + ", "
+    v += "<=" + strconv.Itoa(2<<4) + ":" + strconv.Itoa(h[2]) + ", "
+    v += "<=" + strconv.Itoa(2<<5) + ":" + strconv.Itoa(h[3]) + ", "
+    v += "<=" + strconv.Itoa(2<<6) + ":" + strconv.Itoa(h[4]) + ", "
+    v += "<=" + strconv.Itoa(2<<7) + ":" + strconv.Itoa(h[5]) + ", "
+    v += "<=" + strconv.Itoa(2<<8) + ":" + strconv.Itoa(h[6]) + ", "
+    v += "<=" + strconv.Itoa(2<<9) + ":" + strconv.Itoa(h[7]) + ", "
+    v += "<=" + strconv.Itoa(2<<10) + ":" + strconv.Itoa(h[8]) + ", "
+    v += "<=" + strconv.Itoa(2<<11) + ":" + strconv.Itoa(h[9]) + ", "
+    v += "<=" + strconv.Itoa(2<<12) + ":" + strconv.Itoa(h[10]) + ", "
+    v += ">" + strconv.Itoa(2<<12) + ":" + strconv.Itoa(h[11])
+    return v + "]"
+}
+
 func randString() string {
     buf := make([]byte, 18)
     rand.Read(buf)
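The new counters are also usable outside the benchmarks. A minimal sketch (hypothetical test name, same package, reusing the rander helper and the imports already present in these test files) that fills a small HAMT, flushes it, and then logs whether a second Flush re-Puts blocks the store already holds -- the situation evtcntPutDup is meant to surface:

func TestObserveDuplicatePuts(t *testing.T) {
    ctx := context.Background()
    blockstore := newMockBlocks()
    n := NewNode(cbor.NewCborStore(blockstore), UseTreeBitWidth(8))
    r := rander{rand.New(rand.NewSource(7))}
    for i := 0; i < 1000; i++ {
        if err := n.Set(ctx, r.randString(), r.randValue()); err != nil {
            t.Fatal(err)
        }
    }
    if err := n.Flush(ctx); err != nil {
        t.Fatal(err)
    }
    afterFirstFlush := blockstore.stats
    // A second Flush with no intervening Set should write little or nothing new;
    // any re-Put of an already-stored CID shows up in evtcntPutDup.
    if err := n.Flush(ctx); err != nil {
        t.Fatal(err)
    }
    t.Logf("first flush: %#v", afterFirstFlush)
    t.Logf("second flush: puts=%d dups=%d",
        blockstore.stats.evtcntPut-afterFirstFlush.evtcntPut,
        blockstore.stats.evtcntPutDup-afterFirstFlush.evtcntPutDup)
}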
