From 13c01bb8db4e6cd69363d9e6da67032815baf320 Mon Sep 17 00:00:00 2001 From: ZenGround0 Date: Mon, 7 Dec 2020 10:52:46 -0500 Subject: [PATCH 1/2] AMT benchmarks --- amt_bench_test.go | 239 ++++++++++++++++++++++++++++++++++++++++++++++ amt_test.go | 56 +++++++---- node.go | 59 ++++++++++++ 3 files changed, 336 insertions(+), 18 deletions(-) create mode 100644 amt_bench_test.go diff --git a/amt_bench_test.go b/amt_bench_test.go new file mode 100644 index 0000000..2b2bded --- /dev/null +++ b/amt_bench_test.go @@ -0,0 +1,239 @@ +package amt + +import ( + "context" + "fmt" + "math/rand" + "testing" + + cbor "github.com/ipfs/go-ipld-cbor" + "github.com/stretchr/testify/require" + cbg "github.com/whyrusleeping/cbor-gen" +) + +type rander struct { + r *rand.Rand +} + +func (r *rander) randKey(keyRange uint64) uint64 { + return r.r.Uint64() % keyRange +} + +func (r *rander) randValue(datasize int) []byte { + buf := make([]byte, datasize) + rand.Read(buf) + return buf +} + +func (r *rander) selectKey(keys []uint64) uint64 { + i := rand.Int() % len(keys) + return keys[i] +} + +type amtParams struct { + id string + count int + datasize int + keyrange int +} + +type benchCase struct { + id string + count int + bitwidth int + datasize int + keyrange int +} + +var caseTable []benchCase + +func init() { + + bitwidths := []int{ + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + } + + amts := []amtParams{ + amtParams{ + id: "example.Full", + count: 5000, + datasize: 4, + keyrange: 5000, + }, + amtParams{ + id: "example.Sparse", + count: 5000, + datasize: 4, + keyrange: 5000000, + }, + amtParams{ + id: "example.AlmostFull", + count: 5000, + datasize: 4, + keyrange: 10000, + }, + } + + for _, a := range amts { + for _, bw := range bitwidths { + caseTable = append(caseTable, + benchCase{ + id: fmt.Sprintf("%s -- bw=%d", a.id, bw), + count: a.count, + bitwidth: bw, + datasize: a.datasize, + keyrange: a.keyrange, + }) + } + } +} + +func fillContinuous(ctx context.Context, b *testing.B, a *Root, 
count uint64, dataSize int, r rander) []uint64 { + keys := make([]uint64, 0) + for i := uint64(0); i < count; i++ { + require.NoError(b, a.Set(ctx, i, r.randValue(dataSize))) + keys = append(keys, i) + } + return keys +} + +func fillSparse(ctx context.Context, b *testing.B, a *Root, count int, keyrange int, dataSize int, r rander) []uint64 { + keys := make(map[uint64]struct{}) + keysSlice := make([]uint64, 0) + for j := 0; j < count; j++ { + for { + key := r.randKey(uint64(keyrange)) + _, dup := keys[key] + if !dup { + require.NoError(b, a.Set(ctx, key, r.randValue(dataSize))) + keys[key] = struct{}{} + keysSlice = append(keysSlice, key) + break + } + } + } + return keysSlice +} + +func fill(ctx context.Context, b *testing.B, a *Root, count int, dataSize int, keyrange int, r rander) []uint64 { + if count >= keyrange { + return fillContinuous(ctx, b, a, uint64(count), dataSize, r) + } else { + return fillSparse(ctx, b, a, count, keyrange, dataSize, r) + } +} + +// Note this is only intended for use measuring size as timing and memory usage +// may not be optimal to handle no duplicate writes. 
+func BenchmarkFill(b *testing.B) { + ctx := context.Background() + for _, t := range caseTable { + b.Run(fmt.Sprintf("%s", t.id), func(b *testing.B) { + for i := 0; i < b.N; i++ { + r := rander{rand.New(rand.NewSource(int64(i)))} + mock := newMockBlocks() + cst := cbor.NewCborStore(mock) + a, err := NewAMT(cst, UseTreeBitWidth(t.bitwidth)) + require.NoError(b, err) + + // Fill the tree + fill(ctx, b, a, t.count, t.datasize, t.keyrange, r) + _, err = a.Flush(ctx) + require.NoError(b, err) + b.StopTimer() + b.ReportMetric(float64(len(mock.data))/float64(t.count), "blocks") + b.ReportMetric(float64(mock.totalBlockSizes())/float64(t.count), "bytes(blockstoreSize)/entry") + binarySize, err := a.node.checkSize(ctx, cst, uint(t.bitwidth), a.height) + require.NoError(b, err) + b.ReportMetric(float64(binarySize), "binarySize") + b.ReportMetric(float64(binarySize)/float64(t.count), "bytes(amtSize)/entry") + b.StartTimer() + } + }) + } +} + +// 0. Fill AMT with t.count keys selected between 0 and t.keyrange. +// 1. Perform 1000 sets on a random key from t.keyrange on the base AMT +// 2. 
Report average over sets +func BenchmarkSetIndividual(b *testing.B) { + ctx := context.Background() + for _, t := range caseTable { + b.Run(fmt.Sprintf("%s", t.id), func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.StopTimer() + r := rander{rand.New(rand.NewSource(int64(i)))} + mock := newMockBlocks() + cst := cbor.NewCborStore(mock) + a, err := NewAMT(cst, UseTreeBitWidth(t.bitwidth)) + require.NoError(b, err) + + // Initial fill + fill(ctx, b, a, t.count, t.datasize, t.keyrange, r) + aCid, err := a.Flush(ctx) + require.NoError(b, err) + + mock.stats = blockstoreStats{} + b.ReportAllocs() + b.StartTimer() + for j := 0; j < 1000; j++ { + // Load AMT, perform a set at random within key range, flush + a, err = LoadAMT(ctx, cst, aCid, UseTreeBitWidth(t.bitwidth)) + require.NoError(b, err) + + key := r.randKey(uint64(t.keyrange)) + require.NoError(b, a.Set(ctx, key, r.randValue(t.datasize))) + _, err = a.Flush(ctx) + require.NoError(b, err) + } + b.StopTimer() + b.ReportMetric(float64(mock.stats.evtcntGet)/1000, "getEvts") + b.ReportMetric(float64(mock.stats.evtcntPut)/1000, "putEvts") + b.ReportMetric(float64(mock.stats.bytesPut)/1000, "bytesPut") + } + }) + } +} + +func BenchmarkGetIndividual(b *testing.B) { + ctx := context.Background() + + for _, t := range caseTable { + b.Run(fmt.Sprintf("%s", t.id), func(b *testing.B) { + for i := 0; i < b.N; i++ { + b.StopTimer() + r := rander{rand.New(rand.NewSource(int64(i)))} + mock := newMockBlocks() + cst := cbor.NewCborStore(mock) + a, err := NewAMT(cst, UseTreeBitWidth(t.bitwidth)) + require.NoError(b, err) + + // Initial fill + amtKeys := fill(ctx, b, a, t.count, t.datasize, t.keyrange, r) + aCid, err := a.Flush(ctx) + require.NoError(b, err) + + mock.stats = blockstoreStats{} + b.ReportAllocs() + b.StartTimer() + var d cbg.Deferred + for j := 0; j < 1000; j++ { + // Load AMT, perform a set on a random existing key + a, err = LoadAMT(ctx, cst, aCid, UseTreeBitWidth(t.bitwidth)) + require.NoError(b, err) + 
require.NoError(b, a.Get(ctx, r.selectKey(amtKeys), &d)) + } + b.StopTimer() + b.ReportMetric(float64(mock.stats.evtcntGet)/float64(1000), "getEvts") + b.ReportMetric(float64(mock.stats.evtcntPut)/float64(1000), "putEvts") + } + }) + } +} diff --git a/amt_test.go b/amt_test.go index f7273db..3942799 100644 --- a/amt_test.go +++ b/amt_test.go @@ -5,7 +5,6 @@ import ( "context" "fmt" "math/rand" - "os" "testing" "time" @@ -28,29 +27,39 @@ func init() { } } -func TestMain(m *testing.M) { - // Hack to test with multiple widths, without hassle. - for defaultBitWidth = 2; defaultBitWidth <= 18; defaultBitWidth++ { - fmt.Printf("WIDTH %d\n", defaultBitWidth) - if code := m.Run(); code != 0 { - os.Exit(code) - } - } - os.Exit(0) -} +// func TestMain(m *testing.M) { +// // Hack to test with multiple widths, without hassle. +// for defaultBitWidth = 2; defaultBitWidth <= 18; defaultBitWidth++ { +// fmt.Printf("WIDTH %d\n", defaultBitWidth) +// if code := m.Run(); code != 0 { +// os.Exit(code) +// } +// } +// os.Exit(0) +// } type mockBlocks struct { - data map[cid.Cid]block.Block - getCount, putCount int + data map[cid.Cid]block.Block + stats blockstoreStats } func newMockBlocks() *mockBlocks { - return &mockBlocks{make(map[cid.Cid]block.Block), 0, 0} + return &mockBlocks{make(map[cid.Cid]block.Block), + blockstoreStats{0, 0, 0, 0}, + } +} + +func (mb *mockBlocks) totalBlockSizes() int { + sum := 0 + for _, v := range mb.data { + sum += len(v.RawData()) + } + return sum } func (mb *mockBlocks) Get(c cid.Cid) (block.Block, error) { + mb.stats.evtcntGet++ d, ok := mb.data[c] - mb.getCount++ if ok { return d, nil } @@ -58,14 +67,25 @@ func (mb *mockBlocks) Get(c cid.Cid) (block.Block, error) { } func (mb *mockBlocks) Put(b block.Block) error { - mb.putCount++ + mb.stats.evtcntPut++ + mb.stats.bytesPut += len(b.RawData()) + if _, exists := mb.data[b.Cid()]; exists { + mb.stats.evtcntPutDup++ + } mb.data[b.Cid()] = b return nil } +type blockstoreStats struct { + evtcntGet int + 
evtcntPut int + bytesPut int + evtcntPutDup int +} + func (mb *mockBlocks) report(b *testing.B) { - b.ReportMetric(float64(mb.getCount)/float64(b.N), "gets/op") - b.ReportMetric(float64(mb.putCount)/float64(b.N), "puts/op") + b.ReportMetric(float64(mb.stats.evtcntGet)/float64(b.N), "gets/op") + b.ReportMetric(float64(mb.stats.evtcntPut)/float64(b.N), "puts/op") } func TestBasicSetGet(t *testing.T) { diff --git a/node.go b/node.go index 3c85fda..4b5735a 100644 --- a/node.go +++ b/node.go @@ -362,6 +362,65 @@ func (n *node) flush(ctx context.Context, bs cbor.IpldStore, bitWidth uint, heig return nd, nil } +// checkSize computes the serialized size of the entire AMT. +// It puts and gets blocks as necessary to do this. +// This is an expensive operation and should only be used in testing and analysis. +// +// Precondition: the node has not been modified since flush. The dirty bits are +// not checked and link cid when exists is assumed to be source of truth +func (n *node) checkSize(ctx context.Context, bs cbor.IpldStore, bitWidth uint, height int) (uint64, error) { + // Get size of this node + nd := new(internal.Node) + nd.Bmap = make([]byte, bmapBytes(bitWidth)) + if height == 0 { + for i, val := range n.values { + if val == nil { + continue + } + nd.Values = append(nd.Values, val) + nd.Bmap[i/8] |= 1 << (uint(i) % 8) + } + } else { + for i, ln := range n.links { + if ln == nil { + continue + } + // Precondition that no link cids are out of date applied here. + // For the current implementation this should not actually impact + // the final result as cids are all sized the same. 
+ nd.Links = append(nd.Links, ln.cid) + nd.Bmap[i/8] |= 1 << (uint(i) % 8) + } + } + c, err := bs.Put(ctx, nd) + if err != nil { + return 0, err + } + var def cbg.Deferred + if err := bs.Get(ctx, c, &def); err != nil { + return 0, err + } + totsize := uint64(len(def.Raw)) + + // Recurse + for _, ln := range n.links { + if ln == nil { + continue + } + chnd, err := ln.load(ctx, bs, bitWidth, height) + if err != nil { + return 0, err + } + chsize, err := chnd.checkSize(ctx, bs, bitWidth, height-1) + if err != nil { + return 0, err + } + totsize += chsize + + } + return totsize, nil +} + func (n *node) setLink(bitWidth uint, i uint64, l *link) { if n.links == nil { if l == nil { From d976d496bfe96bccff6bfd9aab2cf21060686cdc Mon Sep 17 00:00:00 2001 From: ZenGround0 Date: Thu, 6 May 2021 09:12:07 -0400 Subject: [PATCH 2/2] Uncomment TestMain --- amt_test.go | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/amt_test.go b/amt_test.go index 3942799..ab90a13 100644 --- a/amt_test.go +++ b/amt_test.go @@ -5,6 +5,7 @@ import ( "context" "fmt" "math/rand" + "os" "testing" "time" @@ -27,16 +28,16 @@ func init() { } } -// func TestMain(m *testing.M) { -// // Hack to test with multiple widths, without hassle. -// for defaultBitWidth = 2; defaultBitWidth <= 18; defaultBitWidth++ { -// fmt.Printf("WIDTH %d\n", defaultBitWidth) -// if code := m.Run(); code != 0 { -// os.Exit(code) -// } -// } -// os.Exit(0) -// } +func TestMain(m *testing.M) { + // Hack to test with multiple widths, without hassle. + for defaultBitWidth = 2; defaultBitWidth <= 18; defaultBitWidth++ { + fmt.Printf("WIDTH %d\n", defaultBitWidth) + if code := m.Run(); code != 0 { + os.Exit(code) + } + } + os.Exit(0) +} type mockBlocks struct { data map[cid.Cid]block.Block