Skip to content

Commit 541d744

Browse files
authored
Merge pull request #6465 from onflow/leo/db-ops
[Badger] Add universal database operations
2 parents 0310dfd + 24d65bc commit 541d744

File tree

19 files changed

+1898
-2
lines changed

19 files changed

+1898
-2
lines changed

cmd/bootstrap/utils/md5.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,8 @@ package utils
22

33
// The google storage API only provides md5 and crc32 hence overriding the linter flag for md5
44
import (
5-
"crypto/md5" //nolint:gosec
5+
// #nosec
6+
"crypto/md5"
67
"io"
78
"os"
89
)

storage/batch.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,19 @@
11
package storage
22

3-
import "github.com/dgraph-io/badger/v2"
3+
import (
4+
"github.com/dgraph-io/badger/v2"
5+
)
46

7+
// Deprecated: Transaction is being deprecated as part of the transition from Badger to Pebble.
8+
// Use Writer instead of Transaction for all new code.
59
type Transaction interface {
610
Set(key, val []byte) error
711
}
812

913
// BatchStorage serves as an abstraction over batch storage, adding the ability to add extra
1014
// callbacks which fire after the batch is successfully flushed.
15+
//
// Deprecated: BatchStorage is being deprecated as part of the transition from Badger to Pebble.
16+
// Use ReaderBatchWriter instead of BatchStorage for all new code.
1117
type BatchStorage interface {
1218
GetWriter() *badger.WriteBatch
1319

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
package badgerimpl
2+
3+
import (
4+
"bytes"
5+
6+
"github.com/dgraph-io/badger/v2"
7+
8+
"github.com/onflow/flow-go/storage"
9+
)
10+
11+
type badgerIterator struct {
12+
iter *badger.Iterator
13+
lowerBound []byte
14+
upperBound []byte
15+
hasUpperBound bool // whether there's an upper bound
16+
}
17+
18+
var _ storage.Iterator = (*badgerIterator)(nil)
19+
20+
func newBadgerIterator(db *badger.DB, startPrefix, endPrefix []byte, ops storage.IteratorOption) *badgerIterator {
21+
options := badger.DefaultIteratorOptions
22+
if ops.BadgerIterateKeyOnly {
23+
options.PrefetchValues = false
24+
}
25+
26+
tx := db.NewTransaction(false)
27+
iter := tx.NewIterator(options)
28+
29+
lowerBound, upperBound, hasUpperBound := storage.StartEndPrefixToLowerUpperBound(startPrefix, endPrefix)
30+
31+
return &badgerIterator{
32+
iter: iter,
33+
lowerBound: lowerBound,
34+
upperBound: upperBound,
35+
hasUpperBound: hasUpperBound,
36+
}
37+
}
38+
39+
// First seeks to the smallest key greater than or equal to the given key.
40+
func (i *badgerIterator) First() bool {
41+
i.iter.Seek(i.lowerBound)
42+
return i.Valid()
43+
}
44+
45+
// Valid returns whether the iterator is positioned at a valid key-value pair.
46+
func (i *badgerIterator) Valid() bool {
47+
// Note: we didn't specify the iteration range with the badger IteratorOptions,
48+
// because the IterationOptions only allows us to specify a single prefix, whereas
49+
// we need to specify a range of prefixes. So we have to manually check the bounds here.
50+
// The First() method, which calls Seek(i.lowerBound), ensures the iteration starts from
51+
// the lowerBound, and the upperbound is checked here by first checking if it's
52+
// reaching the end of the iteration, then checking if the key is within the upperbound.
53+
54+
// check if it's reaching the end of the iteration
55+
if !i.iter.Valid() {
56+
return false
57+
}
58+
59+
// if upper bound is nil, then there's no upper bound, so it's always valid
60+
if !i.hasUpperBound {
61+
return true
62+
}
63+
64+
// check if the key is within the upperbound (exclusive)
65+
key := i.iter.Item().Key()
66+
// note: for the boundary case,
67+
// upperBound is the exclusive upper bound, should not be included in the iteration,
68+
// so if key == upperBound, it's invalid, should return false.
69+
valid := bytes.Compare(key, i.upperBound) < 0
70+
return valid
71+
}
72+
73+
// Next advances the iterator to the next key-value pair.
74+
func (i *badgerIterator) Next() {
75+
i.iter.Next()
76+
}
77+
78+
// IterItem returns the current key-value pair, or nil if done.
79+
func (i *badgerIterator) IterItem() storage.IterItem {
80+
return i.iter.Item()
81+
}
82+
83+
var _ storage.IterItem = (*badger.Item)(nil)
84+
85+
// Close closes the iterator. Iterator must be closed, otherwise it causes memory leak.
86+
// No errors expected during normal operation
87+
func (i *badgerIterator) Close() error {
88+
i.iter.Close()
89+
return nil
90+
}
Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,67 @@
1+
package badgerimpl
2+
3+
import (
4+
"bytes"
5+
"errors"
6+
"fmt"
7+
"io"
8+
9+
"github.com/dgraph-io/badger/v2"
10+
11+
"github.com/onflow/flow-go/module/irrecoverable"
12+
"github.com/onflow/flow-go/storage"
13+
"github.com/onflow/flow-go/utils/noop"
14+
)
15+
16+
type dbReader struct {
17+
db *badger.DB
18+
}
19+
20+
// Get gets the value for the given key. It returns ErrNotFound if the DB
21+
// does not contain the key.
22+
// other errors are exceptions
23+
//
24+
// The caller should not modify the contents of the returned slice, but it is
25+
// safe to modify the contents of the argument after Get returns. The
26+
// returned slice will remain valid until the returned Closer is closed.
27+
// when err == nil, the caller MUST call closer.Close() or a memory leak will occur.
28+
func (b dbReader) Get(key []byte) ([]byte, io.Closer, error) {
29+
tx := b.db.NewTransaction(false)
30+
defer tx.Discard()
31+
32+
item, err := tx.Get(key)
33+
if err != nil {
34+
if errors.Is(err, badger.ErrKeyNotFound) {
35+
return nil, noop.Closer{}, storage.ErrNotFound
36+
}
37+
return nil, noop.Closer{}, irrecoverable.NewExceptionf("could not load data: %w", err)
38+
}
39+
40+
value, err := item.ValueCopy(nil)
41+
if err != nil {
42+
return nil, noop.Closer{}, irrecoverable.NewExceptionf("could not load value: %w", err)
43+
}
44+
45+
return value, noop.Closer{}, nil
46+
}
47+
48+
// NewIter returns a new Iterator for the given key prefix range [startPrefix, endPrefix], both inclusive.
49+
// Specifically, all keys that meet ANY of the following conditions are included in the iteration:
50+
// - have a prefix equal to startPrefix OR
51+
// - have a prefix equal to the endPrefix OR
52+
// - have a prefix that is lexicographically between startPrefix and endPrefix
53+
//
54+
// it returns error if the startPrefix key is greater than the endPrefix key
55+
// no errors are expected during normal operation
56+
func (b dbReader) NewIter(startPrefix, endPrefix []byte, ops storage.IteratorOption) (storage.Iterator, error) {
57+
if bytes.Compare(startPrefix, endPrefix) > 0 {
58+
return nil, fmt.Errorf("startPrefix key must be less than or equal to endPrefix key")
59+
}
60+
61+
return newBadgerIterator(b.db, startPrefix, endPrefix, ops), nil
62+
}
63+
64+
// ToReader is a helper function to convert a *badger.DB to a Reader
65+
func ToReader(db *badger.DB) storage.Reader {
66+
return dbReader{db}
67+
}
Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,121 @@
1+
package badgerimpl
2+
3+
import (
4+
"fmt"
5+
6+
"github.com/dgraph-io/badger/v2"
7+
8+
"github.com/onflow/flow-go/storage"
9+
"github.com/onflow/flow-go/storage/operation"
10+
op "github.com/onflow/flow-go/storage/operation"
11+
)
12+
13+
type ReaderBatchWriter struct {
14+
globalReader storage.Reader
15+
batch *badger.WriteBatch
16+
17+
callbacks op.Callbacks
18+
}
19+
20+
var _ storage.ReaderBatchWriter = (*ReaderBatchWriter)(nil)
21+
22+
// GlobalReader returns a database-backed reader which reads the latest committed global database state ("read-committed isolation").
23+
// This reader will not read un-committed writes written to ReaderBatchWriter.Writer until the write batch is committed.
24+
// This reader may observe different values for the same key on subsequent reads.
25+
func (b *ReaderBatchWriter) GlobalReader() storage.Reader {
26+
return b.globalReader
27+
}
28+
29+
// Writer returns a writer associated with a batch of writes. The batch is pending until it is committed.
30+
// When we `Write` into the batch, that write operation is added to the pending batch, but not committed.
31+
// The commit operation is atomic w.r.t. the batch; either all writes are applied to the database, or no writes are.
32+
// Note:
33+
// - The writer cannot be used concurrently for writing.
34+
func (b *ReaderBatchWriter) Writer() storage.Writer {
35+
return b
36+
}
37+
38+
// BadgerWriteBatch returns the badger write batch
39+
func (b *ReaderBatchWriter) BadgerWriteBatch() *badger.WriteBatch {
40+
return b.batch
41+
}
42+
43+
// AddCallback adds a callback to execute after the batch has been flushed
44+
// regardless of whether the batch update succeeded or failed.
45+
// The error parameter is the error returned by the batch update.
46+
func (b *ReaderBatchWriter) AddCallback(callback func(error)) {
47+
b.callbacks.AddCallback(callback)
48+
}
49+
50+
// Commit flushes the batch to the database.
51+
// No errors expected during normal operation
52+
func (b *ReaderBatchWriter) Commit() error {
53+
err := b.batch.Flush()
54+
55+
b.callbacks.NotifyCallbacks(err)
56+
57+
return err
58+
}
59+
60+
func WithReaderBatchWriter(db *badger.DB, fn func(storage.ReaderBatchWriter) error) error {
61+
batch := NewReaderBatchWriter(db)
62+
63+
err := fn(batch)
64+
if err != nil {
65+
// fn might use lock to ensure concurrent safety while reading and writing data
66+
// and the lock is usually released by a callback.
67+
// in other words, fn might hold a lock to be released by a callback,
68+
// we need to notify the callback for the locks to be released before
69+
// returning the error.
70+
batch.callbacks.NotifyCallbacks(err)
71+
return err
72+
}
73+
74+
return batch.Commit()
75+
}
76+
77+
func NewReaderBatchWriter(db *badger.DB) *ReaderBatchWriter {
78+
return &ReaderBatchWriter{
79+
globalReader: ToReader(db),
80+
batch: db.NewWriteBatch(),
81+
}
82+
}
83+
84+
var _ storage.Writer = (*ReaderBatchWriter)(nil)
85+
86+
// Set sets the value for the given key. It overwrites any previous value
87+
// for that key; a DB is not a multi-map.
88+
//
89+
// It is safe to modify the contents of the arguments after Set returns.
90+
// No errors expected during normal operation
91+
func (b *ReaderBatchWriter) Set(key, value []byte) error {
92+
return b.batch.Set(key, value)
93+
}
94+
95+
// Delete deletes the value for the given key. Deletes are blind: all will
96+
// succeed even if the given key does not exist.
97+
//
98+
// It is safe to modify the contents of the arguments after Delete returns.
99+
// No errors expected during normal operation
100+
func (b *ReaderBatchWriter) Delete(key []byte) error {
101+
return b.batch.Delete(key)
102+
}
103+
104+
// DeleteByRange removes all keys with a prefix that falls within the
105+
// range [start, end], both inclusive.
106+
// It returns error if endPrefix < startPrefix
107+
// no other errors are expected during normal operation
108+
func (b *ReaderBatchWriter) DeleteByRange(globalReader storage.Reader, startPrefix, endPrefix []byte) error {
109+
err := operation.Iterate(startPrefix, endPrefix, func(key []byte) error {
110+
err := b.batch.Delete(key)
111+
if err != nil {
112+
return fmt.Errorf("could not add key to delete batch (%v): %w", key, err)
113+
}
114+
return nil
115+
})(globalReader)
116+
117+
if err != nil {
118+
return fmt.Errorf("could not find keys by range to be deleted: %w", err)
119+
}
120+
return nil
121+
}

storage/operation/callbacks.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
package operation
2+
3+
import "sync"
4+
5+
// Callbacks is a mutex-protected list of functions to be invoked with the
// outcome (error or nil) of an operation. The zero value is ready to use.
// A Callbacks value must not be copied after first use, because it embeds a mutex.
type Callbacks struct {
	sync.RWMutex // protect callbacks
	callbacks    []func(error)
}

// AddCallback appends callback to the list of functions invoked by NotifyCallbacks.
func (b *Callbacks) AddCallback(callback func(error)) {
	b.Lock()
	defer b.Unlock()

	b.callbacks = append(b.callbacks, callback)
}

// NotifyCallbacks invokes every registered callback, in registration order,
// passing err to each.
//
// The callbacks run without the lock held, so a callback may itself call
// AddCallback without deadlocking. Callbacks registered while a notification
// is in progress are not part of that round; they run on the next call.
func (b *Callbacks) NotifyCallbacks(err error) {
	// Snapshot under the read lock, then release it before running user code.
	b.RLock()
	pending := make([]func(error), len(b.callbacks))
	copy(pending, b.callbacks)
	b.RUnlock()

	for _, callback := range pending {
		callback(err)
	}
}

storage/operation/codec.go

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
package operation
2+
3+
import (
4+
"encoding/binary"
5+
"fmt"
6+
7+
"github.com/onflow/flow-go/model/flow"
8+
)
9+
10+
// EncodeKeyPart encodes a value to be used as a part of a key to be stored in storage.
11+
func EncodeKeyPart(v interface{}) []byte {
12+
switch i := v.(type) {
13+
case uint8:
14+
return []byte{i}
15+
case uint32:
16+
b := make([]byte, 4)
17+
binary.BigEndian.PutUint32(b, i)
18+
return b
19+
case uint64:
20+
b := make([]byte, 8)
21+
binary.BigEndian.PutUint64(b, i)
22+
return b
23+
case string:
24+
return []byte(i)
25+
case flow.Role:
26+
return []byte{byte(i)}
27+
case flow.Identifier:
28+
return i[:]
29+
case flow.ChainID:
30+
return []byte(i)
31+
default:
32+
panic(fmt.Sprintf("unsupported type to convert (%T)", v))
33+
}
34+
}

0 commit comments

Comments
 (0)