Skip to content

Commit 79d23d0

Browse files
authored
core/state: rewrite a new optimized keyValueMigrator (#256)
* trie/utils: add helper to calculate code tree indices * core/state: rewrite optimized version of keyValueMigrator Signed-off-by: Ignacio Hagopian <[email protected]> * trie/verkle: remove uint256 allocs (#257) Signed-off-by: Ignacio Hagopian <[email protected]> --------- Signed-off-by: Ignacio Hagopian <[email protected]>
1 parent cd83a7b commit 79d23d0

File tree

2 files changed

+163
-84
lines changed

2 files changed

+163
-84
lines changed

core/state_processor.go

Lines changed: 125 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ import (
2525
"io"
2626
"math/big"
2727
"os"
28+
"runtime"
29+
"sync"
2830
"time"
2931

3032
"github.com/ethereum/go-ethereum/common"
@@ -170,7 +172,7 @@ func (p *StateProcessor) Process(block *types.Block, statedb *state.StateDB, cfg
170172
// mkv will be assisting in the collection of up to maxMovedCount key values to be migrated to the VKT.
171173
// It has internal caches to do efficient MPT->VKT key calculations, which will be discarded after
172174
// this function.
173-
mkv := &keyValueMigrator{vktLeafData: make(map[string]*verkle.BatchNewLeafNodeData)}
175+
mkv := newKeyValueMigrator()
174176
// move maxCount accounts into the verkle tree, starting with the
175177
// slots from the previous account.
176178
count := 0
@@ -297,8 +299,17 @@ func (p *StateProcessor) Process(block *types.Block, statedb *state.StateDB, cfg
297299
}
298300
migrdb.SetCurrentPreimageOffset(preimageSeek)
299301

300-
log.Info("Collected and prepared key values from base tree", "count", count, "duration", time.Since(now), "last account", statedb.Database().GetCurrentAccountHash())
301-
302+
log.Info("Collected key values from base tree", "count", count, "duration", time.Since(now), "last account", statedb.Database().GetCurrentAccountHash())
303+
304+
// Take all the collected key-values and prepare the new leaf values.
305+
// This fires a background routine that will start doing the work that
306+
// migrateCollectedKeyValues() will use to insert into the tree.
307+
//
308+
// TODO: Now both prepare() and migrateCollectedKeyValues() are next to each other, but
309+
// after we fix an existing bug, we can call prepare() before the block execution and
310+
// let it do the work in the background. After the block execution and finalization
311+
// finish, we can call migrateCollectedKeyValues() which should already find everything ready.
312+
mkv.prepare()
302313
now = time.Now()
303314
if err := mkv.migrateCollectedKeyValues(tt.Overlay()); err != nil {
304315
return nil, nil, 0, fmt.Errorf("could not migrate key values: %w", err)
@@ -380,30 +391,60 @@ func ApplyTransaction(config *params.ChainConfig, bc ChainContext, author *commo
380391
return applyTransaction(msg, config, gp, statedb, header.Number, header.Hash(), tx, usedGas, vmenv)
381392
}
382393

383-
// keyValueMigrator is a helper struct that collects key-values from the base tree.
384-
// The walk is done in account order, so **we assume** the APIs hold this invariant. This is
385-
// useful to be smart about caching banderwagon.Points to make VKT key calculations faster.
394+
var zeroTreeIndex uint256.Int
395+
396+
// keyValueMigrator is a helper module that collects key-values from the overlay-tree migration for Verkle Trees.
397+
// It assumes that the walk of the base tree is done in address-order, so it exploits that fact to
398+
// collect the key-values in a way that is efficient.
386399
type keyValueMigrator struct {
387-
currAddr []byte
388-
currAddrPoint *verkle.Point
400+
// leafData contains the values for the future leaf for a particular VKT branch.
401+
leafData []migratedKeyValue
402+
403+
// When prepare() is called, it will start a background routine that will process the leafData
404+
// saving the result in newLeaves to be used by migrateCollectedKeyValues(). The background
405+
// routine signals that it is done by closing processingReady.
406+
processingReady chan struct{}
407+
newLeaves []verkle.LeafNode
408+
prepareErr error
409+
}
389410

390-
vktLeafData map[string]*verkle.BatchNewLeafNodeData
411+
func newKeyValueMigrator() *keyValueMigrator {
412+
// We do initialize the VKT config since prepare() might indirectly make multiple GetConfig() calls
413+
// in different goroutines when we never called GetConfig() before, causing a race considering the way
414+
// that `config` is designed in go-verkle.
415+
// TODO: jsign has a fix for this in the PR where we move to a file-less precomp, since it allows safe
416+
// concurrent calls to GetConfig(). When that gets merged, we can remove this line.
417+
_ = verkle.GetConfig()
418+
return &keyValueMigrator{
419+
processingReady: make(chan struct{}),
420+
leafData: make([]migratedKeyValue, 0, 10_000),
421+
}
391422
}
392423

393-
func (kvm *keyValueMigrator) addStorageSlot(addr []byte, slotNumber []byte, slotValue []byte) {
394-
addrPoint := kvm.getAddrPoint(addr)
424+
type migratedKeyValue struct {
425+
branchKey branchKey
426+
leafNodeData verkle.BatchNewLeafNodeData
427+
}
428+
type branchKey struct {
429+
addr common.Address
430+
treeIndex uint256.Int
431+
}
395432

396-
vktKey := tutils.GetTreeKeyStorageSlotWithEvaluatedAddress(addrPoint, slotNumber)
397-
leafNodeData := kvm.getOrInitLeafNodeData(vktKey)
433+
func newBranchKey(addr []byte, treeIndex *uint256.Int) branchKey {
434+
var sk branchKey
435+
copy(sk.addr[:], addr)
436+
sk.treeIndex = *treeIndex
437+
return sk
438+
}
398439

399-
leafNodeData.Values[vktKey[verkle.StemSize]] = slotValue
440+
func (kvm *keyValueMigrator) addStorageSlot(addr []byte, slotNumber []byte, slotValue []byte) {
441+
treeIndex, subIndex := tutils.GetTreeKeyStorageSlotTreeIndexes(slotNumber)
442+
leafNodeData := kvm.getOrInitLeafNodeData(newBranchKey(addr, treeIndex))
443+
leafNodeData.Values[subIndex] = slotValue
400444
}
401445

402446
func (kvm *keyValueMigrator) addAccount(addr []byte, acc *types.StateAccount) {
403-
addrPoint := kvm.getAddrPoint(addr)
404-
405-
vktKey := tutils.GetTreeKeyVersionWithEvaluatedAddress(addrPoint)
406-
leafNodeData := kvm.getOrInitLeafNodeData(vktKey)
447+
leafNodeData := kvm.getOrInitLeafNodeData(newBranchKey(addr, &zeroTreeIndex))
407448

408449
var version [verkle.LeafValueSize]byte
409450
leafNodeData.Values[tutils.VersionLeafKey] = version[:]
@@ -419,16 +460,10 @@ func (kvm *keyValueMigrator) addAccount(addr []byte, acc *types.StateAccount) {
419460
leafNodeData.Values[tutils.NonceLeafKey] = nonce[:]
420461

421462
leafNodeData.Values[tutils.CodeKeccakLeafKey] = acc.CodeHash[:]
422-
423-
// Code size is ignored here. If this isn't an EOA, the tree-walk will call
424-
// addAccountCode with this information.
425463
}
426464

427465
func (kvm *keyValueMigrator) addAccountCode(addr []byte, codeSize uint64, chunks []byte) {
428-
addrPoint := kvm.getAddrPoint(addr)
429-
430-
vktKey := tutils.GetTreeKeyVersionWithEvaluatedAddress(addrPoint)
431-
leafNodeData := kvm.getOrInitLeafNodeData(vktKey)
466+
leafNodeData := kvm.getOrInitLeafNodeData(newBranchKey(addr, &zeroTreeIndex))
432467

433468
// Save the code size.
434469
var codeSizeBytes [verkle.LeafValueSize]byte
@@ -442,8 +477,8 @@ func (kvm *keyValueMigrator) addAccountCode(addr []byte, codeSize uint64, chunks
442477

443478
// Potential further chunks, have their own leaf nodes.
444479
for i := 128; i < len(chunks)/32; {
445-
vktKey := tutils.GetTreeKeyCodeChunkWithEvaluatedAddress(addrPoint, uint256.NewInt(uint64(i)))
446-
leafNodeData := kvm.getOrInitLeafNodeData(vktKey)
480+
treeIndex, _ := tutils.GetTreeKeyCodeChunkIndices(uint256.NewInt(uint64(i)))
481+
leafNodeData := kvm.getOrInitLeafNodeData(newBranchKey(addr, treeIndex))
447482

448483
j := i
449484
for ; (j-i) < 256 && j < len(chunks)/32; j++ {
@@ -453,41 +488,79 @@ func (kvm *keyValueMigrator) addAccountCode(addr []byte, codeSize uint64, chunks
453488
}
454489
}
455490

456-
func (kvm *keyValueMigrator) getAddrPoint(addr []byte) *verkle.Point {
457-
if bytes.Equal(addr, kvm.currAddr) {
458-
return kvm.currAddrPoint
491+
func (kvm *keyValueMigrator) getOrInitLeafNodeData(bk branchKey) *verkle.BatchNewLeafNodeData {
492+
// Remember that keyValueMigrator receives actions ordered by (address, subtreeIndex).
493+
// This means that we can assume that the last element of leafData is the one that we
494+
// are looking for, or that we need to create a new one.
495+
if len(kvm.leafData) == 0 || kvm.leafData[len(kvm.leafData)-1].branchKey != bk {
496+
kvm.leafData = append(kvm.leafData, migratedKeyValue{
497+
branchKey: bk,
498+
leafNodeData: verkle.BatchNewLeafNodeData{
499+
Stem: nil, // It will be calculated in the prepare() phase, since it's CPU heavy.
500+
Values: make(map[byte][]byte),
501+
},
502+
})
459503
}
460-
kvm.currAddr = addr
461-
kvm.currAddrPoint = tutils.EvaluateAddressPoint(addr)
462-
return kvm.currAddrPoint
504+
return &kvm.leafData[len(kvm.leafData)-1].leafNodeData
463505
}
464506

465-
func (kvm *keyValueMigrator) getOrInitLeafNodeData(stem []byte) *verkle.BatchNewLeafNodeData {
466-
stemStr := string(stem)
467-
if _, ok := kvm.vktLeafData[stemStr]; !ok {
468-
kvm.vktLeafData[stemStr] = &verkle.BatchNewLeafNodeData{
469-
Stem: stem[:verkle.StemSize],
470-
Values: make(map[byte][]byte),
507+
func (kvm *keyValueMigrator) prepare() {
508+
// We fire a background routine to process the leafData and save the result in newLeaves.
509+
// The background routine signals that it is done by closing processingReady.
510+
go func() {
511+
// Step 1: We split kvm.leafData in numBatches batches, and we process each batch in a separate goroutine.
512+
// This fills each leafNodeData.Stem with the correct value.
513+
var wg sync.WaitGroup
514+
batchNum := runtime.NumCPU()
515+
batchSize := (len(kvm.leafData) + batchNum - 1) / batchNum
516+
for i := 0; i < len(kvm.leafData); i += batchSize {
517+
start := i
518+
end := i + batchSize
519+
if end > len(kvm.leafData) {
520+
end = len(kvm.leafData)
521+
}
522+
wg.Add(1)
523+
524+
batch := kvm.leafData[start:end]
525+
go func() {
526+
defer wg.Done()
527+
var currAddr common.Address
528+
var currPoint *verkle.Point
529+
for i := range batch {
530+
if batch[i].branchKey.addr != currAddr {
531+
currAddr = batch[i].branchKey.addr
532+
currPoint = tutils.EvaluateAddressPoint(currAddr[:])
533+
}
534+
stem := tutils.GetTreeKeyWithEvaluatedAddess(currPoint, &batch[i].branchKey.treeIndex, 0)
535+
stem = stem[:verkle.StemSize]
536+
batch[i].leafNodeData.Stem = stem
537+
}
538+
}()
471539
}
472-
}
473-
return kvm.vktLeafData[stemStr]
540+
wg.Wait()
541+
542+
// Step 2: Now that we have all stems (i.e: tree keys) calculated, we can create the new leaves.
543+
nodeValues := make([]verkle.BatchNewLeafNodeData, len(kvm.leafData))
544+
for i := range kvm.leafData {
545+
nodeValues[i] = kvm.leafData[i].leafNodeData
546+
}
547+
548+
// Create all leaves in batch mode so we can optimize cryptography operations.
549+
kvm.newLeaves, kvm.prepareErr = verkle.BatchNewLeafNode(nodeValues)
550+
close(kvm.processingReady)
551+
}()
474552
}
475553

476554
func (kvm *keyValueMigrator) migrateCollectedKeyValues(tree *trie.VerkleTrie) error {
477-
// Transform the map into a slice.
478-
nodeValues := make([]verkle.BatchNewLeafNodeData, 0, len(kvm.vktLeafData))
479-
for _, vld := range kvm.vktLeafData {
480-
nodeValues = append(nodeValues, *vld)
481-
}
482-
483-
// Create all leaves in batch mode so we can optimize cryptography operations.
484-
newLeaves, err := verkle.BatchNewLeafNode(nodeValues)
485-
if err != nil {
486-
return fmt.Errorf("failed to batch-create new leaf nodes")
555+
now := time.Now()
556+
<-kvm.processingReady
557+
if kvm.prepareErr != nil {
558+
return fmt.Errorf("failed to prepare key values: %w", kvm.prepareErr)
487559
}
560+
log.Info("Prepared key values from base tree", "duration", time.Since(now))
488561

489562
// Insert into the tree.
490-
if err := tree.InsertMigratedLeaves(newLeaves); err != nil {
563+
if err := tree.InsertMigratedLeaves(kvm.newLeaves); err != nil {
491564
return fmt.Errorf("failed to insert migrated leaves: %w", err)
492565
}
493566

trie/utils/verkle.go

Lines changed: 38 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
package utils
1818

1919
import (
20-
"math/big"
20+
"encoding/binary"
2121
"sync"
2222

2323
"github.com/crate-crypto/go-ipa/bandersnatch/fr"
@@ -34,18 +34,14 @@ const (
3434
)
3535

3636
var (
37-
zero = uint256.NewInt(0)
38-
HeaderStorageOffset = uint256.NewInt(64)
39-
CodeOffset = uint256.NewInt(128)
40-
MainStorageOffset = new(uint256.Int).Lsh(uint256.NewInt(256), 31)
41-
VerkleNodeWidth = uint256.NewInt(256)
42-
codeStorageDelta = uint256.NewInt(0).Sub(CodeOffset, HeaderStorageOffset)
43-
44-
// BigInt versions of the above.
45-
headerStorageOffsetBig = HeaderStorageOffset.ToBig()
46-
mainStorageOffsetBig = MainStorageOffset.ToBig()
47-
verkleNodeWidthBig = VerkleNodeWidth.ToBig()
48-
codeStorageDeltaBig = codeStorageDelta.ToBig()
37+
zero = uint256.NewInt(0)
38+
VerkleNodeWidthLog2 = 8
39+
HeaderStorageOffset = uint256.NewInt(64)
40+
mainStorageOffsetLshVerkleNodeWidth = new(uint256.Int).Lsh(uint256.NewInt(256), 31-uint(VerkleNodeWidthLog2))
41+
CodeOffset = uint256.NewInt(128)
42+
MainStorageOffset = new(uint256.Int).Lsh(uint256.NewInt(256), 31)
43+
VerkleNodeWidth = uint256.NewInt(256)
44+
codeStorageDelta = uint256.NewInt(0).Sub(CodeOffset, HeaderStorageOffset)
4945

5046
getTreePolyIndex0Point *verkle.Point
5147
)
@@ -164,14 +160,19 @@ func GetTreeKeyCodeSize(address []byte) []byte {
164160
}
165161

166162
func GetTreeKeyCodeChunk(address []byte, chunk *uint256.Int) []byte {
163+
treeIndex, subIndex := GetTreeKeyCodeChunkIndices(chunk)
164+
return GetTreeKey(address, treeIndex, subIndex)
165+
}
166+
167+
func GetTreeKeyCodeChunkIndices(chunk *uint256.Int) (*uint256.Int, byte) {
167168
chunkOffset := new(uint256.Int).Add(CodeOffset, chunk)
168169
treeIndex := new(uint256.Int).Div(chunkOffset, VerkleNodeWidth)
169170
subIndexMod := new(uint256.Int).Mod(chunkOffset, VerkleNodeWidth)
170171
var subIndex byte
171172
if len(subIndexMod) != 0 {
172173
subIndex = byte(subIndexMod[0])
173174
}
174-
return GetTreeKey(address, treeIndex, subIndex)
175+
return treeIndex, subIndex
175176
}
176177

177178
func GetTreeKeyCodeChunkWithEvaluatedAddress(addressPoint *verkle.Point, chunk *uint256.Int) []byte {
@@ -230,8 +231,8 @@ func GetTreeKeyWithEvaluatedAddess(evaluated *verkle.Point, treeIndex *uint256.I
230231

231232
// little-endian, 32-byte aligned treeIndex
232233
var index [32]byte
233-
for i, b := range treeIndex.Bytes() {
234-
index[len(treeIndex.Bytes())-1-i] = b
234+
for i := 0; i < len(treeIndex); i++ {
235+
binary.LittleEndian.PutUint64(index[i*8:(i+1)*8], treeIndex[i])
235236
}
236237
verkle.FromLEBytes(&poly[3], index[:16])
237238
verkle.FromLEBytes(&poly[4], index[16:])
@@ -274,22 +275,27 @@ func GetTreeKeyStorageSlotWithEvaluatedAddress(evaluated *verkle.Point, storageK
274275
}
275276

276277
func GetTreeKeyStorageSlotTreeIndexes(storageKey []byte) (*uint256.Int, byte) {
277-
// Note that `pos` must be a big.Int and not a uint256.Int, because the subsequent
278-
// arithmetics operations could overflow. (e.g: imagine if storageKey is 2^256-1)
279-
pos := new(big.Int).SetBytes(storageKey)
280-
if pos.Cmp(codeStorageDeltaBig) < 0 {
281-
pos.Add(headerStorageOffsetBig, pos)
282-
} else {
283-
pos.Add(mainStorageOffsetBig, pos)
284-
}
285-
treeIndex, overflow := uint256.FromBig(big.NewInt(0).Div(pos, verkleNodeWidthBig))
286-
if overflow { // Must never happen considering the EIP definition.
287-
panic("tree index overflow")
278+
var pos uint256.Int
279+
pos.SetBytes(storageKey)
280+
281+
// If the storage slot is in the header, we need to add the header offset.
282+
if pos.Cmp(codeStorageDelta) < 0 {
283+
// This addition is always safe; it can't ever overflow since pos<codeStorageDelta.
284+
pos.Add(HeaderStorageOffset, &pos)
285+
286+
// In this branch, the tree-index is zero since we're in the account header,
287+
// and the sub-index is the LSB of the modified storage key.
288+
return zero, byte(pos[0] & 0xFF)
289+
288290
}
289-
// calculate the sub_index, i.e. the index in the stem tree.
290-
// Because the modulus is 256, it's the last byte of treeIndex
291-
posBytes := pos.Bytes()
292-
subIndex := posBytes[len(posBytes)-1]
291+
// If the storage slot is in the main storage, we need to add the main storage offset.
293292

294-
return treeIndex, subIndex
293+
// We first divide by VerkleNodeWidth to create room to avoid an overflow next.
294+
pos.Rsh(&pos, uint(VerkleNodeWidthLog2))
295+
// We add mainStorageOffset/VerkleNodeWidth which can't overflow.
296+
pos.Add(&pos, mainStorageOffsetLshVerkleNodeWidth)
297+
298+
// The sub-index is the LSB of the original storage key, since mainStorageOffset
299+
// doesn't affect this byte, so we can avoid masks or shifts.
300+
return &pos, storageKey[len(storageKey)-1]
295301
}

0 commit comments

Comments
 (0)