Skip to content

Commit cbbf686

Browse files
authored
trie, core: rework tracer and track origin value of dirty nodes (#32306)
The changes made in this PR should be highlighted here. The trie tracer is split into two distinct structs: opTracer and prevalueTracer. The former is specific to MPT, while the latter is generic and applicable to all trie implementations. The original values of dirty nodes are tracked in a NodeSet. This serves as the foundation for both full archive node implementations and the state live tracer.
1 parent 55a471e commit cbbf686

File tree

10 files changed

+376
-171
lines changed

10 files changed

+376
-171
lines changed

core/state/statedb.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -977,7 +977,7 @@ func (s *StateDB) fastDeleteStorage(snaps *snapshot.Tree, addrHash common.Hash,
977977
storageOrigins = make(map[common.Hash][]byte) // the set for tracking the original value of slot
978978
)
979979
stack := trie.NewStackTrie(func(path []byte, hash common.Hash, blob []byte) {
980-
nodes.AddNode(path, trienode.NewDeleted())
980+
nodes.AddNode(path, trienode.NewDeletedWithPrev(blob))
981981
})
982982
for iter.Next() {
983983
slot := common.CopyBytes(iter.Slot())
@@ -1028,7 +1028,7 @@ func (s *StateDB) slowDeleteStorage(addr common.Address, addrHash common.Hash, r
10281028
if it.Hash() == (common.Hash{}) {
10291029
continue
10301030
}
1031-
nodes.AddNode(it.Path(), trienode.NewDeleted())
1031+
nodes.AddNode(it.Path(), trienode.NewDeletedWithPrev(it.NodeBlob()))
10321032
}
10331033
if err := it.Error(); err != nil {
10341034
return nil, nil, nil, err
@@ -1160,7 +1160,7 @@ func (s *StateDB) commit(deleteEmptyObjects bool, noStorageWiping bool) (*stateU
11601160
//
11611161
// Given that some accounts may be destroyed and then recreated within
11621162
// the same block, it's possible that a node set with the same owner
1163-
// may already exists. In such cases, these two sets are combined, with
1163+
// may already exist. In such cases, these two sets are combined, with
11641164
// the later one overwriting the previous one if any nodes are modified
11651165
// or deleted in both sets.
11661166
//

trie/committer.go

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -29,12 +29,12 @@ import (
2929
// insertion order.
3030
type committer struct {
3131
nodes *trienode.NodeSet
32-
tracer *tracer
32+
tracer *prevalueTracer
3333
collectLeaf bool
3434
}
3535

3636
// newCommitter creates a new committer or picks one from the pool.
37-
func newCommitter(nodeset *trienode.NodeSet, tracer *tracer, collectLeaf bool) *committer {
37+
func newCommitter(nodeset *trienode.NodeSet, tracer *prevalueTracer, collectLeaf bool) *committer {
3838
return &committer{
3939
nodes: nodeset,
4040
tracer: tracer,
@@ -110,14 +110,16 @@ func (c *committer) commitChildren(path []byte, n *fullNode, parallel bool) {
110110
} else {
111111
wg.Add(1)
112112
go func(index int) {
113+
defer wg.Done()
114+
113115
p := append(path, byte(index))
114116
childSet := trienode.NewNodeSet(c.nodes.Owner)
115117
childCommitter := newCommitter(childSet, c.tracer, c.collectLeaf)
116118
n.Children[index] = childCommitter.commit(p, child, false)
119+
117120
nodesMu.Lock()
118-
c.nodes.MergeSet(childSet)
121+
c.nodes.MergeDisjoint(childSet)
119122
nodesMu.Unlock()
120-
wg.Done()
121123
}(i)
122124
}
123125
}
@@ -140,15 +142,15 @@ func (c *committer) store(path []byte, n node) node {
140142
// The node is embedded in its parent, in other words, this node
141143
// will not be stored in the database independently, mark it as
142144
// deleted only if the node existed in the database before.
143-
_, ok := c.tracer.accessList[string(path)]
144-
if ok {
145-
c.nodes.AddNode(path, trienode.NewDeleted())
145+
origin := c.tracer.get(path)
146+
if len(origin) != 0 {
147+
c.nodes.AddNode(path, trienode.NewDeletedWithPrev(origin))
146148
}
147149
return n
148150
}
149151
// Collect the dirty node to nodeset for return.
150152
nhash := common.BytesToHash(hash)
151-
c.nodes.AddNode(path, trienode.New(nhash, nodeToBytes(n)))
153+
c.nodes.AddNode(path, trienode.NewNodeWithPrev(nhash, nodeToBytes(n), c.tracer.get(path)))
152154

153155
// Collect the corresponding leaf node if it's required. We don't check
154156
// full node since it's impossible to store value in fullNode. The key

trie/proof.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -567,7 +567,12 @@ func VerifyRangeProof(rootHash common.Hash, firstKey []byte, keys [][]byte, valu
567567
}
568568
// Rebuild the trie with the leaf stream, the shape of trie
569569
// should be the same as the original one.
570-
tr := &Trie{root: root, reader: newEmptyReader(), tracer: newTracer()}
570+
tr := &Trie{
571+
root: root,
572+
reader: newEmptyReader(),
573+
opTracer: newOpTracer(),
574+
prevalueTracer: newPrevalueTracer(),
575+
}
571576
if empty {
572577
tr.root = nil
573578
}

trie/tracer.go

Lines changed: 84 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,13 @@ package trie
1818

1919
import (
2020
"maps"
21-
22-
"github.com/ethereum/go-ethereum/common"
21+
"slices"
2322
)
2423

25-
// tracer tracks the changes of trie nodes. During the trie operations,
24+
// opTracer tracks the changes of trie nodes. During the trie operations,
2625
// some nodes can be deleted from the trie, while these deleted nodes
2726
// won't be captured by trie.Hasher or trie.Committer. Thus, these deleted
28-
// nodes won't be removed from the disk at all. Tracer is an auxiliary tool
27+
// nodes won't be removed from the disk at all. opTracer is an auxiliary tool
2928
// used to track all insert and delete operations of trie and capture all
3029
// deleted nodes eventually.
3130
//
@@ -35,38 +34,25 @@ import (
3534
// This tool can track all of them no matter the node is embedded in its
3635
// parent or not, but valueNode is never tracked.
3736
//
38-
// Besides, it's also used for recording the original value of the nodes
39-
// when they are resolved from the disk. The pre-value of the nodes will
40-
// be used to construct trie history in the future.
41-
//
42-
// Note tracer is not thread-safe, callers should be responsible for handling
37+
// Note opTracer is not thread-safe, callers should be responsible for handling
4338
// the concurrency issues by themselves.
44-
type tracer struct {
45-
inserts map[string]struct{}
46-
deletes map[string]struct{}
47-
accessList map[string][]byte
39+
type opTracer struct {
40+
inserts map[string]struct{}
41+
deletes map[string]struct{}
4842
}
4943

50-
// newTracer initializes the tracer for capturing trie changes.
51-
func newTracer() *tracer {
52-
return &tracer{
53-
inserts: make(map[string]struct{}),
54-
deletes: make(map[string]struct{}),
55-
accessList: make(map[string][]byte),
44+
// newOpTracer initializes the tracer for capturing trie changes.
45+
func newOpTracer() *opTracer {
46+
return &opTracer{
47+
inserts: make(map[string]struct{}),
48+
deletes: make(map[string]struct{}),
5649
}
5750
}
5851

59-
// onRead tracks the newly loaded trie node and caches the rlp-encoded
60-
// blob internally. Don't change the value outside of function since
61-
// it's not deep-copied.
62-
func (t *tracer) onRead(path []byte, val []byte) {
63-
t.accessList[string(path)] = val
64-
}
65-
6652
// onInsert tracks the newly inserted trie node. If it's already
6753
// in the deletion set (resurrected node), then just wipe it from
6854
// the deletion set as it's "untouched".
69-
func (t *tracer) onInsert(path []byte) {
55+
func (t *opTracer) onInsert(path []byte) {
7056
if _, present := t.deletes[string(path)]; present {
7157
delete(t.deletes, string(path))
7258
return
@@ -77,7 +63,7 @@ func (t *tracer) onInsert(path []byte) {
7763
// onDelete tracks the newly deleted trie node. If it's already
7864
// in the addition set, then just wipe it from the addition set
7965
// as it's untouched.
80-
func (t *tracer) onDelete(path []byte) {
66+
func (t *opTracer) onDelete(path []byte) {
8167
if _, present := t.inserts[string(path)]; present {
8268
delete(t.inserts, string(path))
8369
return
@@ -86,37 +72,83 @@ func (t *tracer) onDelete(path []byte) {
8672
}
8773

8874
// reset clears the content tracked by tracer.
89-
func (t *tracer) reset() {
90-
t.inserts = make(map[string]struct{})
91-
t.deletes = make(map[string]struct{})
92-
t.accessList = make(map[string][]byte)
75+
func (t *opTracer) reset() {
76+
clear(t.inserts)
77+
clear(t.deletes)
9378
}
9479

9580
// copy returns a deep copied tracer instance.
96-
func (t *tracer) copy() *tracer {
97-
accessList := make(map[string][]byte, len(t.accessList))
98-
for path, blob := range t.accessList {
99-
accessList[path] = common.CopyBytes(blob)
100-
}
101-
return &tracer{
102-
inserts: maps.Clone(t.inserts),
103-
deletes: maps.Clone(t.deletes),
104-
accessList: accessList,
81+
func (t *opTracer) copy() *opTracer {
82+
return &opTracer{
83+
inserts: maps.Clone(t.inserts),
84+
deletes: maps.Clone(t.deletes),
10585
}
10686
}
10787

108-
// deletedNodes returns a list of node paths which are deleted from the trie.
109-
func (t *tracer) deletedNodes() []string {
110-
var paths []string
88+
// deletedList returns a list of node paths which are deleted from the trie.
89+
func (t *opTracer) deletedList() [][]byte {
90+
paths := make([][]byte, 0, len(t.deletes))
11191
for path := range t.deletes {
112-
// It's possible a few deleted nodes were embedded
113-
// in their parent before, the deletions can be no
114-
// effect by deleting nothing, filter them out.
115-
_, ok := t.accessList[path]
116-
if !ok {
117-
continue
118-
}
119-
paths = append(paths, path)
92+
paths = append(paths, []byte(path))
12093
}
12194
return paths
12295
}
96+
97+
// prevalueTracer tracks the original values of resolved trie nodes. Cached trie
98+
// node values are expected to be immutable. A zero-size node value is treated as
99+
// non-existent and should not occur in practice.
100+
//
101+
// Note prevalueTracer is not thread-safe, callers should be responsible for
102+
// handling the concurrency issues by themselves.
103+
type prevalueTracer struct {
104+
data map[string][]byte
105+
}
106+
107+
// newPrevalueTracer initializes the tracer for capturing resolved trie nodes.
108+
func newPrevalueTracer() *prevalueTracer {
109+
return &prevalueTracer{
110+
data: make(map[string][]byte),
111+
}
112+
}
113+
114+
// put tracks the newly loaded trie node and caches its RLP-encoded
115+
// blob internally. Do not modify the value outside this function,
116+
// as it is not deep-copied.
117+
func (t *prevalueTracer) put(path []byte, val []byte) {
118+
t.data[string(path)] = val
119+
}
120+
121+
// get returns the cached trie node value. If the node is not found, nil will
122+
// be returned.
123+
func (t *prevalueTracer) get(path []byte) []byte {
124+
return t.data[string(path)]
125+
}
126+
127+
// hasList returns a list of flags indicating whether the corresponding trie nodes
128+
// specified by the path exist in the trie.
129+
func (t *prevalueTracer) hasList(list [][]byte) []bool {
130+
exists := make([]bool, 0, len(list))
131+
for _, path := range list {
132+
_, ok := t.data[string(path)]
133+
exists = append(exists, ok)
134+
}
135+
return exists
136+
}
137+
138+
// values returns a list of values of the cached trie nodes.
139+
func (t *prevalueTracer) values() [][]byte {
140+
return slices.Collect(maps.Values(t.data))
141+
}
142+
143+
// reset resets the cached content in the prevalueTracer.
144+
func (t *prevalueTracer) reset() {
145+
clear(t.data)
146+
}
147+
148+
// copy returns a copied prevalueTracer instance.
149+
func (t *prevalueTracer) copy() *prevalueTracer {
150+
// Shallow clone is used, as the cached trie node values are immutable
151+
return &prevalueTracer{
152+
data: maps.Clone(t.data),
153+
}
154+
}

trie/tracer_test.go

Lines changed: 25 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -52,24 +52,25 @@ var (
5252
}
5353
)
5454

55-
func TestTrieTracer(t *testing.T) {
56-
testTrieTracer(t, tiny)
57-
testTrieTracer(t, nonAligned)
58-
testTrieTracer(t, standard)
55+
func TestTrieOpTracer(t *testing.T) {
56+
testTrieOpTracer(t, tiny)
57+
testTrieOpTracer(t, nonAligned)
58+
testTrieOpTracer(t, standard)
5959
}
6060

6161
// Tests if the trie diffs are tracked correctly. Tracer should capture
6262
// all non-leaf dirty nodes, no matter the node is embedded or not.
63-
func testTrieTracer(t *testing.T, vals []struct{ k, v string }) {
63+
func testTrieOpTracer(t *testing.T, vals []struct{ k, v string }) {
6464
db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
6565
trie := NewEmpty(db)
6666

6767
// Determine all new nodes are tracked
6868
for _, val := range vals {
6969
trie.MustUpdate([]byte(val.k), []byte(val.v))
7070
}
71-
insertSet := copySet(trie.tracer.inserts) // copy before commit
72-
deleteSet := copySet(trie.tracer.deletes) // copy before commit
71+
insertSet := copySet(trie.opTracer.inserts) // copy before commit
72+
deleteSet := copySet(trie.opTracer.deletes) // copy before commit
73+
7374
root, nodes := trie.Commit(false)
7475
db.Update(root, types.EmptyRootHash, trienode.NewWithNodeSet(nodes))
7576

@@ -86,7 +87,7 @@ func testTrieTracer(t *testing.T, vals []struct{ k, v string }) {
8687
for _, val := range vals {
8788
trie.MustDelete([]byte(val.k))
8889
}
89-
insertSet, deleteSet = copySet(trie.tracer.inserts), copySet(trie.tracer.deletes)
90+
insertSet, deleteSet = copySet(trie.opTracer.inserts), copySet(trie.opTracer.deletes)
9091
if !compareSet(insertSet, nil) {
9192
t.Fatal("Unexpected insertion set")
9293
}
@@ -97,13 +98,13 @@ func testTrieTracer(t *testing.T, vals []struct{ k, v string }) {
9798

9899
// Test that after inserting a new batch of nodes and deleting them immediately,
99100
// the trie tracer should be cleared normally as no operation happened.
100-
func TestTrieTracerNoop(t *testing.T) {
101-
testTrieTracerNoop(t, tiny)
102-
testTrieTracerNoop(t, nonAligned)
103-
testTrieTracerNoop(t, standard)
101+
func TestTrieOpTracerNoop(t *testing.T) {
102+
testTrieOpTracerNoop(t, tiny)
103+
testTrieOpTracerNoop(t, nonAligned)
104+
testTrieOpTracerNoop(t, standard)
104105
}
105106

106-
func testTrieTracerNoop(t *testing.T, vals []struct{ k, v string }) {
107+
func testTrieOpTracerNoop(t *testing.T, vals []struct{ k, v string }) {
107108
db := newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
108109
trie := NewEmpty(db)
109110
for _, val := range vals {
@@ -112,22 +113,22 @@ func testTrieTracerNoop(t *testing.T, vals []struct{ k, v string }) {
112113
for _, val := range vals {
113114
trie.MustDelete([]byte(val.k))
114115
}
115-
if len(trie.tracer.inserts) != 0 {
116+
if len(trie.opTracer.inserts) != 0 {
116117
t.Fatal("Unexpected insertion set")
117118
}
118-
if len(trie.tracer.deletes) != 0 {
119+
if len(trie.opTracer.deletes) != 0 {
119120
t.Fatal("Unexpected deletion set")
120121
}
121122
}
122123

123-
// Tests if the accessList is correctly tracked.
124-
func TestAccessList(t *testing.T) {
125-
testAccessList(t, tiny)
126-
testAccessList(t, nonAligned)
127-
testAccessList(t, standard)
124+
// Tests if the original values of trie nodes are correctly tracked.
125+
func TestPrevalueTracer(t *testing.T) {
126+
testPrevalueTracer(t, tiny)
127+
testPrevalueTracer(t, nonAligned)
128+
testPrevalueTracer(t, standard)
128129
}
129130

130-
func testAccessList(t *testing.T, vals []struct{ k, v string }) {
131+
func testPrevalueTracer(t *testing.T, vals []struct{ k, v string }) {
131132
var (
132133
db = newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
133134
trie = NewEmpty(db)
@@ -210,7 +211,7 @@ func testAccessList(t *testing.T, vals []struct{ k, v string }) {
210211
}
211212

212213
// Tests origin values won't be tracked in Iterator or Prover
213-
func TestAccessListLeak(t *testing.T) {
214+
func TestPrevalueTracerLeak(t *testing.T) {
214215
var (
215216
db = newTestDatabase(rawdb.NewMemoryDatabase(), rawdb.HashScheme)
216217
trie = NewEmpty(db)
@@ -249,9 +250,9 @@ func TestAccessListLeak(t *testing.T) {
249250
}
250251
for _, c := range cases {
251252
trie, _ = New(TrieID(root), db)
252-
n1 := len(trie.tracer.accessList)
253+
n1 := len(trie.prevalueTracer.data)
253254
c.op(trie)
254-
n2 := len(trie.tracer.accessList)
255+
n2 := len(trie.prevalueTracer.data)
255256

256257
if n1 != n2 {
257258
t.Fatalf("AccessList is leaked, prev %d after %d", n1, n2)

0 commit comments

Comments
 (0)