Skip to content

Commit 902ec5b

Browse files
authored
cmd, core, eth, triedb/pathdb: track node origins in the path database (ethereum#32418)
This PR is the first step in the trienode history series. It introduces the `nodeWithOrigin` struct in the path database, which tracks the original values of dirty nodes to support trienode history construction. Note that the original value is always empty in this PR, so the existing journal encoding and decoding are not broken. Journal compatibility will be handled in a follow-up PR.
1 parent f5fcfb2 commit 902ec5b

File tree

15 files changed

+648
-210
lines changed

15 files changed

+648
-210
lines changed

core/blockchain.go

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -168,10 +168,13 @@ type BlockChainConfig struct {
168168
TrieNoAsyncFlush bool // Whether the asynchronous buffer flushing is disallowed
169169
TrieJournalDirectory string // Directory path to the journal used for persisting trie data across node restarts
170170

171-
Preimages bool // Whether to store preimage of trie key to the disk
172-
StateHistory uint64 // Number of blocks from head whose state histories are reserved.
173-
StateScheme string // Scheme used to store ethereum states and merkle tree nodes on top
174-
ArchiveMode bool // Whether to enable the archive mode
171+
Preimages bool // Whether to store preimage of trie key to the disk
172+
StateScheme string // Scheme used to store ethereum states and merkle tree nodes on top
173+
ArchiveMode bool // Whether to enable the archive mode
174+
175+
// Number of blocks from the chain head for which state histories are retained.
176+
// If set to 0, all state histories across the entire chain will be retained.
177+
StateHistory uint64
175178

176179
// State snapshot related options
177180
SnapshotLimit int // Memory allowance (MB) to use for caching snapshot entries in memory

trie/trienode/node.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,11 +259,24 @@ func (set *MergedNodeSet) Merge(other *NodeSet) error {
259259
return nil
260260
}
261261

262-
// Flatten returns a two-dimensional map for internal nodes.
263-
func (set *MergedNodeSet) Flatten() map[common.Hash]map[string]*Node {
262+
// Nodes returns a two-dimensional map for internal nodes.
263+
func (set *MergedNodeSet) Nodes() map[common.Hash]map[string]*Node {
264264
nodes := make(map[common.Hash]map[string]*Node, len(set.Sets))
265265
for owner, set := range set.Sets {
266266
nodes[owner] = set.Nodes
267267
}
268268
return nodes
269269
}
270+
271+
// NodeAndOrigins returns a two-dimensional map for internal nodes along with
272+
// their original values.
273+
func (set *MergedNodeSet) NodeAndOrigins() (map[common.Hash]map[string]*Node, map[common.Hash]map[string][]byte) {
274+
var (
275+
nodes = make(map[common.Hash]map[string]*Node, len(set.Sets))
276+
origins = make(map[common.Hash]map[string][]byte, len(set.Sets))
277+
)
278+
for owner, set := range set.Sets {
279+
nodes[owner], origins[owner] = set.Nodes, set.Origins
280+
}
281+
return nodes, origins
282+
}

triedb/pathdb/config.go

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
// Copyright 2025 The go-ethereum Authors
2+
// This file is part of the go-ethereum library.
3+
//
4+
// The go-ethereum library is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU Lesser General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
//
9+
// The go-ethereum library is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU Lesser General Public License for more details.
13+
//
14+
// You should have received a copy of the GNU Lesser General Public License
15+
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16+
17+
package pathdb
18+
19+
import (
20+
"fmt"
21+
22+
"github.com/ethereum/go-ethereum/common"
23+
"github.com/ethereum/go-ethereum/log"
24+
"github.com/ethereum/go-ethereum/params"
25+
)
26+
27+
const (
28+
// defaultTrieCleanSize is the default memory allowance of clean trie cache.
29+
defaultTrieCleanSize = 16 * 1024 * 1024
30+
31+
// defaultStateCleanSize is the default memory allowance of clean state cache.
32+
defaultStateCleanSize = 16 * 1024 * 1024
33+
34+
// maxBufferSize is the maximum memory allowance of node buffer.
35+
// Too large buffer will cause the system to pause for a long
36+
// time when write happens. Also, the largest batch that pebble can
37+
// support is 4GB, node will panic if batch size exceeds this limit.
38+
maxBufferSize = 256 * 1024 * 1024
39+
40+
// defaultBufferSize is the default memory allowance of node buffer
41+
// that aggregates the writes from above until it's flushed into the
42+
// disk. It's meant to be used once the initial sync is finished.
43+
// Do not increase the buffer size arbitrarily, otherwise the system
44+
// pause time will increase when the database writes happen.
45+
defaultBufferSize = 64 * 1024 * 1024
46+
)
47+
48+
var (
49+
// maxDiffLayers is the maximum diff layers allowed in the layer tree.
50+
maxDiffLayers = 128
51+
)
52+
53+
// Defaults contains default settings for Ethereum mainnet.
54+
var Defaults = &Config{
55+
StateHistory: params.FullImmutabilityThreshold,
56+
EnableStateIndexing: false,
57+
TrieCleanSize: defaultTrieCleanSize,
58+
StateCleanSize: defaultStateCleanSize,
59+
WriteBufferSize: defaultBufferSize,
60+
}
61+
62+
// ReadOnly is the config used to open the database in read-only mode.
63+
var ReadOnly = &Config{
64+
ReadOnly: true,
65+
TrieCleanSize: defaultTrieCleanSize,
66+
StateCleanSize: defaultStateCleanSize,
67+
}
68+
69+
// Config contains the settings for database.
70+
type Config struct {
71+
StateHistory uint64 // Number of recent blocks to maintain state history for, 0: full chain
72+
EnableStateIndexing bool // Whether to enable state history indexing for external state access
73+
TrieCleanSize int // Maximum memory allowance (in bytes) for caching clean trie data
74+
StateCleanSize int // Maximum memory allowance (in bytes) for caching clean state data
75+
WriteBufferSize int // Maximum memory allowance (in bytes) for write buffer
76+
ReadOnly bool // Flag whether the database is opened in read only mode
77+
JournalDirectory string // Absolute path of journal directory (null means the journal data is persisted in key-value store)
78+
79+
// Testing configurations
80+
SnapshotNoBuild bool // Flag Whether the state generation is disabled
81+
NoAsyncFlush bool // Flag whether the background buffer flushing is disabled
82+
NoAsyncGeneration bool // Flag whether the background generation is disabled
83+
}
84+
85+
// sanitize checks the provided user configurations and changes anything that's
86+
// unreasonable or unworkable.
87+
func (c *Config) sanitize() *Config {
88+
conf := *c
89+
if conf.WriteBufferSize > maxBufferSize {
90+
log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.WriteBufferSize), "updated", common.StorageSize(maxBufferSize))
91+
conf.WriteBufferSize = maxBufferSize
92+
}
93+
return &conf
94+
}
95+
96+
// fields returns a list of attributes of config for printing.
97+
func (c *Config) fields() []interface{} {
98+
var list []interface{}
99+
if c.ReadOnly {
100+
list = append(list, "readonly", true)
101+
}
102+
list = append(list, "triecache", common.StorageSize(c.TrieCleanSize))
103+
list = append(list, "statecache", common.StorageSize(c.StateCleanSize))
104+
list = append(list, "buffer", common.StorageSize(c.WriteBufferSize))
105+
106+
if c.StateHistory == 0 {
107+
list = append(list, "state-history", "entire chain")
108+
} else {
109+
list = append(list, "state-history", fmt.Sprintf("last %d blocks", c.StateHistory))
110+
}
111+
if c.EnableStateIndexing {
112+
list = append(list, "index-history", true)
113+
}
114+
if c.JournalDirectory != "" {
115+
list = append(list, "journal-dir", c.JournalDirectory)
116+
}
117+
return list
118+
}

triedb/pathdb/database.go

Lines changed: 3 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -31,37 +31,10 @@ import (
3131
"github.com/ethereum/go-ethereum/crypto"
3232
"github.com/ethereum/go-ethereum/ethdb"
3333
"github.com/ethereum/go-ethereum/log"
34-
"github.com/ethereum/go-ethereum/params"
3534
"github.com/ethereum/go-ethereum/trie/trienode"
3635
"github.com/ethereum/go-verkle"
3736
)
3837

39-
const (
40-
// defaultTrieCleanSize is the default memory allowance of clean trie cache.
41-
defaultTrieCleanSize = 16 * 1024 * 1024
42-
43-
// defaultStateCleanSize is the default memory allowance of clean state cache.
44-
defaultStateCleanSize = 16 * 1024 * 1024
45-
46-
// maxBufferSize is the maximum memory allowance of node buffer.
47-
// Too large buffer will cause the system to pause for a long
48-
// time when write happens. Also, the largest batch that pebble can
49-
// support is 4GB, node will panic if batch size exceeds this limit.
50-
maxBufferSize = 256 * 1024 * 1024
51-
52-
// defaultBufferSize is the default memory allowance of node buffer
53-
// that aggregates the writes from above until it's flushed into the
54-
// disk. It's meant to be used once the initial sync is finished.
55-
// Do not increase the buffer size arbitrarily, otherwise the system
56-
// pause time will increase when the database writes happen.
57-
defaultBufferSize = 64 * 1024 * 1024
58-
)
59-
60-
var (
61-
// maxDiffLayers is the maximum diff layers allowed in the layer tree.
62-
maxDiffLayers = 128
63-
)
64-
6538
// layer is the interface implemented by all state layers which includes some
6639
// public methods and some additional methods for internal usage.
6740
type layer interface {
@@ -105,76 +78,14 @@ type layer interface {
10578
// the provided dirty trie nodes along with the state change set.
10679
//
10780
// Note, the maps are retained by the method to avoid copying everything.
108-
update(root common.Hash, id uint64, block uint64, nodes *nodeSet, states *StateSetWithOrigin) *diffLayer
81+
update(root common.Hash, id uint64, block uint64, nodes *nodeSetWithOrigin, states *StateSetWithOrigin) *diffLayer
10982

11083
// journal commits an entire diff hierarchy to disk into a single journal entry.
11184
// This is meant to be used during shutdown to persist the layer without
11285
// flattening everything down (bad for reorgs).
11386
journal(w io.Writer) error
11487
}
11588

116-
// Config contains the settings for database.
117-
type Config struct {
118-
StateHistory uint64 // Number of recent blocks to maintain state history for
119-
EnableStateIndexing bool // Whether to enable state history indexing for external state access
120-
TrieCleanSize int // Maximum memory allowance (in bytes) for caching clean trie nodes
121-
StateCleanSize int // Maximum memory allowance (in bytes) for caching clean state data
122-
WriteBufferSize int // Maximum memory allowance (in bytes) for write buffer
123-
ReadOnly bool // Flag whether the database is opened in read only mode
124-
JournalDirectory string // Absolute path of journal directory (null means the journal data is persisted in key-value store)
125-
126-
// Testing configurations
127-
SnapshotNoBuild bool // Flag Whether the state generation is allowed
128-
NoAsyncFlush bool // Flag whether the background buffer flushing is allowed
129-
NoAsyncGeneration bool // Flag whether the background generation is allowed
130-
}
131-
132-
// sanitize checks the provided user configurations and changes anything that's
133-
// unreasonable or unworkable.
134-
func (c *Config) sanitize() *Config {
135-
conf := *c
136-
if conf.WriteBufferSize > maxBufferSize {
137-
log.Warn("Sanitizing invalid node buffer size", "provided", common.StorageSize(conf.WriteBufferSize), "updated", common.StorageSize(maxBufferSize))
138-
conf.WriteBufferSize = maxBufferSize
139-
}
140-
return &conf
141-
}
142-
143-
// fields returns a list of attributes of config for printing.
144-
func (c *Config) fields() []interface{} {
145-
var list []interface{}
146-
if c.ReadOnly {
147-
list = append(list, "readonly", true)
148-
}
149-
if c.SnapshotNoBuild {
150-
list = append(list, "snapshot", false)
151-
}
152-
list = append(list, "triecache", common.StorageSize(c.TrieCleanSize))
153-
list = append(list, "statecache", common.StorageSize(c.StateCleanSize))
154-
list = append(list, "buffer", common.StorageSize(c.WriteBufferSize))
155-
156-
if c.StateHistory == 0 {
157-
list = append(list, "history", "entire chain")
158-
} else {
159-
list = append(list, "history", fmt.Sprintf("last %d blocks", c.StateHistory))
160-
}
161-
if c.JournalDirectory != "" {
162-
list = append(list, "journal-dir", c.JournalDirectory)
163-
}
164-
return list
165-
}
166-
167-
// Defaults contains default settings for Ethereum mainnet.
168-
var Defaults = &Config{
169-
StateHistory: params.FullImmutabilityThreshold,
170-
TrieCleanSize: defaultTrieCleanSize,
171-
StateCleanSize: defaultStateCleanSize,
172-
WriteBufferSize: defaultBufferSize,
173-
}
174-
175-
// ReadOnly is the config in order to open database in read only mode.
176-
var ReadOnly = &Config{ReadOnly: true}
177-
17889
// nodeHasher is the function to compute the hash of supplied node blob.
17990
type nodeHasher func([]byte) (common.Hash, error)
18091

@@ -422,7 +333,8 @@ func (db *Database) Update(root common.Hash, parentRoot common.Hash, block uint6
422333
if err := db.modifyAllowed(); err != nil {
423334
return err
424335
}
425-
if err := db.tree.add(root, parentRoot, block, nodes, states); err != nil {
336+
// TODO(rjl493456442) track the origins in the following PRs.
337+
if err := db.tree.add(root, parentRoot, block, NewNodeSetWithOrigin(nodes.Nodes(), nil), states); err != nil {
426338
return err
427339
}
428340
// Keep 128 diff layers in the memory, persistent layer is 129th.

0 commit comments

Comments
 (0)