
Commit 23da91f

trie: reduce the memory allocation in trie hashing (#31902)
This pull request optimizes trie hashing by reducing memory allocation overhead. Specifically:

- define a fullNodeEncoder pool to reuse encoders and avoid memory allocations.
- simplify the encoding logic for shortNode and fullNode by getting rid of the Go interfaces.
1 parent d4a3bf1 commit 23da91f
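The core of the optimization is reusing a single pooled encoder per node instead of allocating a collapsed node copy for every hash. The following is a minimal, self-contained sketch of that sync.Pool pattern; the nodeEncoder type, its reset method, and encodeChildren are illustrative stand-ins, not the actual geth types:

package main

import (
    "fmt"
    "sync"
)

// nodeEncoder is an illustrative stand-in for fullnodeEncoder: a fixed-size
// struct whose child slots are cleared and reused between encodings.
type nodeEncoder struct {
    Children [17][]byte
}

// reset truncates each child slot but keeps its backing array, so the next
// encoding can append into already-allocated memory.
func (e *nodeEncoder) reset() {
    for i := range e.Children {
        if len(e.Children[i]) != 0 {
            e.Children[i] = e.Children[i][:0]
        }
    }
}

// encoderPool hands out shared encoders so each call avoids a fresh allocation.
var encoderPool = sync.Pool{
    New: func() interface{} { return new(nodeEncoder) },
}

// encodeChildren copies the given child blobs into a pooled encoder and returns
// the total encoded size; a real encoder would RLP-encode into a buffer here.
func encodeChildren(children [][]byte) int {
    enc := encoderPool.Get().(*nodeEncoder)
    enc.reset()

    size := 0
    for i, c := range children {
        enc.Children[i] = append(enc.Children[i], c...)
        size += len(enc.Children[i])
    }
    encoderPool.Put(enc) // hand the encoder back for the next caller
    return size
}

func main() {
    fmt.Println(encodeChildren([][]byte{[]byte("leaf"), []byte("hash")})) // 8
}

The reset step truncates each child slot to length zero while keeping its capacity, so repeated encodings reuse the same backing arrays instead of allocating new ones.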

File tree

7 files changed: +121 / -112 lines


trie/hasher.go

Lines changed: 94 additions & 85 deletions
@@ -17,6 +17,8 @@
 package trie
 
 import (
+    "bytes"
+    "fmt"
     "sync"
 
     "github.com/ethereum/go-ethereum/crypto"
@@ -54,109 +56,118 @@ func returnHasherToPool(h *hasher) {
 }
 
 // hash collapses a node down into a hash node.
-func (h *hasher) hash(n node, force bool) node {
+func (h *hasher) hash(n node, force bool) []byte {
     // Return the cached hash if it's available
     if hash, _ := n.cache(); hash != nil {
         return hash
     }
     // Trie not processed yet, walk the children
     switch n := n.(type) {
     case *shortNode:
-        collapsed := h.hashShortNodeChildren(n)
-        hashed := h.shortnodeToHash(collapsed, force)
-        if hn, ok := hashed.(hashNode); ok {
-            n.flags.hash = hn
-        } else {
-            n.flags.hash = nil
+        enc := h.encodeShortNode(n)
+        if len(enc) < 32 && !force {
+            // Nodes smaller than 32 bytes are embedded directly in their parent.
+            // In such cases, return the raw encoded blob instead of the node hash.
+            // It's essential to deep-copy the node blob, as the underlying buffer
+            // of enc will be reused later.
+            buf := make([]byte, len(enc))
+            copy(buf, enc)
+            return buf
         }
-        return hashed
+        hash := h.hashData(enc)
+        n.flags.hash = hash
+        return hash
+
     case *fullNode:
-        collapsed := h.hashFullNodeChildren(n)
-        hashed := h.fullnodeToHash(collapsed, force)
-        if hn, ok := hashed.(hashNode); ok {
-            n.flags.hash = hn
-        } else {
-            n.flags.hash = nil
+        enc := h.encodeFullNode(n)
+        if len(enc) < 32 && !force {
+            // Nodes smaller than 32 bytes are embedded directly in their parent.
+            // In such cases, return the raw encoded blob instead of the node hash.
+            // It's essential to deep-copy the node blob, as the underlying buffer
+            // of enc will be reused later.
+            buf := make([]byte, len(enc))
+            copy(buf, enc)
+            return buf
         }
-        return hashed
-    default:
-        // Value and hash nodes don't have children, so they're left as were
+        hash := h.hashData(enc)
+        n.flags.hash = hash
+        return hash
+
+    case hashNode:
+        // hash nodes don't have children, so they're left as were
        return n
+
+    default:
+        panic(fmt.Errorf("unexpected node type, %T", n))
     }
 }
 
-// hashShortNodeChildren returns a copy of the supplied shortNode, with its child
-// being replaced by either the hash or an embedded node if the child is small.
-func (h *hasher) hashShortNodeChildren(n *shortNode) *shortNode {
-    var collapsed shortNode
-    collapsed.Key = hexToCompact(n.Key)
-    switch n.Val.(type) {
-    case *fullNode, *shortNode:
-        collapsed.Val = h.hash(n.Val, false)
-    default:
-        collapsed.Val = n.Val
+// encodeShortNode encodes the provided shortNode into the bytes. Notably, the
+// return slice must be deep-copied explicitly, otherwise the underlying slice
+// will be reused later.
+func (h *hasher) encodeShortNode(n *shortNode) []byte {
+    // Encode leaf node
+    if hasTerm(n.Key) {
+        var ln leafNodeEncoder
+        ln.Key = hexToCompact(n.Key)
+        ln.Val = n.Val.(valueNode)
+        ln.encode(h.encbuf)
+        return h.encodedBytes()
     }
-    return &collapsed
+    // Encode extension node
+    var en extNodeEncoder
+    en.Key = hexToCompact(n.Key)
+    en.Val = h.hash(n.Val, false)
+    en.encode(h.encbuf)
+    return h.encodedBytes()
+}
+
+// fnEncoderPool is the pool for storing shared fullNode encoder to mitigate
+// the significant memory allocation overhead.
+var fnEncoderPool = sync.Pool{
+    New: func() interface{} {
+        var enc fullnodeEncoder
+        return &enc
+    },
 }
 
-// hashFullNodeChildren returns a copy of the supplied fullNode, with its child
-// being replaced by either the hash or an embedded node if the child is small.
-func (h *hasher) hashFullNodeChildren(n *fullNode) *fullNode {
-    var children [17]node
+// encodeFullNode encodes the provided fullNode into the bytes. Notably, the
+// return slice must be deep-copied explicitly, otherwise the underlying slice
+// will be reused later.
+func (h *hasher) encodeFullNode(n *fullNode) []byte {
+    fn := fnEncoderPool.Get().(*fullnodeEncoder)
+    fn.reset()
+
     if h.parallel {
         var wg sync.WaitGroup
         for i := 0; i < 16; i++ {
-            if child := n.Children[i]; child != nil {
-                wg.Add(1)
-                go func(i int) {
-                    hasher := newHasher(false)
-                    children[i] = hasher.hash(child, false)
-                    returnHasherToPool(hasher)
-                    wg.Done()
-                }(i)
-            } else {
-                children[i] = nilValueNode
+            if n.Children[i] == nil {
+                continue
             }
+            wg.Add(1)
+            go func(i int) {
+                defer wg.Done()
+
+                h := newHasher(false)
+                fn.Children[i] = h.hash(n.Children[i], false)
+                returnHasherToPool(h)
+            }(i)
         }
         wg.Wait()
     } else {
         for i := 0; i < 16; i++ {
             if child := n.Children[i]; child != nil {
-                children[i] = h.hash(child, false)
-            } else {
-                children[i] = nilValueNode
+                fn.Children[i] = h.hash(child, false)
             }
         }
     }
     if n.Children[16] != nil {
-        children[16] = n.Children[16]
-    }
-    return &fullNode{flags: nodeFlag{}, Children: children}
-}
-
-// shortNodeToHash computes the hash of the given shortNode. The shortNode must
-// first be collapsed, with its key converted to compact form. If the RLP-encoded
-// node data is smaller than 32 bytes, the node itself is returned.
-func (h *hasher) shortnodeToHash(n *shortNode, force bool) node {
-    n.encode(h.encbuf)
-    enc := h.encodedBytes()
-
-    if len(enc) < 32 && !force {
-        return n // Nodes smaller than 32 bytes are stored inside their parent
+        fn.Children[16] = n.Children[16].(valueNode)
     }
-    return h.hashData(enc)
-}
-
-// fullnodeToHash computes the hash of the given fullNode. If the RLP-encoded
-// node data is smaller than 32 bytes, the node itself is returned.
-func (h *hasher) fullnodeToHash(n *fullNode, force bool) node {
-    n.encode(h.encbuf)
-    enc := h.encodedBytes()
+    fn.encode(h.encbuf)
+    fnEncoderPool.Put(fn)
 
-    if len(enc) < 32 && !force {
-        return n // Nodes smaller than 32 bytes are stored inside their parent
-    }
-    return h.hashData(enc)
+    return h.encodedBytes()
 }
 
 // encodedBytes returns the result of the last encoding operation on h.encbuf.
@@ -175,9 +186,10 @@ func (h *hasher) encodedBytes() []byte {
     return h.tmp
 }
 
-// hashData hashes the provided data
-func (h *hasher) hashData(data []byte) hashNode {
-    n := make(hashNode, 32)
+// hashData hashes the provided data. It is safe to modify the returned slice after
+// the function returns.
+func (h *hasher) hashData(data []byte) []byte {
+    n := make([]byte, 32)
     h.sha.Reset()
     h.sha.Write(data)
     h.sha.Read(n)
@@ -192,20 +204,17 @@ func (h *hasher) hashDataTo(dst, data []byte) {
     h.sha.Read(dst)
 }
 
-// proofHash is used to construct trie proofs, and returns the 'collapsed'
-// node (for later RLP encoding) as well as the hashed node -- unless the
-// node is smaller than 32 bytes, in which case it will be returned as is.
-// This method does not do anything on value- or hash-nodes.
-func (h *hasher) proofHash(original node) (collapsed, hashed node) {
+// proofHash is used to construct trie proofs, returning the rlp-encoded node blobs.
+// Note, only resolved node (shortNode or fullNode) is expected for proofing.
+//
+// It is safe to modify the returned slice after the function returns.
+func (h *hasher) proofHash(original node) []byte {
     switch n := original.(type) {
     case *shortNode:
-        sn := h.hashShortNodeChildren(n)
-        return sn, h.shortnodeToHash(sn, false)
+        return bytes.Clone(h.encodeShortNode(n))
     case *fullNode:
-        fn := h.hashFullNodeChildren(n)
-        return fn, h.fullnodeToHash(fn, false)
+        return bytes.Clone(h.encodeFullNode(n))
     default:
-        // Value and hash nodes don't have children, so they're left as were
-        return n, n
+        panic(fmt.Errorf("unexpected node type, %T", original))
     }
 }
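The added comments stress that the encoded blob must be deep-copied before it escapes, because the hasher's encoding buffer is reused for the next node. A minimal sketch of the aliasing problem, with a toy encoder rather than the real hasher:

package main

import "fmt"

// encoder mimics a hasher whose encode calls all write into one shared buffer.
type encoder struct{ buf []byte }

func newEncoder() *encoder { return &encoder{buf: make([]byte, 0, 64)} }

// encode overwrites the shared buffer and returns a view of it.
func (e *encoder) encode(data string) []byte {
    e.buf = append(e.buf[:0], data...)
    return e.buf
}

func main() {
    e := newEncoder()

    view := e.encode("short node A")     // aliases e.buf
    kept := append([]byte(nil), view...) // deep copy, as the new hash() does

    e.encode("full node BBBB") // reuses the same backing array

    fmt.Printf("%q\n", view) // "full node BB" -- the view was silently overwritten
    fmt.Printf("%q\n", kept) // "short node A" -- the copy is safe
}

The bytes.Clone in proofHash and the explicit copy in hash serve exactly this purpose: they detach the returned blob from the shared encoding buffer before the hasher is reused.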

trie/iterator.go

Lines changed: 3 additions & 3 deletions
@@ -240,9 +240,9 @@ func (it *nodeIterator) LeafProof() [][]byte {
 
     for i, item := range it.stack[:len(it.stack)-1] {
         // Gather nodes that end up as hash nodes (or the root)
-        node, hashed := hasher.proofHash(item.node)
-        if _, ok := hashed.(hashNode); ok || i == 0 {
-            proofs = append(proofs, nodeToBytes(node))
+        enc := hasher.proofHash(item.node)
+        if len(enc) >= 32 || i == 0 {
+            proofs = append(proofs, enc)
         }
     }
     return proofs

trie/node.go

Lines changed: 0 additions & 4 deletions
@@ -68,10 +68,6 @@ type (
     }
 )
 
-// nilValueNode is used when collapsing internal trie nodes for hashing, since
-// unset children need to serialize correctly.
-var nilValueNode = valueNode(nil)
-
 // EncodeRLP encodes a full node into the consensus RLP format.
 func (n *fullNode) EncodeRLP(w io.Writer) error {
     eb := rlp.NewEncoderBuffer(w)

trie/node_enc.go

Lines changed: 17 additions & 6 deletions
@@ -42,18 +42,29 @@ func (n *fullNode) encode(w rlp.EncoderBuffer) {
 
 func (n *fullnodeEncoder) encode(w rlp.EncoderBuffer) {
     offset := w.List()
-    for _, c := range n.Children {
-        if c == nil {
+    for i, c := range n.Children {
+        if len(c) == 0 {
             w.Write(rlp.EmptyString)
-        } else if len(c) < 32 {
-            w.Write(c) // rawNode
         } else {
-            w.WriteBytes(c) // hashNode
+            // valueNode or hashNode
+            if i == 16 || len(c) >= 32 {
+                w.WriteBytes(c)
+            } else {
+                w.Write(c) // rawNode
+            }
         }
     }
     w.ListEnd(offset)
 }
 
+func (n *fullnodeEncoder) reset() {
+    for i, c := range n.Children {
+        if len(c) != 0 {
+            n.Children[i] = n.Children[i][:0]
+        }
+    }
+}
+
 func (n *shortNode) encode(w rlp.EncoderBuffer) {
     offset := w.List()
     w.WriteBytes(n.Key)
@@ -70,7 +81,7 @@ func (n *extNodeEncoder) encode(w rlp.EncoderBuffer) {
     w.WriteBytes(n.Key)
 
     if n.Val == nil {
-        w.Write(rlp.EmptyString)
+        w.Write(rlp.EmptyString) // theoretically impossible to happen
     } else if len(n.Val) < 32 {
         w.Write(n.Val) // rawNode
     } else {

trie/proof.go

Lines changed: 4 additions & 10 deletions
@@ -22,6 +22,7 @@ import (
     "fmt"
 
     "github.com/ethereum/go-ethereum/common"
+    "github.com/ethereum/go-ethereum/crypto"
     "github.com/ethereum/go-ethereum/ethdb"
     "github.com/ethereum/go-ethereum/log"
 )
@@ -85,16 +86,9 @@ func (t *Trie) Prove(key []byte, proofDb ethdb.KeyValueWriter) error {
     defer returnHasherToPool(hasher)
 
     for i, n := range nodes {
-        var hn node
-        n, hn = hasher.proofHash(n)
-        if hash, ok := hn.(hashNode); ok || i == 0 {
-            // If the node's database encoding is a hash (or is the
-            // root node), it becomes a proof element.
-            enc := nodeToBytes(n)
-            if !ok {
-                hash = hasher.hashData(enc)
-            }
-            proofDb.Put(hash, enc)
+        enc := hasher.proofHash(n)
+        if len(enc) >= 32 || i == 0 {
+            proofDb.Put(crypto.Keccak256(enc), enc)
         }
     }
     return nil
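With the new flow, Prove keys each proof element by the Keccak-256 hash of its RLP encoding rather than reusing a hashNode produced during collapsing. A minimal sketch of that relationship, using an in-memory map as a hypothetical stand-in for the ethdb.KeyValueWriter and an arbitrary byte blob instead of real trie data:

package main

import (
    "fmt"

    "github.com/ethereum/go-ethereum/crypto"
)

func main() {
    proofDb := make(map[string][]byte)

    // enc stands in for hasher.proofHash(n): the node's RLP encoding.
    // Only blobs of 32 bytes or more (or the root) become proof elements;
    // smaller nodes are embedded in their parents instead.
    enc := []byte("an example node encoding that is at least 32 bytes long")

    if len(enc) >= 32 {
        key := crypto.Keccak256(enc) // 32-byte hash used as the database key
        proofDb[string(key)] = enc
    }
    fmt.Println(len(proofDb)) // 1
}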

trie/trie.go

Lines changed: 3 additions & 3 deletions
@@ -626,7 +626,7 @@ func (t *Trie) resolveAndTrack(n hashNode, prefix []byte) (node, error) {
 // Hash returns the root hash of the trie. It does not write to the
 // database and can be used even if the trie doesn't have one.
 func (t *Trie) Hash() common.Hash {
-    return common.BytesToHash(t.hashRoot().(hashNode))
+    return common.BytesToHash(t.hashRoot())
 }
 
 // Commit collects all dirty nodes in the trie and replaces them with the
@@ -677,9 +677,9 @@ func (t *Trie) Commit(collectLeaf bool) (common.Hash, *trienode.NodeSet) {
 }
 
 // hashRoot calculates the root hash of the given trie
-func (t *Trie) hashRoot() node {
+func (t *Trie) hashRoot() []byte {
     if t.root == nil {
-        return hashNode(types.EmptyRootHash.Bytes())
+        return types.EmptyRootHash.Bytes()
     }
     // If the number of changes is below 100, we let one thread handle it
     h := newHasher(t.unhashed >= 100)

trie/trie_test.go

Lines changed: 0 additions & 1 deletion
@@ -863,7 +863,6 @@ func (s *spongeDb) Flush() {
         s.sponge.Write([]byte(key))
         s.sponge.Write([]byte(s.values[key]))
     }
-    fmt.Println(len(s.keys))
 }
 
 // spongeBatch is a dummy batch which immediately writes to the underlying spongedb
