
Commit 6eec3a9

holiman authored and rjl493456442 committed
trie: reduce allocations in stacktrie (#30743)
This PR uses various tweaks and tricks to make the stacktrie near alloc-free.

```
[user@work go-ethereum]$ benchstat stacktrie.1 stacktrie.7
goos: linux
goarch: amd64
pkg: github.com/ethereum/go-ethereum/trie
cpu: 12th Gen Intel(R) Core(TM) i7-1270P
             │ stacktrie.1 │           stacktrie.7            │
             │   sec/op    │    sec/op     vs base            │
Insert100K-8   106.97m ± 8%   88.21m ± 34%  -17.54% (p=0.000 n=10)

             │   stacktrie.1    │          stacktrie.7           │
             │       B/op       │     B/op      vs base          │
Insert100K-8   13199.608Ki ± 0%   3.424Ki ± 3%  -99.97% (p=0.000 n=10)

             │  stacktrie.1   │         stacktrie.7           │
             │   allocs/op    │  allocs/op   vs base          │
Insert100K-8   553428.50 ± 0%   22.00 ± 5%  -100.00% (p=0.000 n=10)
```

Also improves derivesha:

```
goos: linux
goarch: amd64
pkg: github.com/ethereum/go-ethereum/core/types
cpu: 12th Gen Intel(R) Core(TM) i7-1270P
                            │ derivesha.1 │           derivesha.2            │
                            │   sec/op    │    sec/op     vs base            │
DeriveSha200/stack_trie-8     477.8µ ± 2%   430.0µ ± 12%  -10.00% (p=0.000 n=10)

                            │ derivesha.1  │          derivesha.2          │
                            │     B/op     │     B/op     vs base          │
DeriveSha200/stack_trie-8     45.17Ki ± 0%   25.65Ki ± 0%  -43.21% (p=0.000 n=10)

                            │ derivesha.1 │         derivesha.2          │
                            │  allocs/op  │  allocs/op   vs base         │
DeriveSha200/stack_trie-8     1259.0 ± 0%   232.0 ± 0%  -81.57% (p=0.000 n=10)
```

---------

Co-authored-by: Gary Rong <[email protected]>
1 parent 3971ed6 commit 6eec3a9
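The allocs/op and B/op columns above come from Go's standard benchmarking machinery. Below is a minimal, hypothetical sketch (not part of this commit; the real Insert100K benchmark lives in the trie package) of how such numbers are produced so they can be compared with benchstat:

```go
// alloc_bench_test.go — hypothetical stand-in for the trie package's Insert100K benchmark.
package demo

import (
	"encoding/binary"
	"testing"
)

// buildKeys prepares an ascending set of fixed-size keys, mimicking the workload shape.
func buildKeys(n int) [][]byte {
	keys := make([][]byte, n)
	for i := range keys {
		k := make([]byte, 8)
		binary.BigEndian.PutUint64(k, uint64(i))
		keys[i] = k
	}
	return keys
}

func BenchmarkInsert100K(b *testing.B) {
	keys := buildKeys(100_000)
	b.ReportAllocs() // emit the B/op and allocs/op columns that benchstat aggregates
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		var sink int
		for _, k := range keys {
			sink += len(k) // in the real benchmark this is a stacktrie Update per key
		}
		_ = sink
	}
}
```

Running it with `go test -bench=Insert100K -count=10` before and after a change, saving the output to two files, and feeding both to `benchstat` yields tables like the ones in the commit message.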

File tree

7 files changed: +238 -39 lines changed

trie/bytepool.go

Lines changed: 64 additions & 0 deletions
@@ -0,0 +1,64 @@
+// Copyright 2024 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package trie
+
+// bytesPool is a pool for byte slices. It is safe for concurrent use.
+type bytesPool struct {
+	c chan []byte
+	w int
+}
+
+// newBytesPool creates a new bytesPool. The sliceCap sets the capacity of
+// newly allocated slices, and the nitems determines how many items the pool
+// will hold, at maximum.
+func newBytesPool(sliceCap, nitems int) *bytesPool {
+	return &bytesPool{
+		c: make(chan []byte, nitems),
+		w: sliceCap,
+	}
+}
+
+// Get returns a slice. Safe for concurrent use.
+func (bp *bytesPool) Get() []byte {
+	select {
+	case b := <-bp.c:
+		return b
+	default:
+		return make([]byte, 0, bp.w)
+	}
+}
+
+// GetWithSize returns a slice with specified byte slice size.
+func (bp *bytesPool) GetWithSize(s int) []byte {
+	b := bp.Get()
+	if cap(b) < s {
+		return make([]byte, s)
+	}
+	return b[:s]
+}
+
+// Put returns a slice to the pool. Safe for concurrent use. This method
+// will ignore slices that are too small or too large (>3x the cap)
+func (bp *bytesPool) Put(b []byte) {
+	if c := cap(b); c < bp.w || c > 3*bp.w {
+		return
+	}
+	select {
+	case bp.c <- b:
+	default:
+	}
+}
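For illustration, a simplified, self-contained sketch of how a channel-backed byte pool like the one added above behaves (it omits the capacity guard in Put and is not the trie package's own code): Get hands out a pooled or freshly made slice, Put recycles it, and the channel capacity bounds how many slices are retained.

```go
package main

import "fmt"

// bytesPool is a simplified copy of the pool above: a buffered channel of slices.
type bytesPool struct {
	c chan []byte
	w int
}

func newBytesPool(sliceCap, nitems int) *bytesPool {
	return &bytesPool{c: make(chan []byte, nitems), w: sliceCap}
}

// Get returns a pooled slice if one is available, else a fresh one.
func (bp *bytesPool) Get() []byte {
	select {
	case b := <-bp.c:
		return b
	default:
		return make([]byte, 0, bp.w)
	}
}

// Put recycles a slice; if the pool is full the slice is dropped for the GC.
func (bp *bytesPool) Put(b []byte) {
	select {
	case bp.c <- b:
	default:
	}
}

func main() {
	pool := newBytesPool(32, 2)
	buf := pool.Get()            // fresh 0-length, 32-cap slice
	buf = append(buf, "hash"...) // use it as scratch space
	pool.Put(buf)                // recycle instead of allocating next time
	again := pool.Get()          // likely the same backing array
	fmt.Println(cap(buf) == cap(again), len(again))
}
```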

trie/encoding.go

Lines changed: 12 additions & 0 deletions
@@ -104,6 +104,18 @@ func keybytesToHex(str []byte) []byte {
 	return nibbles
 }
 
+// writeHexKey writes the hexkey into the given slice.
+// OBS! This method omits the termination flag.
+// OBS! The dst slice must be at least 2x as large as the key
+func writeHexKey(dst []byte, key []byte) []byte {
+	_ = dst[2*len(key)-1]
+	for i, b := range key {
+		dst[i*2] = b / 16
+		dst[i*2+1] = b % 16
+	}
+	return dst[:2*len(key)]
+}
+
 // hexToKeybytes turns hex nibbles into key bytes.
 // This can only be used for keys of even length.
 func hexToKeybytes(hex []byte) []byte {
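To make the nibble layout concrete, here is a small standalone example around the writeHexKey helper from this diff: each key byte is split into its high and low nibble and written into a caller-owned buffer, with no 0x10 terminator appended (unlike keybytesToHex). The function body is copied verbatim; the surrounding main is illustrative only.

```go
package main

import "fmt"

// writeHexKey expands key bytes into hex nibbles inside a caller-owned buffer.
func writeHexKey(dst []byte, key []byte) []byte {
	_ = dst[2*len(key)-1] // bounds hint: dst must hold 2*len(key) nibbles
	for i, b := range key {
		dst[i*2] = b / 16
		dst[i*2+1] = b % 16
	}
	return dst[:2*len(key)]
}

func main() {
	key := []byte{0xab, 0x12}
	buf := make([]byte, 64) // reused across insertions, like StackTrie.kBuf
	nibbles := writeHexKey(buf, key)
	fmt.Printf("%v\n", nibbles) // [10 11 1 2]
}
```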

trie/hasher.go

Lines changed: 8 additions & 0 deletions
@@ -188,6 +188,14 @@ func (h *hasher) hashData(data []byte) hashNode {
 	return n
 }
 
+// hashDataTo hashes the provided data to the given destination buffer. The caller
+// must ensure that the dst buffer is of appropriate size.
+func (h *hasher) hashDataTo(dst, data []byte) {
+	h.sha.Reset()
+	h.sha.Write(data)
+	h.sha.Read(dst)
+}
+
 // proofHash is used to construct trie proofs, and returns the 'collapsed'
 // node (for later RLP encoding) as well as the hashed node -- unless the
 // node is smaller than 32 bytes, in which case it will be returned as is.
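hashDataTo writes the digest straight into a caller-supplied buffer instead of returning a freshly allocated hashNode. Below is a hedged sketch of the same pattern using only the standard library: sha256 with hash.Hash.Sum appending into a preallocated buffer. The real go-ethereum hasher reads the Keccak sponge state directly into dst; this is only an illustration of the zero-allocation idea.

```go
package main

import (
	"crypto/sha256"
	"fmt"
	"hash"
)

// hasher mirrors the shape of the trie hasher: a reusable hash state plus a
// destination-buffer variant that avoids allocating a new digest slice per call.
type hasher struct {
	sha hash.Hash
}

// hashDataTo hashes data and appends the digest into dst's backing array.
func (h *hasher) hashDataTo(dst, data []byte) []byte {
	h.sha.Reset()
	h.sha.Write(data)
	return h.sha.Sum(dst[:0]) // no new slice is allocated when cap(dst) >= digest size
}

func main() {
	h := &hasher{sha: sha256.New()}
	dst := make([]byte, 0, sha256.Size) // e.g. fetched from a byte pool
	fmt.Printf("%x\n", h.hashDataTo(dst, []byte("node rlp blob")))
}
```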

trie/node.go

Lines changed: 22 additions & 13 deletions
@@ -45,6 +45,27 @@ type (
 	}
 	hashNode  []byte
 	valueNode []byte
+
+	// fullnodeEncoder is a type used exclusively for encoding fullNode.
+	// Briefly instantiating a fullnodeEncoder and initializing with
+	// existing slices is less memory intense than using the fullNode type.
+	fullnodeEncoder struct {
+		Children [17][]byte
+	}
+
+	// extNodeEncoder is a type used exclusively for encoding extension node.
+	// Briefly instantiating a extNodeEncoder and initializing with existing
+	// slices is less memory intense than using the shortNode type.
+	extNodeEncoder struct {
+		Key []byte
+		Val []byte
+	}
+
+	// leafNodeEncoder is a type used exclusively for encoding leaf node.
+	leafNodeEncoder struct {
+		Key []byte
+		Val []byte
+	}
 )
 
 // nilValueNode is used when collapsing internal trie nodes for hashing, since
@@ -89,6 +110,7 @@ func (n *fullNode) fstring(ind string) string {
 	}
 	return resp + fmt.Sprintf("\n%s] ", ind)
 }
+
 func (n *shortNode) fstring(ind string) string {
 	return fmt.Sprintf("{%x: %v} ", n.Key, n.Val.fstring(ind+" "))
 }
@@ -99,19 +121,6 @@ func (n valueNode) fstring(ind string) string {
 	return fmt.Sprintf("%x ", []byte(n))
 }
 
-// rawNode is a simple binary blob used to differentiate between collapsed trie
-// nodes and already encoded RLP binary blobs (while at the same time store them
-// in the same cache fields).
-type rawNode []byte
-
-func (n rawNode) cache() (hashNode, bool)   { panic("this should never end up in a live trie") }
-func (n rawNode) fstring(ind string) string { panic("this should never end up in a live trie") }
-
-func (n rawNode) EncodeRLP(w io.Writer) error {
-	_, err := w.Write(n)
-	return err
-}
-
 // mustDecodeNode is a wrapper of decodeNode and panic if any error is encountered.
 func mustDecodeNode(hash, buf []byte) node {
 	n, err := decodeNode(hash, buf)

trie/node_enc.go

Lines changed: 35 additions & 4 deletions
@@ -40,6 +40,20 @@ func (n *fullNode) encode(w rlp.EncoderBuffer) {
 	w.ListEnd(offset)
 }
 
+func (n *fullnodeEncoder) encode(w rlp.EncoderBuffer) {
+	offset := w.List()
+	for _, c := range n.Children {
+		if c == nil {
+			w.Write(rlp.EmptyString)
+		} else if len(c) < 32 {
+			w.Write(c) // rawNode
+		} else {
+			w.WriteBytes(c) // hashNode
+		}
+	}
+	w.ListEnd(offset)
+}
+
 func (n *shortNode) encode(w rlp.EncoderBuffer) {
 	offset := w.List()
 	w.WriteBytes(n.Key)
@@ -51,14 +65,31 @@ func (n *shortNode) encode(w rlp.EncoderBuffer) {
 	w.ListEnd(offset)
 }
 
+func (n *extNodeEncoder) encode(w rlp.EncoderBuffer) {
+	offset := w.List()
+	w.WriteBytes(n.Key)
+
+	if n.Val == nil {
+		w.Write(rlp.EmptyString)
+	} else if len(n.Val) < 32 {
+		w.Write(n.Val) // rawNode
+	} else {
+		w.WriteBytes(n.Val) // hashNode
+	}
+	w.ListEnd(offset)
+}
+
+func (n *leafNodeEncoder) encode(w rlp.EncoderBuffer) {
+	offset := w.List()
+	w.WriteBytes(n.Key) // Compact format key
+	w.WriteBytes(n.Val) // Value node, must be non-nil
+	w.ListEnd(offset)
+}
+
 func (n hashNode) encode(w rlp.EncoderBuffer) {
 	w.WriteBytes(n)
 }
 
 func (n valueNode) encode(w rlp.EncoderBuffer) {
 	w.WriteBytes(n)
 }
-
-func (n rawNode) encode(w rlp.EncoderBuffer) {
-	w.Write(n)
-}
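These encoder types write their RLP directly into the hasher's reusable rlp.EncoderBuffer, so no intermediate node structs or interface values are allocated. Below is a hedged, standalone sketch of that flow; it assumes the go-ethereum rlp package's EncoderBuffer API as used in this diff, and treats NewEncoderBuffer(nil) buffering plus ToBytes as assumptions worth verifying against your rlp version.

```go
package main

import (
	"fmt"

	"github.com/ethereum/go-ethereum/rlp"
)

// leafNodeEncoder mirrors the encoder type added in this commit: a leaf is a
// two-item RLP list of compact-format key and value.
type leafNodeEncoder struct {
	Key []byte
	Val []byte
}

func (n *leafNodeEncoder) encode(w rlp.EncoderBuffer) {
	offset := w.List()
	w.WriteBytes(n.Key) // compact-format key
	w.WriteBytes(n.Val) // value, must be non-nil
	w.ListEnd(offset)
}

func main() {
	buf := rlp.NewEncoderBuffer(nil) // assumed: nil writer puts the buffer in in-memory mode
	leaf := &leafNodeEncoder{Key: []byte{0x20, 0xab}, Val: []byte("value")}
	leaf.encode(buf)
	fmt.Printf("%x\n", buf.ToBytes()) // the encoded leaf node blob
}
```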

trie/stacktrie.go

Lines changed: 51 additions & 22 deletions
@@ -27,6 +27,7 @@ import (
 
 var (
 	stPool = sync.Pool{New: func() any { return new(stNode) }}
+	bPool  = newBytesPool(32, 100)
 	_      = types.TrieHasher((*StackTrie)(nil))
 )
 
@@ -47,6 +48,8 @@ type StackTrie struct {
 	h          *hasher
 	last       []byte
 	onTrieNode OnTrieNode
+	kBuf       []byte // buf space used for hex-key during insertions
+	pBuf       []byte // buf space used for path during insertions
 }
 
 // NewStackTrie allocates and initializes an empty trie. The committed nodes
@@ -56,6 +59,17 @@ func NewStackTrie(onTrieNode OnTrieNode) *StackTrie {
 		root:       stPool.Get().(*stNode),
 		h:          newHasher(false),
 		onTrieNode: onTrieNode,
+		kBuf:       make([]byte, 64),
+		pBuf:       make([]byte, 64),
+	}
+}
+
+func (t *StackTrie) grow(key []byte) {
+	if cap(t.kBuf) < 2*len(key) {
+		t.kBuf = make([]byte, 2*len(key))
+	}
+	if cap(t.pBuf) < 2*len(key) {
+		t.pBuf = make([]byte, 2*len(key))
 	}
 }
 
@@ -64,7 +78,8 @@ func (t *StackTrie) Update(key, value []byte) error {
 	if len(value) == 0 {
 		return errors.New("trying to insert empty (deletion)")
 	}
-	k := t.TrieKey(key)
+	t.grow(key)
+	k := writeHexKey(t.kBuf, key)
 	if bytes.Compare(t.last, k) >= 0 {
 		return errors.New("non-ascending key order")
 	}
@@ -73,7 +88,7 @@ func (t *StackTrie) Update(key, value []byte) error {
 	} else {
 		t.last = append(t.last[:0], k...) // reuse key slice
 	}
-	t.insert(t.root, k, value, nil)
+	t.insert(t.root, k, value, t.pBuf[:0])
 	return nil
 }
 
@@ -129,6 +144,12 @@ const (
 )
 
 func (n *stNode) reset() *stNode {
+	if n.typ == hashedNode {
+		// On hashnodes, we 'own' the val: it is guaranteed to be not held
+		// by external caller. Hence, when we arrive here, we can put it back
+		// into the pool
+		bPool.Put(n.val)
+	}
 	n.key = n.key[:0]
 	n.val = nil
 	for i := range n.children {
@@ -150,8 +171,12 @@ func (n *stNode) getDiffIndex(key []byte) int {
 	return len(n.key)
 }
 
-// Helper function to that inserts a (key, value) pair into
-// the trie.
+// Helper function to that inserts a (key, value) pair into the trie.
+//
+//   - The key is not retained by this method, but always copied if needed.
+//   - The value is retained by this method, as long as the leaf that it represents
+//     remains unhashed. However: it is never modified.
+//   - The path is not retained by this method.
 func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
 	switch st.typ {
 	case branchNode: /* Branch */
@@ -283,7 +308,7 @@ func (t *StackTrie) insert(st *stNode, key, value []byte, path []byte) {
 
 	case emptyNode: /* Empty */
 		st.typ = leafNode
-		st.key = key
+		st.key = append(st.key, key...) // deep-copy the key as it's volatile
 		st.val = value
 
 	case hashedNode:
@@ -318,35 +343,33 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
 		return
 
 	case branchNode:
-		var nodes fullNode
+		var nodes fullnodeEncoder
 		for i, child := range st.children {
 			if child == nil {
-				nodes.Children[i] = nilValueNode
 				continue
 			}
 			t.hash(child, append(path, byte(i)))
+			nodes.Children[i] = child.val
+		}
+		nodes.encode(t.h.encbuf)
+		blob = t.h.encodedBytes()
 
-			if len(child.val) < 32 {
-				nodes.Children[i] = rawNode(child.val)
-			} else {
-				nodes.Children[i] = hashNode(child.val)
+		for i, child := range st.children {
+			if child == nil {
+				continue
 			}
 			st.children[i] = nil
 			stPool.Put(child.reset()) // Release child back to pool.
 		}
-		nodes.encode(t.h.encbuf)
-		blob = t.h.encodedBytes()
 
 	case extNode:
 		// recursively hash and commit child as the first step
 		t.hash(st.children[0], append(path, st.key...))
 
 		// encode the extension node
-		n := shortNode{Key: hexToCompactInPlace(st.key)}
-		if len(st.children[0].val) < 32 {
-			n.Val = rawNode(st.children[0].val)
-		} else {
-			n.Val = hashNode(st.children[0].val)
+		n := extNodeEncoder{
+			Key: hexToCompactInPlace(st.key),
+			Val: st.children[0].val,
 		}
 		n.encode(t.h.encbuf)
 		blob = t.h.encodedBytes()
@@ -356,8 +379,10 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
 
 	case leafNode:
 		st.key = append(st.key, byte(16))
-		n := shortNode{Key: hexToCompactInPlace(st.key), Val: valueNode(st.val)}
-
+		n := leafNodeEncoder{
+			Key: hexToCompactInPlace(st.key),
+			Val: st.val,
+		}
 		n.encode(t.h.encbuf)
 		blob = t.h.encodedBytes()
 
@@ -368,15 +393,19 @@ func (t *StackTrie) hash(st *stNode, path []byte) {
 	st.typ = hashedNode
 	st.key = st.key[:0]
 
+	st.val = nil // Release reference to potentially externally held slice.
+
 	// Skip committing the non-root node if the size is smaller than 32 bytes
 	// as tiny nodes are always embedded in their parent except root node.
 	if len(blob) < 32 && len(path) > 0 {
-		st.val = common.CopyBytes(blob)
+		st.val = bPool.GetWithSize(len(blob))
+		copy(st.val, blob)
 		return
 	}
 	// Write the hash to the 'val'. We allocate a new val here to not mutate
 	// input values.
-	st.val = t.h.hashData(blob)
+	st.val = bPool.GetWithSize(32)
+	t.h.hashDataTo(st.val, blob)
 
 	// Invoke the callback it's provided. Notably, the path and blob slices are
 	// volatile, please deep-copy the slices in callback if the contents need
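Taken together, the changes keep the public StackTrie contract unchanged: keys must be inserted in ascending order, values are retained until their leaf is hashed, and the buffers above are reused internally so a full pass allocates almost nothing. A hedged usage sketch against the public API (NewStackTrie, Update, Hash), assuming the go-ethereum module at this commit:

```go
package main

import (
	"encoding/binary"
	"fmt"

	"github.com/ethereum/go-ethereum/trie"
)

func main() {
	st := trie.NewStackTrie(nil) // nil: no per-node commit callback
	for i := uint64(0); i < 100; i++ {
		key := make([]byte, 8)
		binary.BigEndian.PutUint64(key, i) // ascending key order is required
		if err := st.Update(key, []byte(fmt.Sprintf("value-%d", i))); err != nil {
			panic(err)
		}
	}
	fmt.Println("root:", st.Hash()) // root hash of the accumulated trie
}
```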
