Skip to content

Commit 8167897

Browse files
authored
trie, tests/fuzzers: implement a stacktrie fuzzer + stacktrie fixes (#21799)
* trie: fix error in stacktrie not committing small roots
* fuzzers: make trie-fuzzer use correct returnvalues
* trie: improved tests
* tests/fuzzers: fuzzer for stacktrie vs regular trie
* test/fuzzers: make stacktrie fuzzer use 32-byte keys
* trie: fix error in stacktrie with small nodes
* trie: add (skipped) testcase for stacktrie
* tests/fuzzers: address review comments for stacktrie fuzzer
* trie: fix docs in stacktrie
1 parent 97fc1c3 commit 8167897

File tree

6 files changed

+341
-14
lines changed

6 files changed

+341
-14
lines changed
Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"io/ioutil"
6+
"os"
7+
8+
"github.com/ethereum/go-ethereum/tests/fuzzers/stacktrie"
9+
)
10+
11+
func main() {
12+
if len(os.Args) != 2 {
13+
fmt.Fprintf(os.Stderr, "Usage: debug <file>")
14+
os.Exit(1)
15+
}
16+
crasher := os.Args[1]
17+
data, err := ioutil.ReadFile(crasher)
18+
if err != nil {
19+
fmt.Fprintf(os.Stderr, "error loading crasher %v: %v", crasher, err)
20+
os.Exit(1)
21+
}
22+
stacktrie.Debug(data)
23+
}
Lines changed: 197 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,197 @@
1+
// Copyright 2020 The go-ethereum Authors
2+
// This file is part of the go-ethereum library.
3+
//
4+
// The go-ethereum library is free software: you can redistribute it and/or modify
5+
// it under the terms of the GNU Lesser General Public License as published by
6+
// the Free Software Foundation, either version 3 of the License, or
7+
// (at your option) any later version.
8+
//
9+
// The go-ethereum library is distributed in the hope that it will be useful,
10+
// but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
// GNU Lesser General Public License for more details.
13+
//
14+
// You should have received a copy of the GNU Lesser General Public License
15+
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
16+
17+
package stacktrie
18+
19+
import (
20+
"bytes"
21+
"encoding/binary"
22+
"errors"
23+
"fmt"
24+
"hash"
25+
"io"
26+
"sort"
27+
28+
"github.com/ethereum/go-ethereum/common"
29+
"github.com/ethereum/go-ethereum/ethdb"
30+
"github.com/ethereum/go-ethereum/trie"
31+
"golang.org/x/crypto/sha3"
32+
)
33+
34+
// fuzzer carries the state of a single fuzzing run: the raw input stream and
// the flags that steer the run.
type fuzzer struct {
	input     io.Reader // source of the fuzz bytes
	exhausted bool      // set once the input could not satisfy a read
	debugging bool      // when set, fuzz prints every stacktrie update
}

// read returns the next size bytes from the input.
//
// If the input cannot supply all size bytes, f.exhausted is set and the
// unfilled tail of the returned slice is left as zeroes.
func (f *fuzzer) read(size int) []byte {
	out := make([]byte, size)
	// io.ReadFull reports a short read as an error. A bare Read may return
	// fewer bytes than requested together with a nil error, which would hide
	// the zero padding from the caller.
	if _, err := io.ReadFull(f.input, out); err != nil {
		f.exhausted = true
	}
	return out
}

// readSlice reads a little-endian uint16 from the input, uses it to pick a
// length of min + prefix%(max-min), and returns that many bytes from the
// input. When max == min the length is exactly min.
//
// f.exhausted is set if either the length prefix or the payload cannot be
// read in full. The returned slice always has the chosen length (min when the
// prefix is missing) and is zero-padded where data was missing.
func (f *fuzzer) readSlice(min, max int) []byte {
	var a uint16
	if err := binary.Read(f.input, binary.LittleEndian, &a); err != nil {
		f.exhausted = true
	}
	size := min
	// Guard the modulo: an empty range would otherwise divide by zero.
	if span := max - min; span != 0 {
		size += int(a) % span
	}
	out := make([]byte, size)
	if _, err := io.ReadFull(f.input, out); err != nil {
		f.exhausted = true
	}
	return out
}
58+
59+
// spongeDb is a dummy db backend which accumulates writes in a sponge
60+
type spongeDb struct {
61+
sponge hash.Hash
62+
debug bool
63+
}
64+
65+
func (s *spongeDb) Has(key []byte) (bool, error) { panic("implement me") }
66+
func (s *spongeDb) Get(key []byte) ([]byte, error) { return nil, errors.New("no such elem") }
67+
func (s *spongeDb) Delete(key []byte) error { panic("implement me") }
68+
func (s *spongeDb) NewBatch() ethdb.Batch { return &spongeBatch{s} }
69+
func (s *spongeDb) Stat(property string) (string, error) { panic("implement me") }
70+
func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") }
71+
func (s *spongeDb) Close() error { return nil }
72+
73+
func (s *spongeDb) Put(key []byte, value []byte) error {
74+
if s.debug {
75+
fmt.Printf("db.Put %x : %x\n", key, value)
76+
}
77+
s.sponge.Write(key)
78+
s.sponge.Write(value)
79+
return nil
80+
}
81+
func (s *spongeDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator { panic("implement me") }
82+
83+
// spongeBatch is a dummy batch which immediately writes to the underlying spongedb
84+
type spongeBatch struct {
85+
db *spongeDb
86+
}
87+
88+
func (b *spongeBatch) Put(key, value []byte) error {
89+
b.db.Put(key, value)
90+
return nil
91+
}
92+
func (b *spongeBatch) Delete(key []byte) error { panic("implement me") }
93+
func (b *spongeBatch) ValueSize() int { return 100 }
94+
func (b *spongeBatch) Write() error { return nil }
95+
func (b *spongeBatch) Reset() {}
96+
func (b *spongeBatch) Replay(w ethdb.KeyValueWriter) error { return nil }
97+
98+
// kv is a single key/value pair.
type kv struct {
	k []byte
	v []byte
}

// kvs is a list of key/value pairs. It implements sort.Interface, ordering
// the pairs by their key bytes.
type kvs []kv

// Len reports the number of pairs in the list.
func (list kvs) Len() int { return len(list) }

// Less reports whether the key at index i sorts strictly before the key at
// index j.
func (list kvs) Less(i, j int) bool {
	left, right := list[i].k, list[j].k
	return bytes.Compare(left, right) == -1
}

// Swap exchanges the pairs at indices i and j.
func (list kvs) Swap(i, j int) {
	tmp := list[i]
	list[i] = list[j]
	list[j] = tmp
}
114+
115+
// The function must return
116+
// 1 if the fuzzer should increase priority of the
117+
// given input during subsequent fuzzing (for example, the input is lexically
118+
// correct and was parsed successfully);
119+
// -1 if the input must not be added to corpus even if gives new coverage; and
120+
// 0 otherwise
121+
// other values are reserved for future use.
122+
func Fuzz(data []byte) int {
123+
f := fuzzer{
124+
input: bytes.NewReader(data),
125+
exhausted: false,
126+
}
127+
return f.fuzz()
128+
}
129+
130+
func Debug(data []byte) int {
131+
f := fuzzer{
132+
input: bytes.NewReader(data),
133+
exhausted: false,
134+
debugging: true,
135+
}
136+
return f.fuzz()
137+
}
138+
139+
func (f *fuzzer) fuzz() int {
140+
141+
// This spongeDb is used to check the sequence of disk-db-writes
142+
var (
143+
spongeA = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
144+
dbA = trie.NewDatabase(spongeA)
145+
trieA, _ = trie.New(common.Hash{}, dbA)
146+
spongeB = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
147+
trieB = trie.NewStackTrie(spongeB)
148+
vals kvs
149+
useful bool
150+
maxElements = 10000
151+
)
152+
// Fill the trie with elements
153+
for i := 0; !f.exhausted && i < maxElements; i++ {
154+
k := f.read(32)
155+
v := f.readSlice(1, 500)
156+
if f.exhausted {
157+
// If it was exhausted while reading, the value may be all zeroes,
158+
// thus 'deletion' which is not supported on stacktrie
159+
break
160+
}
161+
vals = append(vals, kv{k: k, v: v})
162+
trieA.Update(k, v)
163+
useful = true
164+
}
165+
if !useful {
166+
return 0
167+
}
168+
// Flush trie -> database
169+
rootA, err := trieA.Commit(nil)
170+
if err != nil {
171+
panic(err)
172+
}
173+
// Flush memdb -> disk (sponge)
174+
dbA.Commit(rootA, false, nil)
175+
176+
// Stacktrie requires sorted insertion
177+
sort.Sort(vals)
178+
for _, kv := range vals {
179+
if f.debugging {
180+
fmt.Printf("{\"0x%x\" , \"0x%x\"} // stacktrie.Update\n", kv.k, kv.v)
181+
}
182+
trieB.Update(kv.k, kv.v)
183+
}
184+
rootB := trieB.Hash()
185+
if _, err := trieB.Commit(); err != nil {
186+
panic(err)
187+
}
188+
if rootA != rootB {
189+
panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB))
190+
}
191+
sumA := spongeA.sponge.Sum(nil)
192+
sumB := spongeB.sponge.Sum(nil)
193+
if !bytes.Equal(sumA, sumB) {
194+
panic(fmt.Sprintf("sequence differ: (trie) %x != %x (stacktrie)", sumA, sumB))
195+
}
196+
return 1
197+
}

tests/fuzzers/trie/trie-fuzzer.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,15 +122,22 @@ func Generate(input []byte) randTest {
122122
return steps
123123
}
124124

125+
// The function must return
126+
// 1 if the fuzzer should increase priority of the
127+
// given input during subsequent fuzzing (for example, the input is lexically
128+
// correct and was parsed successfully);
129+
// -1 if the input must not be added to corpus even if it gives new coverage; and
130+
// 0 otherwise
131+
// other values are reserved for future use.
125132
func Fuzz(input []byte) int {
126133
program := Generate(input)
127134
if len(program) == 0 {
128-
return -1
135+
return 0
129136
}
130137
if err := runRandTest(program); err != nil {
131138
panic(err)
132139
}
133-
return 0
140+
return 1
134141
}
135142

136143
func runRandTest(rt randTest) error {

trie/stacktrie.go

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -314,19 +314,22 @@ func (st *StackTrie) hash() {
314314
panic(err)
315315
}
316316
case extNode:
317+
st.children[0].hash()
317318
h = newHasher(false)
318319
defer returnHasherToPool(h)
319320
h.tmp.Reset()
320-
st.children[0].hash()
321-
// This is also possible:
322-
//sz := hexToCompactInPlace(st.key)
323-
//n := [][]byte{
324-
// st.key[:sz],
325-
// st.children[0].val,
326-
//}
327-
n := [][]byte{
328-
hexToCompact(st.key),
329-
st.children[0].val,
321+
var valuenode node
322+
if len(st.children[0].val) < 32 {
323+
valuenode = rawNode(st.children[0].val)
324+
} else {
325+
valuenode = hashNode(st.children[0].val)
326+
}
327+
n := struct {
328+
Key []byte
329+
Val node
330+
}{
331+
Key: hexToCompact(st.key),
332+
Val: valuenode,
330333
}
331334
if err := rlp.Encode(&h.tmp, n); err != nil {
332335
panic(err)
@@ -406,6 +409,18 @@ func (st *StackTrie) Commit() (common.Hash, error) {
406409
return common.Hash{}, ErrCommitDisabled
407410
}
408411
st.hash()
409-
h := common.BytesToHash(st.val)
410-
return h, nil
412+
if len(st.val) != 32 {
413+
// If the node's RLP isn't 32 bytes long, the node will not
414+
// be hashed (and committed), and instead contain the rlp-encoding of the
415+
// node. For the top level node, we need to force the hashing+commit.
416+
ret := make([]byte, 32)
417+
h := newHasher(false)
418+
defer returnHasherToPool(h)
419+
h.sha.Reset()
420+
h.sha.Write(st.val)
421+
h.sha.Read(ret)
422+
st.db.Put(ret, st.val)
423+
return common.BytesToHash(ret), nil
424+
}
425+
return common.BytesToHash(st.val), nil
411426
}

trie/stacktrie_test.go

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,3 +240,52 @@ func TestDerivableList(t *testing.T) {
240240
}
241241
}
242242
}
243+
244+
// TestUpdateSmallNodes tests a case where the leaves are small (both key and value),
245+
// which causes a lot of node-within-node. This case was found via fuzzing.
246+
func TestUpdateSmallNodes(t *testing.T) {
247+
st := NewStackTrie(nil)
248+
nt, _ := New(common.Hash{}, NewDatabase(memorydb.New()))
249+
kvs := []struct {
250+
K string
251+
V string
252+
}{
253+
{"63303030", "3041"}, // stacktrie.Update
254+
{"65", "3000"}, // stacktrie.Update
255+
}
256+
for _, kv := range kvs {
257+
nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
258+
st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
259+
}
260+
if nt.Hash() != st.Hash() {
261+
t.Fatalf("error %x != %x", st.Hash(), nt.Hash())
262+
}
263+
}
264+
265+
// TestUpdateVariableKeys contains a case which stacktrie fails: when keys of different
266+
// sizes are used, and the second one has the same prefix as the first, then the
267+
// stacktrie fails, since it's unable to 'expand' on an already added leaf.
268+
// For all practical purposes, this is fine, since keys are fixed-size length
269+
// in account and storage tries.
270+
//
271+
// The test is marked as 'skipped', and exists just to have the behaviour documented.
272+
// This case was found via fuzzing.
273+
func TestUpdateVariableKeys(t *testing.T) {
274+
t.SkipNow()
275+
st := NewStackTrie(nil)
276+
nt, _ := New(common.Hash{}, NewDatabase(memorydb.New()))
277+
kvs := []struct {
278+
K string
279+
V string
280+
}{
281+
{"0x33303534636532393561313031676174", "303030"},
282+
{"0x3330353463653239356131303167617430", "313131"},
283+
}
284+
for _, kv := range kvs {
285+
nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
286+
st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
287+
}
288+
if nt.Hash() != st.Hash() {
289+
t.Fatalf("error %x != %x", st.Hash(), nt.Hash())
290+
}
291+
}

trie/trie_test.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -853,6 +853,42 @@ func TestCommitSequenceStackTrie(t *testing.T) {
853853
}
854854
}
855855

856+
// TestCommitSequenceSmallRoot tests that a trie which is essentially only a
857+
// small (<32 byte) shortnode with an included value is properly committed to a
858+
// database.
859+
// This case might not matter, since in practice, all keys are 32 bytes, which means
860+
// that even a small trie which contains a leaf will have an extension making it
861+
// not fit into 32 bytes, rlp-encoded. However, it's still the correct thing to do.
862+
func TestCommitSequenceSmallRoot(t *testing.T) {
863+
s := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "a"}
864+
db := NewDatabase(s)
865+
trie, _ := New(common.Hash{}, db)
866+
// Another sponge is used for the stacktrie commits
867+
stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"}
868+
stTrie := NewStackTrie(stackTrieSponge)
869+
// Add a single small-element to the trie(s)
870+
key := make([]byte, 5)
871+
key[0] = 1
872+
trie.TryUpdate(key, []byte{0x1})
873+
stTrie.TryUpdate(key, []byte{0x1})
874+
// Flush trie -> database
875+
root, _ := trie.Commit(nil)
876+
// Flush memdb -> disk (sponge)
877+
db.Commit(root, false, nil)
878+
// And flush stacktrie -> disk
879+
stRoot, err := stTrie.Commit()
880+
if err != nil {
881+
t.Fatalf("Failed to commit stack trie %v", err)
882+
}
883+
if stRoot != root {
884+
t.Fatalf("root wrong, got %x exp %x", stRoot, root)
885+
}
886+
fmt.Printf("root: %x\n", stRoot)
887+
if got, exp := stackTrieSponge.sponge.Sum(nil), s.sponge.Sum(nil); !bytes.Equal(got, exp) {
888+
t.Fatalf("test, disk write sequence wrong:\ngot %x exp %x\n", got, exp)
889+
}
890+
}
891+
856892
// BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a fixed number of updates to a trie.
857893
// This benchmark is meant to capture the difference on efficiency of small versus large changes. Typically,
858894
// storage tries are small (a couple of entries), whereas the full post-block account trie update is large (a couple

0 commit comments

Comments
 (0)