trie, tests/fuzzers: implement a stacktrie fuzzer + stacktrie fixes (#21799)

holiman · web-flow · commit 81678971dbd5 · 2020-11-09T15:08:12.000+01:00
* trie: fix error in stacktrie not committing small roots

* fuzzers: make trie-fuzzer use correct returnvalues

* trie: improved tests

* tests/fuzzers: fuzzer for stacktrie vs regular trie

* test/fuzzers: make stacktrie fuzzer use 32-byte keys

* trie: fix error in stacktrie with small nodes

* trie: add (skipped) testcase for stacktrie

* tests/fuzzers: address review comments for stacktrie fuzzer

* trie: fix docs in stacktrie
diff --git a/tests/fuzzers/stacktrie/debug/main.go b/tests/fuzzers/stacktrie/debug/main.go
@@ -0,0 +1,23 @@
+package main
+
+import (
+	"fmt"
+	"io/ioutil"
+	"os"
+
+	"github.com/ethereum/go-ethereum/tests/fuzzers/stacktrie"
+)
+
+func main() {
+	if len(os.Args) != 2 {
+		fmt.Fprintf(os.Stderr, "Usage: debug <file>")
+		os.Exit(1)
+	}
+	crasher := os.Args[1]
+	data, err := ioutil.ReadFile(crasher)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "error loading crasher %v: %v", crasher, err)
+		os.Exit(1)
+	}
+	stacktrie.Debug(data)
+}
diff --git a/tests/fuzzers/stacktrie/trie_fuzzer.go b/tests/fuzzers/stacktrie/trie_fuzzer.go
@@ -0,0 +1,197 @@
+// Copyright 2020 The go-ethereum Authors
+// This file is part of the go-ethereum library.
+//
+// The go-ethereum library is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// The go-ethereum library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU Lesser General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
+
+package stacktrie
+
+import (
+	"bytes"
+	"encoding/binary"
+	"errors"
+	"fmt"
+	"hash"
+	"io"
+	"sort"
+
+	"github.com/ethereum/go-ethereum/common"
+	"github.com/ethereum/go-ethereum/ethdb"
+	"github.com/ethereum/go-ethereum/trie"
+	"golang.org/x/crypto/sha3"
+)
+
+type fuzzer struct {
+	input     io.Reader
+	exhausted bool
+	debugging bool
+}
+
+func (f *fuzzer) read(size int) []byte {
+	out := make([]byte, size)
+	if _, err := f.input.Read(out); err != nil {
+		f.exhausted = true
+	}
+	return out
+}
+
+func (f *fuzzer) readSlice(min, max int) []byte {
+	var a uint16
+	binary.Read(f.input, binary.LittleEndian, &a)
+	size := min + int(a)%(max-min)
+	out := make([]byte, size)
+	if _, err := f.input.Read(out); err != nil {
+		f.exhausted = true
+	}
+	return out
+}
+
+// spongeDb is a dummy db backend which accumulates writes in a sponge
+type spongeDb struct {
+	sponge hash.Hash
+	debug  bool
+}
+
+func (s *spongeDb) Has(key []byte) (bool, error)             { panic("implement me") }
+func (s *spongeDb) Get(key []byte) ([]byte, error)           { return nil, errors.New("no such elem") }
+func (s *spongeDb) Delete(key []byte) error                  { panic("implement me") }
+func (s *spongeDb) NewBatch() ethdb.Batch                    { return &spongeBatch{s} }
+func (s *spongeDb) Stat(property string) (string, error)     { panic("implement me") }
+func (s *spongeDb) Compact(start []byte, limit []byte) error { panic("implement me") }
+func (s *spongeDb) Close() error                             { return nil }
+
+func (s *spongeDb) Put(key []byte, value []byte) error {
+	if s.debug {
+		fmt.Printf("db.Put %x : %x\n", key, value)
+	}
+	s.sponge.Write(key)
+	s.sponge.Write(value)
+	return nil
+}
+func (s *spongeDb) NewIterator(prefix []byte, start []byte) ethdb.Iterator { panic("implement me") }
+
+// spongeBatch is a dummy batch which immediately writes to the underlying spongedb
+type spongeBatch struct {
+	db *spongeDb
+}
+
+func (b *spongeBatch) Put(key, value []byte) error {
+	b.db.Put(key, value)
+	return nil
+}
+func (b *spongeBatch) Delete(key []byte) error             { panic("implement me") }
+func (b *spongeBatch) ValueSize() int                      { return 100 }
+func (b *spongeBatch) Write() error                        { return nil }
+func (b *spongeBatch) Reset()                              {}
+func (b *spongeBatch) Replay(w ethdb.KeyValueWriter) error { return nil }
+
+type kv struct {
+	k, v []byte
+}
+type kvs []kv
+
+func (k kvs) Len() int {
+	return len(k)
+}
+
+func (k kvs) Less(i, j int) bool {
+	return bytes.Compare(k[i].k, k[j].k) < 0
+}
+
+func (k kvs) Swap(i, j int) {
+	k[j], k[i] = k[i], k[j]
+}
+
+// The function must return
+// 1 if the fuzzer should increase priority of the
+//    given input during subsequent fuzzing (for example, the input is lexically
+//    correct and was parsed successfully);
+// -1 if the input must not be added to corpus even if gives new coverage; and
+// 0  otherwise
+// other values are reserved for future use.
+func Fuzz(data []byte) int {
+	f := fuzzer{
+		input:     bytes.NewReader(data),
+		exhausted: false,
+	}
+	return f.fuzz()
+}
+
+func Debug(data []byte) int {
+	f := fuzzer{
+		input:     bytes.NewReader(data),
+		exhausted: false,
+		debugging: true,
+	}
+	return f.fuzz()
+}
+
+func (f *fuzzer) fuzz() int {
+
+	// This spongeDb is used to check the sequence of disk-db-writes
+	var (
+		spongeA     = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
+		dbA         = trie.NewDatabase(spongeA)
+		trieA, _    = trie.New(common.Hash{}, dbA)
+		spongeB     = &spongeDb{sponge: sha3.NewLegacyKeccak256()}
+		trieB       = trie.NewStackTrie(spongeB)
+		vals        kvs
+		useful      bool
+		maxElements = 10000
+	)
+	// Fill the trie with elements
+	for i := 0; !f.exhausted && i < maxElements; i++ {
+		k := f.read(32)
+		v := f.readSlice(1, 500)
+		if f.exhausted {
+			// If it was exhausted while reading, the value may be all zeroes,
+			// thus 'deletion' which is not supported on stacktrie
+			break
+		}
+		vals = append(vals, kv{k: k, v: v})
+		trieA.Update(k, v)
+		useful = true
+	}
+	if !useful {
+		return 0
+	}
+	// Flush trie -> database
+	rootA, err := trieA.Commit(nil)
+	if err != nil {
+		panic(err)
+	}
+	// Flush memdb -> disk (sponge)
+	dbA.Commit(rootA, false, nil)
+
+	// Stacktrie requires sorted insertion
+	sort.Sort(vals)
+	for _, kv := range vals {
+		if f.debugging {
+			fmt.Printf("{\"0x%x\" , \"0x%x\"} // stacktrie.Update\n", kv.k, kv.v)
+		}
+		trieB.Update(kv.k, kv.v)
+	}
+	rootB := trieB.Hash()
+	if _, err := trieB.Commit(); err != nil {
+		panic(err)
+	}
+	if rootA != rootB {
+		panic(fmt.Sprintf("roots differ: (trie) %x != %x (stacktrie)", rootA, rootB))
+	}
+	sumA := spongeA.sponge.Sum(nil)
+	sumB := spongeB.sponge.Sum(nil)
+	if !bytes.Equal(sumA, sumB) {
+		panic(fmt.Sprintf("sequence differ: (trie) %x != %x (stacktrie)", sumA, sumB))
+	}
+	return 1
+}
diff --git a/tests/fuzzers/trie/trie-fuzzer.go b/tests/fuzzers/trie/trie-fuzzer.go
@@ -122,15 +122,22 @@ func Generate(input []byte) randTest {
 	return steps
 }
 
+// The function must return
+// 1 if the fuzzer should increase priority of the
+//    given input during subsequent fuzzing (for example, the input is lexically
+//    correct and was parsed successfully);
+// -1 if the input must not be added to corpus even if gives new coverage; and
+// 0  otherwise
+// other values are reserved for future use.
 func Fuzz(input []byte) int {
 	program := Generate(input)
 	if len(program) == 0 {
-		return -1
+		return 0
 	}
 	if err := runRandTest(program); err != nil {
 		panic(err)
 	}
-	return 0
+	return 1
 }
 
 func runRandTest(rt randTest) error {
diff --git a/trie/stacktrie.go b/trie/stacktrie.go
@@ -314,19 +314,22 @@ func (st *StackTrie) hash() {
 			panic(err)
 		}
 	case extNode:
+		st.children[0].hash()
 		h = newHasher(false)
 		defer returnHasherToPool(h)
 		h.tmp.Reset()
-		st.children[0].hash()
-		// This is also possible:
-		//sz := hexToCompactInPlace(st.key)
-		//n := [][]byte{
-		//	st.key[:sz],
-		//	st.children[0].val,
-		//}
-		n := [][]byte{
-			hexToCompact(st.key),
-			st.children[0].val,
+		var valuenode node
+		if len(st.children[0].val) < 32 {
+			valuenode = rawNode(st.children[0].val)
+		} else {
+			valuenode = hashNode(st.children[0].val)
+		}
+		n := struct {
+			Key []byte
+			Val node
+		}{
+			Key: hexToCompact(st.key),
+			Val: valuenode,
 		}
 		if err := rlp.Encode(&h.tmp, n); err != nil {
 			panic(err)
@@ -406,6 +409,18 @@ func (st *StackTrie) Commit() (common.Hash, error) {
 		return common.Hash{}, ErrCommitDisabled
 	}
 	st.hash()
-	h := common.BytesToHash(st.val)
-	return h, nil
+	if len(st.val) != 32 {
+		// If the node's RLP isn't 32 bytes long, the node will not
+		// be hashed (and committed), and instead contain the  rlp-encoding of the
+		// node. For the top level node, we need to force the hashing+commit.
+		ret := make([]byte, 32)
+		h := newHasher(false)
+		defer returnHasherToPool(h)
+		h.sha.Reset()
+		h.sha.Write(st.val)
+		h.sha.Read(ret)
+		st.db.Put(ret, st.val)
+		return common.BytesToHash(ret), nil
+	}
+	return common.BytesToHash(st.val), nil
 }
diff --git a/trie/stacktrie_test.go b/trie/stacktrie_test.go
@@ -240,3 +240,52 @@ func TestDerivableList(t *testing.T) {
 		}
 	}
 }
+
+// TestUpdateSmallNodes tests a case where the leaves are small (both key and value),
+// which causes a lot of node-within-node. This case was found via fuzzing.
+func TestUpdateSmallNodes(t *testing.T) {
+	st := NewStackTrie(nil)
+	nt, _ := New(common.Hash{}, NewDatabase(memorydb.New()))
+	kvs := []struct {
+		K string
+		V string
+	}{
+		{"63303030", "3041"}, // stacktrie.Update
+		{"65", "3000"},       // stacktrie.Update
+	}
+	for _, kv := range kvs {
+		nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
+		st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
+	}
+	if nt.Hash() != st.Hash() {
+		t.Fatalf("error %x != %x", st.Hash(), nt.Hash())
+	}
+}
+
+// TestUpdateVariableKeys contains a case which stacktrie fails: when keys of different
+// sizes are used, and the second one has the same prefix as the first, then the
+// stacktrie fails, since it's unable to 'expand' on an already added leaf.
+// For all practical purposes, this is fine, since keys are fixed-size length
+// in account and storage tries.
+//
+// The test is marked as 'skipped', and exists just to have the behaviour documented.
+// This case was found via fuzzing.
+func TestUpdateVariableKeys(t *testing.T) {
+	t.SkipNow()
+	st := NewStackTrie(nil)
+	nt, _ := New(common.Hash{}, NewDatabase(memorydb.New()))
+	kvs := []struct {
+		K string
+		V string
+	}{
+		{"0x33303534636532393561313031676174", "303030"},
+		{"0x3330353463653239356131303167617430", "313131"},
+	}
+	for _, kv := range kvs {
+		nt.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
+		st.TryUpdate(common.FromHex(kv.K), common.FromHex(kv.V))
+	}
+	if nt.Hash() != st.Hash() {
+		t.Fatalf("error %x != %x", st.Hash(), nt.Hash())
+	}
+}
diff --git a/trie/trie_test.go b/trie/trie_test.go
@@ -853,6 +853,42 @@ func TestCommitSequenceStackTrie(t *testing.T) {
 	}
 }
 
+// TestCommitSequenceSmallRoot tests that a trie which is essentially only a
+// small (<32 byte) shortnode with an included value is properly committed to a
+// database.
+// This case might not matter, since in practice, all keys are 32 bytes, which means
+// that even a small trie which contains a leaf will have an extension making it
+// not fit into 32 bytes, rlp-encoded. However, it's still the correct thing to do.
+func TestCommitSequenceSmallRoot(t *testing.T) {
+	s := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "a"}
+	db := NewDatabase(s)
+	trie, _ := New(common.Hash{}, db)
+	// Another sponge is used for the stacktrie commits
+	stackTrieSponge := &spongeDb{sponge: sha3.NewLegacyKeccak256(), id: "b"}
+	stTrie := NewStackTrie(stackTrieSponge)
+	// Add a single small-element to the trie(s)
+	key := make([]byte, 5)
+	key[0] = 1
+	trie.TryUpdate(key, []byte{0x1})
+	stTrie.TryUpdate(key, []byte{0x1})
+	// Flush trie -> database
+	root, _ := trie.Commit(nil)
+	// Flush memdb -> disk (sponge)
+	db.Commit(root, false, nil)
+	// And flush stacktrie -> disk
+	stRoot, err := stTrie.Commit()
+	if err != nil {
+		t.Fatalf("Failed to commit stack trie %v", err)
+	}
+	if stRoot != root {
+		t.Fatalf("root wrong, got %x exp %x", stRoot, root)
+	}
+	fmt.Printf("root: %x\n", stRoot)
+	if got, exp := stackTrieSponge.sponge.Sum(nil), s.sponge.Sum(nil); !bytes.Equal(got, exp) {
+		t.Fatalf("test, disk write sequence wrong:\ngot %x exp %x\n", got, exp)
+	}
+}
+
 // BenchmarkCommitAfterHashFixedSize benchmarks the Commit (after Hash) of a fixed number of updates to a trie.
 // This benchmark is meant to capture the difference on efficiency of small versus large changes. Typically,
 // storage tries are small (a couple of entries), whereas the full post-block account trie update is large (a couple

Original file line number	Diff line number	Diff line change
`@@ -122,15 +122,22 @@ func Generate(input []byte) randTest {`
`122`	`122`	`return steps`
`123`	`123`	`}`
`124`	`124`
	`125`	`+// The function must return`
	`126`	`+// 1 if the fuzzer should increase priority of the`
	`127`	`+// given input during subsequent fuzzing (for example, the input is lexically`
	`128`	`+// correct and was parsed successfully);`
	`129`	`+// -1 if the input must not be added to corpus even if gives new coverage; and`
	`130`	`+// 0 otherwise`
	`131`	`+// other values are reserved for future use.`
`125`	`132`	`func Fuzz(input []byte) int {`
`126`	`133`	`program := Generate(input)`
`127`	`134`	`if len(program) == 0 {`
`128`		`- return -1`
	`135`	`+ return 0`
`129`	`136`	`}`
`130`	`137`	`if err := runRandTest(program); err != nil {`
`131`	`138`	`panic(err)`
`132`	`139`	`}`
`133`		`- return 0`
	`140`	`+ return 1`
`134`	`141`	`}`
`135`	`142`
`136`	`143`	`func runRandTest(rt randTest) error {`