Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 50 additions & 6 deletions core/stateless/stats.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,9 @@
package stateless

import (
"maps"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/metrics"
"github.com/ethereum/go-ethereum/rlp"
)

var (
Expand Down Expand Up @@ -86,17 +85,62 @@
}
}

// isLeafNode reports whether the given RLP-encoded trie node is a leaf node.
//
// The data is decoded as an RLP list of byte strings. The node is treated as
// a leaf when the list has exactly 2 elements ([key, value]) and the first
// byte of the compact-encoded key has the terminator flag (0x20) set.
// Empty input, data that fails to decode, lists of any other length (e.g.
// 17-element branch nodes) and 2-element nodes with an empty key yield false.
func isLeafNode(nodeData []byte) bool {
if len(nodeData) == 0 {
return false
}

Check failure on line 96 in core/stateless/stats.go

View workflow job for this annotation

GitHub Actions / Lint

File is not properly formatted (goimports)
// Decode the node as an RLP list of byte strings; anything that does not
// decode into that shape is reported as "not a leaf".
var elems [][]byte
if err := rlp.DecodeBytes(nodeData, &elems); err != nil {
return false
}

// Leaf and extension nodes both have exactly 2 elements: [key, value/child].
// They are told apart further down by the key's terminator flag.
if len(elems) != 2 {
return false // Branch nodes have 17 elements
}

// The size of the second element is deliberately not inspected: a leaf
// value may itself be 32 bytes long, so its length cannot reliably
// distinguish a value from a hash reference. Only the terminator flag of
// the key is used.

// In compact encoding, the first nibble of the key carries the node-type
// flags.
if len(elems[0]) > 0 {
// Get the first byte which contains the flags
flags := elems[0][0]
// 0x20 is the terminator flag (bit 5 of the first byte). It is set for
// leaf keys (first byte 0x2? or 0x3?) and clear for extension keys.
return (flags & 0x20) != 0
}

// An empty key carries no flags, so the node is not reported as a leaf.
return false
}

// Add records the access depth (path length) of every leaf node in the given
// path-to-node map; nodes that are not leaves are skipped.
// If `owner` is the zero hash, accesses are attributed to the account trie;
// otherwise, they are attributed to the storage trie of that account.
func (s *WitnessStats) Add(nodes map[string][]byte, owner common.Hash) {
if owner == (common.Hash{}) {
for path := range maps.Keys(nodes) {
s.accountTrie.add(int64(len(path)))
for path, nodeData := range nodes {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We can determine the outermost nodes (a "shortNode" with the value embedded) from the node paths alone.

e.g., paths: [], [1], [1,2], [1,2,3], [1,2,4]

Since no other path extends [1,2,3], we know that [1,2,3] is one of the target nodes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's a good point, the problem is that this is a map and so it forces us to get its keys, sort them and then go over them again and again, like a tree. Probably more efficient if you can make it work, but this is also quite easy to mess up and the code will be way more unreadable. But that's just my intuition, I'll think about it.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Actually, I think sorting the paths and checking their lengths is enough to make this more efficient. I'll continue investigating.

// Only record depth for leaf nodes
if isLeafNode(nodeData) {
s.accountTrie.add(int64(len(path)))
}
}
} else {
for path := range maps.Keys(nodes) {
s.storageTrie.add(int64(len(path)))
for path, nodeData := range nodes {
// Only record depth for leaf nodes
if isLeafNode(nodeData) {
s.storageTrie.add(int64(len(path)))
}
}
}
}
Expand Down
176 changes: 176 additions & 0 deletions core/stateless/stats_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
// Copyright 2025 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.

package stateless

import (
"testing"

"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/rlp"
)

// TestIsLeafNode runs isLeafNode against hand-built RLP nodes covering leaf
// keys (terminator flag set), extension keys (terminator flag clear), a
// 17-element branch node, empty input and undecodable input.
func TestIsLeafNode(t *testing.T) {
tests := []struct {
name string
nodeData []byte
want bool
}{
{
name: "leaf node with terminator",
// Compact encoding: first byte 0x20 means even length key with terminator
// This represents a leaf node
nodeData: mustEncodeNode(t, [][]byte{
{0x20, 0x01, 0x02, 0x03}, // Key with terminator flag
{0x01, 0x02, 0x03, 0x04}, // Value

Check failure on line 38 in core/stateless/stats_test.go

View workflow job for this annotation

GitHub Actions / Lint

File is not properly formatted (goimports)
}),
want: true,
},
{
name: "leaf node with even key and terminator",
// Compact encoding: a first byte of 0x3? means odd length key with terminator.
// NOTE(review): the case name says "even", but the 0x10 bit marks an odd
// length key (compare the 0x10 extension case below).
nodeData: mustEncodeNode(t, [][]byte{
{0x30, 0x01, 0x02}, // Key with terminator flag (odd length)
{0x05, 0x06}, // Value
}),
want: true,
},
{
name: "extension node (no terminator)",
// Compact encoding: first byte 0x00 means even length key without terminator
// This represents an extension node
nodeData: mustEncodeNode(t, [][]byte{
{0x00, 0x01, 0x02}, // Key without terminator flag
{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, // 31 bytes standing in for a child hash
0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a,
0x1b, 0x1c, 0x1d, 0x1e, 0x1f},
}),
want: false,
},
{
name: "extension node with odd key (no terminator)",
// Compact encoding: first byte 0x10 means odd length key without terminator
nodeData: mustEncodeNode(t, [][]byte{
{0x10, 0x01, 0x02, 0x03}, // Key without terminator flag (odd length)
{0x01, 0x02, 0x03, 0x04}, // Short placeholder for the child reference
}),
want: false,
},
{
name: "branch node",
// Branch nodes have 17 elements
nodeData: mustEncodeNode(t, [][]byte{
{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {},
}),
want: false,
},
{
name: "empty data",
nodeData: []byte{},
want: false,
},
{
name: "invalid RLP",
nodeData: []byte{0xff, 0xff, 0xff},
want: false,
},
}

// Run every case as its own subtest so failures are reported by case name.
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
got := isLeafNode(tt.nodeData)
if got != tt.want {
t.Errorf("isLeafNode() = %v, want %v", got, tt.want)
}
})
}
}

// mustEncodeNode RLP-encodes elems and returns the encoded bytes. It fails
// the calling test immediately if the encoder returns an error.
func mustEncodeNode(t *testing.T, elems [][]byte) []byte {
	// Mark as a helper so a failure is reported at the caller's line rather
	// than inside this function.
	t.Helper()

	data, err := rlp.EncodeToBytes(elems)
	if err != nil {
		t.Fatalf("Failed to encode node: %v", err)
	}
	return data
}

// TestWitnessStats checks that WitnessStats.Add samples only leaf nodes and
// attributes them to the account trie for a zero owner hash and to the
// storage trie for a non-zero owner hash.
func TestWitnessStats(t *testing.T) {
	// Create a witness stats collector
	stats := NewWitnessStats()

	// Create witness data with both leaf and non-leaf nodes
	witness := map[string][]byte{
		// Leaf node at depth 4 (path length 4)
		"abcd": mustEncodeNode(t, [][]byte{
			{0x20, 0x01, 0x02}, // Key with terminator
			{0x01, 0x02},       // Value
		}),
		// Extension node at depth 2 (should not be counted)
		"ab": mustEncodeNode(t, [][]byte{
			{0x00, 0x01, 0x02}, // Key without terminator
			{0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d,
				0x0e, 0x0f, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0x1a,
				0x1b, 0x1c, 0x1d, 0x1e, 0x1f}, // 31-byte hash (simulated)
		}),
		// Another leaf node at depth 6
		"abcdef": mustEncodeNode(t, [][]byte{
			{0x30, 0x01}, // Key with terminator
			{0x03, 0x04}, // Value
		}),
		// Branch node (should not be counted)
		"a": mustEncodeNode(t, [][]byte{
			{}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {},
		}),
	}

	// Add account trie data (zero owner hash)
	stats.Add(witness, common.Hash{})

	// Verify only leaf nodes were counted. This must be fatal: the average
	// below divides by the sample count, which would panic with a division by
	// zero if nothing had been recorded.
	if stats.accountTrie.samples != 2 {
		t.Fatalf("Expected 2 leaf nodes in account trie, got %d", stats.accountTrie.samples)
	}

	// Check the depth statistics
	expectedAvg := int64((4 + 6) / 2) // Average of path lengths 4 and 6
	gotAvg := stats.accountTrie.totalDepth / stats.accountTrie.samples
	if gotAvg != expectedAvg {
		t.Errorf("Expected average depth %d, got %d", expectedAvg, gotAvg)
	}
	if stats.accountTrie.minDepth != 4 {
		t.Errorf("Expected min depth 4, got %d", stats.accountTrie.minDepth)
	}
	if stats.accountTrie.maxDepth != 6 {
		t.Errorf("Expected max depth 6, got %d", stats.accountTrie.maxDepth)
	}

	// Test storage trie (non-zero owner hash)
	storageStats := NewWitnessStats()
	storageWitness := map[string][]byte{
		// Leaf node
		"xyz": mustEncodeNode(t, [][]byte{
			{0x20, 0x01}, // Key with terminator
			{0x05, 0x06}, // Value
		}),
	}
	storageStats.Add(storageWitness, common.HexToHash("0x1234"))

	if storageStats.storageTrie.samples != 1 {
		t.Errorf("Expected 1 leaf node in storage trie, got %d", storageStats.storageTrie.samples)
	}
	// A storage access must not leak into the account trie statistics.
	if storageStats.accountTrie.samples != 0 {
		t.Errorf("Expected 0 nodes in account trie for storage access, got %d", storageStats.accountTrie.samples)
	}
}
Loading