feat: rlp.ParseTree()

ARR4N · ARR4N · commit e470b2d1c813 · 2024-12-06T16:14:02.000Z
diff --git a/core/types/rlp_backwards_compat.libevm_test.go b/core/types/rlp_backwards_compat.libevm_test.go
@@ -1,18 +1,37 @@
+// Copyright 2024 the libevm authors.
+//
+// The libevm additions to go-ethereum are free software: you can redistribute
+// them and/or modify them under the terms of the GNU Lesser General Public License
+// as published by the Free Software Foundation, either version 3 of the License,
+// or (at your option) any later version.
+//
+// The libevm additions are distributed in the hope that they will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+// General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see
+// <http://www.gnu.org/licenses/>.
+
 package types_test
 
 import (
-	"bytes"
 	"encoding/hex"
+	"math/big"
 	"testing"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 
 	. "github.com/ava-labs/libevm/core/types"
 	"github.com/ava-labs/libevm/libevm/ethtest"
+	"github.com/ava-labs/libevm/rlp"
 )
 
 func TestHeaderRLPBackwardsCompatibility(t *testing.T) {
+	// This is a deliberate change-detector test that locks in backwards
+	// compatibility of RLP encoding.
 	rng := ethtest.NewPseudoRand(42)
 
 	const numExtraBytes = 16
@@ -43,11 +62,48 @@ func TestHeaderRLPBackwardsCompatibility(t *testing.T) {
 	require.Equal(t, len(BlockNonce{}), rng.Read(hdr.Nonce[:]))
 	require.Equal(t, numExtraBytes, rng.Read(hdr.Extra))
 
-	var got bytes.Buffer
-	require.NoError(t, hdr.EncodeRLP(&got))
-
+	// WARNING: changing this hex might break backwards compatibility of RLP
+	// encoding (i.e. block hashes might change)!
 	const wantHex = `f9029aa01a571e7e4d774caf46053201cfe0001b3c355ffcc93f510e671e8809741f0eeda0756095410506ec72a2c287fe83ebf68efb0be177e61acec1c985277e90e52087941bfc3bc193012ba58912c01fb35a3454831a8971a00bc9f064144eb5965c5e5d1020f9f90392e7e06ded9225966abc7c754b410e61a0d942eab201424f4320ec1e1ffa9390baf941629b9349977b5d48e0502dbb9386a035d9d550a9c113f78689b4c161c4605609bb57b83061914c42ad244daa7fc38eb90100718d155798390a6c6782181d1bac1dd64cd956332b008412ddc735f2994e297c8a088c6bb4c637542295ba3cbc3cd399c8127076f4d834d74d5b11a36b6d02e2fe3a583216aa4ccef052df9a96e7a454256bebabdfc38c429079f25913e0f1d7416b2f056c4a115fc757012b1757d2d69f0e5fb87c08605098d9031fa37cd0df6942c5a2da12a4424b978febf5479896165caf573cf82fb3aa10f6ebf6b62bef8ed36b8ea3d4b1ddb80c99afafa37cb8f3393eb6d802f5bc6c8cd6bcd168a7e0061a718218b848d945135b6dff228a4e66bade4717e6f4d318ac98fca12a053af6f98805a764fb5d523cb6f69029522cab9ced907cc75718f7e2c79154ef3fc7a04b31d39ae246d689f23176d679a62ff328f530407cbafd0146f45b2ed635282e88b36f6a5752feff5b881fc7fa9ef217f81d889f073433138e6ba58857515405d28f2a8e904bcda3066d382675f37dd1a18507b5fba02812f2701021506f27190adb52a1313f6d28c77d66ae1aa3d3d6757a762476f488294c7768cddd9ccf881b5da1b6a47970a3a0c8a2b7b2c44161190c82d5e1c8b55e05c7354f1e5f6512924c941fb3d93667dc889bc9df25654e163c88859405c51041475fa03a8c304a732153e20300c3482832d07b65f97958360da414cb438ce252aec6c2`
 	want, err := hex.DecodeString(wantHex)
-	require.NoError(t, err)
-	assert.Equal(t, want, got.Bytes())
+	require.NoError(t, err, "hex.DecodeString()")
+
+	got, err := rlp.EncodeToBytes(hdr)
+	require.NoErrorf(t, err, "rlp.EncodeToBytes(%T)", hdr)
+	assert.Equalf(t, want, got, "rlp.EncodeToBytes(%T)", hdr)
+
+	t.Run("ParseTree", func(t *testing.T) {
+		got, err := rlp.ParseTree(got)
+		require.NoErrorf(t, err, "rlp.ParseTree(rlp.EncodeToBytes(%T))", hdr)
+
+		type (
+			l = rlp.ListNode
+			s = rlp.StringNode
+		)
+		u64Bytes := func(u uint64) []byte { return new(big.Int).SetUint64(u).Bytes() }
+		want := l{
+			s(hdr.ParentHash[:]),
+			s(hdr.UncleHash[:]),
+			s(hdr.Coinbase[:]),
+			s(hdr.Root[:]),
+			s(hdr.TxHash[:]),
+			s(hdr.ReceiptHash[:]),
+			s(hdr.Bloom[:]),
+			s(hdr.Difficulty.Bytes()),
+			s(hdr.Number.Bytes()),
+			s(u64Bytes(hdr.GasLimit)),
+			s(u64Bytes(hdr.GasUsed)),
+			s(u64Bytes(hdr.Time)),
+			s(hdr.Extra[:]),
+			s(hdr.MixDigest[:]),
+			s(hdr.Nonce[:]),
+			s(hdr.BaseFee.Bytes()),
+			s(hdr.WithdrawalsHash[:]),
+			s(u64Bytes(*hdr.BlobGasUsed)),
+			s(u64Bytes(*hdr.ExcessBlobGas)),
+			s(hdr.ParentBeaconRoot[:]),
+		}
+
+		assert.Equal(t, want, got)
+	})
 }
diff --git a/rlp/tree.libevm.go b/rlp/tree.libevm.go
@@ -0,0 +1,194 @@
+// Copyright 2024 the libevm authors.
+//
+// The libevm additions to go-ethereum are free software: you can redistribute
+// them and/or modify them under the terms of the GNU Lesser General Public License
+// as published by the Free Software Foundation, either version 3 of the License,
+// or (at your option) any later version.
+//
+// The libevm additions are distributed in the hope that they will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
+// General Public License for more details.
+//
+// You should have received a copy of the GNU Lesser General Public License
+// along with the go-ethereum library. If not, see
+// <http://www.gnu.org/licenses/>.
+
+package rlp
+
+import (
+	"encoding/binary"
+	"errors"
+	"io"
+)
+
+// An ItemNode is a parsed RLP item in a tree, which may consist solely of a
+// root node. It is one of [ListNode], [StringNode], or [ByteNode], each of
+// which holds only the unpacked value and not its length- or type-denoting tag.
+type ItemNode interface {
+	rlpItem() // unexported marker method, implemented by the concrete node types
+}
+
+var _ = []ItemNode{ListNode(nil), StringNode(nil), ByteNode(0)} // compile-time interface checks
+
+// A ListNode is a slice of RLP items. It is the ItemNode equivalent of [List].
+type ListNode []ItemNode
+
+// A StringNode is an RLP [ItemNode] holding an arbitrary byte slice. It is the
+// ItemNode equivalent of [String].
+type StringNode []byte
+
+// A ByteNode is an RLP [ItemNode] representing an unsigned integer <= 127. It
+// is the ItemNode equivalent of [Byte].
+//
+// [ParseTree] will only return a ByteNode if the value is in the range [0,127]
+// but a ByteNode MAY be outside of this range for the purpose of re-encoding.
+type ByteNode byte
+
+func (ListNode) rlpItem()   {} // marks ListNode as an ItemNode
+func (StringNode) rlpItem() {} // marks StringNode as an ItemNode
+func (ByteNode) rlpItem()   {} // marks ByteNode as an ItemNode
+
+var (
+	errConcatenated  = errors.New("concatenated items outside of list") // more than one item at the top level
+	errTrailingBytes = errors.New("trailing bytes after parsing")       // input remains after the last item
+	errTooLong       = errors.New("parsing >8 big-endian bytes")        // length prefix too wide for a uint64
+)
+
+// ParseTree parses the RLP-encoded buffer and returns one of the concrete
+// ItemNode types. All [StringNode] instances will be backed by the same memory
+// as the argument received by ParseTree.
+func ParseTree(rlp []byte) (ItemNode, error) {
+	return parse(rlp, false /*inList*/)
+}
+
+// parseList is a convenience wrapper around [slicer.short] and [slicer.long],
+// returning their return buffer as a [ListNode].
+func parseList(str []byte, err error) (ItemNode, error) {
+	if err != nil {
+		return nil, err
+	}
+	return parse(str, true)
+}
+
+// parse decodes every item in rlp. If inList is true then rlp is treated as the
+// payload of a list and all of its items are returned as a [ListNode], which
+// has no items if rlp is empty. Otherwise rlp MUST hold exactly one item, which
+// is returned as its concrete type; [io.EOF] is returned if rlp is empty and
+// errConcatenated if it holds more than one item.
+//
+// Truncated items also result in [io.EOF], as returned by the slicer. No check
+// is made that items use their shortest possible encoding.
+func parse(rlp []byte, inList bool) (ItemNode, error) {
+	buf := &slicer{buf: rlp}
+	var items []ItemNode
+
+	for eof := false; !eof; {
+		switch tag, err := buf.byte(); {
+		case errors.Is(err, io.EOF):
+			eof = true
+
+		case err != nil:
+			// Impossible but being defensive in case of a future refactor.
+			return nil, err
+
+		case tag <= 0x7f:
+			// A single byte that is its own encoding.
+			items = append(items, ByteNode(tag))
+
+		case tag <= 0xb7:
+			// Short string: the tag carries the length as an offset from 0x80.
+			str, err := buf.short(tag, 0x80)
+			if err != nil {
+				return nil, err
+			}
+			items = append(items, StringNode(str))
+
+		case tag <= 0xbf:
+			// Long string: the tag carries the size of the length that follows.
+			str, err := buf.long(tag, 0xb7)
+			if err != nil {
+				return nil, err
+			}
+			items = append(items, StringNode(str))
+
+		case tag <= 0xf7:
+			// Short list: as for a short string, but the payload is itself RLP.
+			list, err := parseList(buf.short(tag, 0xc0))
+			if err != nil {
+				return nil, err
+			}
+			items = append(items, list)
+
+		default:
+			// Long list: as for a long string, but the payload is itself RLP.
+			list, err := parseList(buf.long(tag, 0xf7))
+			if err != nil {
+				return nil, err
+			}
+			items = append(items, list)
+		}
+
+		// Checked after every item so that an error in a malformed second item
+		// is still reported in preference to errConcatenated.
+		if !inList && len(items) > 1 {
+			return nil, errConcatenated
+		}
+	}
+
+	// Unreachable at present because the loop only ends once the buffer is
+	// exhausted, but kept as a guard against future refactors.
+	if buf.left() > 0 {
+		return nil, errTrailingBytes
+	}
+	if inList {
+		return ListNode(items), nil
+	}
+	if len(items) == 0 {
+		// Empty input holds no item; indexing items would otherwise panic.
+		return nil, io.EOF
+	}
+	return items[0], nil
+}
+
+// A slicer is a byte-slice reader that returns slices backed by the same memory
+// as its buffer.
+type slicer struct {
+	buf []byte // input being read; slicer methods only ever sub-slice it
+	i   uint64 // offset into buf of the next unread byte
+}
+
+// len returns the total number of bytes in the buffer, regardless of how many
+// have already been consumed.
+func (s *slicer) len() uint64 {
+	total := len(s.buf)
+	return uint64(total)
+}
+
+// left returns the number of bytes in the buffer that are yet to be consumed.
+func (s *slicer) left() uint64 {
+	total, consumed := s.len(), s.i
+	return total - consumed
+}
+
+// next consumes and returns the next n bytes as a sub-slice of the buffer. If
+// fewer than n bytes are left then nothing is consumed and [io.EOF] is returned.
+func (s *slicer) next(n uint64) ([]byte, error) {
+	if s.left() < n {
+		return nil, io.EOF
+	}
+	start := s.i
+	s.i += n
+	return s.buf[start:s.i], nil
+}
+
+// byte consumes and returns the next byte of the buffer, or [io.EOF] if every
+// byte has already been consumed.
+func (s *slicer) byte() (byte, error) {
+	if s.left() == 0 {
+		return 0, io.EOF
+	}
+	b := s.buf[s.i]
+	s.i++
+	return b, nil
+}
+
+// short consumes and returns the payload of a string or list of <=55 bytes, the
+// length of which is carried by the tag itself as an offset from base. The
+// caller MUST ensure that tag >= base as the subtraction otherwise wraps.
+func (s *slicer) short(tag, base byte) ([]byte, error) {
+	size := tag - base
+	return s.next(uint64(size))
+}
+
+// long consumes and returns the payload of a string or list of >55 bytes. The
+// tag, as an offset from base, is the number of bytes immediately following it
+// that hold the payload length as a big-endian integer; those bytes are
+// consumed first, then the payload itself.
+func (s *slicer) long(tag, base byte) ([]byte, error) {
+	sizeOfSize := uint64(tag - base)
+	size, err := s.bigEndian(sizeOfSize)
+	if err != nil {
+		return nil, err
+	}
+	return s.next(size)
+}
+
+// bigEndian consumes the next nBytes bytes and returns them interpreted as a
+// big-endian unsigned integer. It returns errTooLong, without consuming
+// anything, if nBytes is too large for the value to fit in a uint64, and
+// propagates the error from [slicer.next] if too few bytes are left.
+func (s *slicer) bigEndian(nBytes uint64) (uint64, error) {
+	const width = 8 // size of a uint64 in bytes
+	if nBytes > width {
+		return 0, errTooLong
+	}
+
+	raw, err := s.next(nBytes)
+	if err != nil {
+		return 0, err
+	}
+
+	// Right-align the bytes in a zeroed, full-width array so that the missing
+	// high-order bytes are treated as zero.
+	var padded [width]byte
+	offset := width - len(raw)
+	copy(padded[offset:], raw)
+	return binary.BigEndian.Uint64(padded[:]), nil
+}
diff --git a/rlp/tree.libevm_test.go b/rlp/tree.libevm_test.go