diff --git a/.changeset/lovely-cooks-add.md b/.changeset/lovely-cooks-add.md new file mode 100644 index 00000000000..6637c92478d --- /dev/null +++ b/.changeset/lovely-cooks-add.md @@ -0,0 +1,5 @@ +--- +'openzeppelin-solidity': minor +--- + +`RLP`: Add library for Ethereum's Recursive Length Prefix encoding/decoding. diff --git a/contracts/utils/README.adoc b/contracts/utils/README.adoc index a536e3b51f8..f3e19d0eca2 100644 --- a/contracts/utils/README.adoc +++ b/contracts/utils/README.adoc @@ -42,6 +42,7 @@ Miscellaneous contracts and libraries containing utility functions you can use t * {InteroperableAddress}: Library for formatting and parsing ERC-7930 interoperable addresses. * {Blockhash}: A library for accessing historical block hashes beyond the standard 256 block limit utilizing EIP-2935's historical blockhash functionality. * {Time}: A library that provides helpers for manipulating time-related objects, including a `Delay` type. + * {RLP}: Library for encoding and decoding data in Ethereum's Recursive Length Prefix format. [NOTE] ==== @@ -143,3 +144,5 @@ Ethereum contracts have no native concept of an interface, so applications must {{Blockhash}} {{Time}} + +{{RLP}} diff --git a/contracts/utils/RLP.sol b/contracts/utils/RLP.sol new file mode 100644 index 00000000000..78dcfc9aa96 --- /dev/null +++ b/contracts/utils/RLP.sol @@ -0,0 +1,327 @@ +// SPDX-License-Identifier: MIT +pragma solidity ^0.8.27; + +import {Math} from "./math/Math.sol"; +import {Bytes} from "./Bytes.sol"; +import {Memory} from "./Memory.sol"; + +/** + * @dev Library for encoding and decoding data in RLP format. + * Recursive Length Prefix (RLP) is the main encoding method used to serialize objects in Ethereum. + * It's used for encoding everything from transactions to blocks to Patricia-Merkle tries. + */ +library RLP { + using Math for uint256; + using Bytes for *; + using Memory for *; + + /// @dev Items with length 0 are not RLP items. 
+    error RLPEmptyItem();
+
+    /// @dev The `item` is not of the `expected` type.
+    error RLPUnexpectedType(ItemType expected, ItemType actual);
+
+    /// @dev The item is not long enough to contain the data.
+    error RLPInvalidDataRemainder(uint256 minLength, uint256 actualLength);
+
+    /// @dev The content length does not match the expected length.
+    error RLPContentLengthMismatch(uint256 expectedLength, uint256 actualLength);
+
+    /// @dev View over an RLP encoded item stored in memory. The item itself is not copied.
+    struct Item {
+        uint256 length; // Total length of the item in bytes (prefix included)
+        Memory.Pointer ptr; // Memory pointer to the start of the item (its prefix byte)
+    }
+
+    enum ItemType {
+        Data, // Single data value
+        List // List of RLP encoded items
+    }
+
+    /**
+     * @dev Maximum length for data that will be encoded using the short format.
+     * If `data.length <= 55 bytes`, it will be encoded as: `[0x80 + length]` + data.
+     */
+    uint8 internal constant SHORT_THRESHOLD = 55;
+
+    /// @dev Prefix base for short strings (0-55 bytes): the prefix is `0x80 + length`.
+    uint8 internal constant SHORT_OFFSET = 128;
+    /// @dev First prefix of long strings (0xB8): the prefix is `0xB7 + length of the length`.
+    uint8 internal constant LONG_LENGTH_OFFSET = SHORT_OFFSET + SHORT_THRESHOLD + 1; // 184
+    /// @dev Prefix base for short lists (0xC0): the prefix is `0xC0 + payload length`.
+    uint8 internal constant LONG_OFFSET = LONG_LENGTH_OFFSET + 8; // 192
+    /// @dev First prefix of long lists (0xF8): the prefix is `0xF7 + length of the length`.
+    uint8 internal constant SHORT_LIST_OFFSET = LONG_OFFSET + SHORT_THRESHOLD + 1; // 248
+
+    /**
+     * @dev Encodes a bytes array using RLP rules.
+     * Single bytes below 128 are encoded as themselves, otherwise as length prefix + data.
+     *
+     * NOTE: In the single byte case the input `buffer` itself is returned (no copy is made).
+     */
+    function encode(bytes memory buffer) internal pure returns (bytes memory) {
+        return _isSingleByte(buffer) ? buffer : bytes.concat(_encodeLength(buffer.length, SHORT_OFFSET), buffer);
+    }
+
+    /**
+     * @dev Encodes an array of bytes using RLP (as a list).
+     * First it {_flatten}s the list of encoded items, then encodes it with the list prefix.
+     *
+     * NOTE: The elements of `list` are concatenated as-is, so each one must already be RLP encoded.
+     */
+    function encode(bytes[] memory list) internal pure returns (bytes memory) {
+        bytes memory flattened = _flatten(list);
+        return bytes.concat(_encodeLength(flattened.length, LONG_OFFSET), flattened);
+    }
+
+    /// @dev Convenience method to encode a string as RLP.
+    function encode(string memory str) internal pure returns (bytes memory) {
+        return encode(bytes(str));
+    }
+
+    /// @dev Convenience method to encode an address as RLP bytes (i.e. encoded as packed 20 bytes).
+    function encode(address addr) internal pure returns (bytes memory) {
+        return encode(abi.encodePacked(addr));
+    }
+
+    /// @dev Convenience method to encode a uint256 as RLP. See {_binaryBuffer}.
+    function encode(uint256 value) internal pure returns (bytes memory) {
+        return encode(_binaryBuffer(value));
+    }
+
+    /// @dev Same as {encode-uint256-}, but for bytes32. Leading zero bytes are stripped, as for integers.
+    function encode(bytes32 value) internal pure returns (bytes memory) {
+        return encode(uint256(value));
+    }
+
+    /**
+     * @dev Convenience method to encode a boolean as RLP.
+     *
+     * Boolean `true` is encoded as 0x01, `false` as 0x80 (equivalent to encoding integers 1 and 0).
+     * This follows the de facto ecosystem standard where booleans are treated as 0/1 integers.
+     *
+     * NOTE: Both this and {encodeStrict} produce identical encoded bytes at the output level.
+     * Use this for ecosystem compatibility; use {encodeStrict} for strict RLP spec compliance.
+     */
+    function encode(bool value) internal pure returns (bytes memory) {
+        return encode(value ? uint256(1) : uint256(0));
+    }
+
+    /**
+     * @dev Strict RLP encoding of a boolean following literal spec interpretation.
+     * Boolean `true` is encoded as 0x01, `false` as empty bytes (0x80).
+     *
+     * NOTE: This is the strict RLP spec interpretation where false represents "empty".
+     * Use this for strict RLP spec compliance; use {encode} for ecosystem compatibility.
+     */
+    function encodeStrict(bool value) internal pure returns (bytes memory) {
+        return value ? abi.encodePacked(bytes1(0x01)) : encode(new bytes(0));
+    }
+
+    /**
+     * @dev Creates an RLP Item from a bytes array. Reverts with {RLPEmptyItem} if `value` is empty.
+     *
+     * The returned item points into `value` (skipping its 32-byte length word); the content is not copied.
+     */
+    function toItem(bytes memory value) internal pure returns (Item memory) {
+        require(value.length != 0, RLPEmptyItem()); // Empty arrays are not RLP items.
+        return Item(value.length, _addOffset(_asPointer(value), 32));
+    }
+
+    /**
+     * @dev Decodes an RLP encoded list into an array of RLP Items. See {_decodeLength}
+     *
+     * Each returned item spans one element of the list (prefix included) and points into the memory of `item`.
+     * Lists of any size are supported: elements are counted first so that the result is allocated exactly.
+     *
+     * Reverts with {RLPUnexpectedType} if `item` is not a list, and with {RLPContentLengthMismatch} if the length
+     * announced by the prefix does not cover exactly the whole `item`.
+     */
+    function decodeList(Item memory item) internal pure returns (Item[] memory) {
+        (uint256 listOffset, uint256 listLength, ItemType itemType) = _decodeLength(item);
+        require(itemType == ItemType.List, RLPUnexpectedType(ItemType.List, itemType));
+        uint256 expectedLength = listOffset + listLength;
+        require(expectedLength == item.length, RLPContentLengthMismatch(expectedLength, item.length));
+
+        Item[] memory items = new Item[](_countItems(item, listOffset));
+
+        uint256 currentOffset = listOffset;
+        for (uint256 i = 0; i < items.length; ++i) {
+            // The element is decoded against everything that remains in the list.
+            (uint256 itemOffset, uint256 itemLength, ) = _decodeLength(
+                Item(item.length - currentOffset, _addOffset(item.ptr, currentOffset))
+            );
+            items[i] = Item(itemLength + itemOffset, _addOffset(item.ptr, currentOffset));
+            currentOffset += itemOffset + itemLength;
+        }
+        return items;
+    }
+
+    /// @dev Same as {decodeList} but for `bytes`. See {toItem}.
+    function decodeList(bytes memory value) internal pure returns (Item[] memory) {
+        return decodeList(toItem(value));
+    }
+
+    /// @dev Counts the elements of the list `item` whose payload starts `listOffset` bytes after `item.ptr`.
+    function _countItems(Item memory item, uint256 listOffset) private pure returns (uint256 itemCount) {
+        for (uint256 cursor = listOffset; cursor < item.length; ++itemCount) {
+            (uint256 itemOffset, uint256 itemLength, ) = _decodeLength(
+                Item(item.length - cursor, _addOffset(item.ptr, cursor))
+            );
+            cursor += itemOffset + itemLength;
+        }
+    }
+
+    /// @dev Decodes an RLP encoded item.
+    function decodeBytes(Item memory item) internal pure returns (bytes memory) {
+        (uint256 payloadOffset, uint256 payloadLength, ItemType kind) = _decodeLength(item);
+        if (kind != ItemType.Data) revert RLPUnexpectedType(ItemType.Data, kind);
+
+        // The prefix and the payload together must cover the item exactly.
+        uint256 totalLength = payloadOffset + payloadLength;
+        if (totalLength != item.length) revert RLPContentLengthMismatch(totalLength, item.length);
+
+        bytes memory payload = new bytes(payloadLength);
+        _copy(_addOffset(_asPointer(payload), 32), _addOffset(item.ptr, payloadOffset), payloadLength);
+        return payload;
+    }
+
+    /// @dev Same as {decodeBytes} but for `bytes`. See {toItem}.
+    function decodeBytes(bytes memory item) internal pure returns (bytes memory) {
+        return decodeBytes(toItem(item));
+    }
+
+    /// @dev Copies the `item.length` bytes of an RLP item as-is (prefix bytes included), without decoding them.
+    function decodeRawBytes(Item memory item) internal pure returns (bytes memory) {
+        bytes memory raw = new bytes(item.length);
+        _copy(_addOffset(_asPointer(raw), 32), item.ptr, item.length);
+        return raw;
+    }
+
+    /// @dev Whether `buffer` is exactly one byte whose value is below 128 (0x80). Such a byte is its own encoding.
+    function _isSingleByte(bytes memory buffer) private pure returns (bool) {
+        if (buffer.length != 1) return false;
+        return uint8(buffer[0]) < SHORT_OFFSET;
+    }
+
+    /**
+     * @dev Builds the RLP prefix for a payload of `length` bytes, `offset` being the prefix base of the item kind.
+     *
+     * Lengths <= 55 bytes use the short form (i.e. `abi.encodePacked(bytes1(uint8(length) + uint8(offset)))`).
+     * Lengths > 55 bytes use the long form. See {_encodeLongLength}.
+     */
+    function _encodeLength(uint256 length, uint256 offset) private pure returns (bytes memory) {
+        if (length > SHORT_THRESHOLD) {
+            return _encodeLongLength(length, offset);
+        }
+        return abi.encodePacked(bytes1(uint8(length) + uint8(offset)));
+    }
+
+    /**
+     * @dev Builds the prefix of a long (>55 bytes) payload, which carries a length-of-length byte.
+     * Format: [prefix + length of the length] + [length in big-endian]
+     */
+    function _encodeLongLength(uint256 length, uint256 offset) private pure returns (bytes memory) {
+        // `log256` is floored, hence the `+ 1` to get the number of bytes needed to represent `length`.
+        uint256 lengthBytes = length.log256() + 1;
+        bytes1 prefix = bytes1(uint8(lengthBytes) + uint8(offset) + SHORT_THRESHOLD);
+        // `_binaryBuffer` is already big-endian and minimal.
+        return abi.encodePacked(prefix, _binaryBuffer(length));
+    }
+
+    /// @dev Big-endian representation of `value` without its leading zero bytes (empty for zero).
+    function _binaryBuffer(uint256 value) private pure returns (bytes memory) {
+        uint256 leadingZeroBytes = value.clz() / 8;
+        return abi.encodePacked(value).slice(leadingZeroBytes);
+    }
+
+    /// @dev Returns a single buffer holding every byte array of `list`, one after the other.
+    function _flatten(bytes[] memory list) private pure returns (bytes memory) {
+        // TODO: Move to Arrays.sol
+        bytes memory joined = new bytes(_totalLength(list));
+        Memory.Pointer cursor = _addOffset(_asPointer(joined), 32);
+        uint256 count = list.length;
+        for (uint256 i = 0; i < count; ++i) {
+            bytes memory chunk = list[i];
+            _copy(cursor, _addOffset(_asPointer(chunk), 32), chunk.length);
+            cursor = _addOffset(cursor, chunk.length);
+        }
+        return joined;
+    }
+
+    /// @dev Adds up the lengths of all the arrays in `list`.
+    function _totalLength(bytes[] memory list) private pure returns (uint256) {
+        // TODO: Move to Arrays.sol
+        uint256 sum = 0;
+        uint256 count = list.length;
+        for (uint256 i = 0; i < count; ++i) {
+            sum += list[i].length;
+        }
+        return sum;
+    }
+
+    /**
+     * @dev Decodes an RLP `item`'s length and type from its prefix.
+     * Returns the offset of the payload within the item, the payload length, and the type of the RLP item.
+     * Reverts with {RLPEmptyItem} if the item has no bytes.
+     */
+    function _decodeLength(Item memory item) private pure returns (uint256 offset, uint256 length, ItemType) {
+        if (item.length == 0) revert RLPEmptyItem();
+        uint256 prefix = uint8(_loadByte(item.ptr, 0));
+
+        if (prefix < SHORT_OFFSET) {
+            // Single byte below 128: the byte is its own payload
+            return (0, 1, ItemType.Data);
+        } else if (prefix < LONG_LENGTH_OFFSET) {
+            // Short string (0-55 bytes)
+            return _decodeShortString(prefix - SHORT_OFFSET, item);
+        } else if (prefix < LONG_OFFSET) {
+            // Long string (>55 bytes)
+            (offset, length) = _decodeLong(prefix - LONG_LENGTH_OFFSET, item);
+            return (offset, length, ItemType.Data);
+        } else if (prefix < SHORT_LIST_OFFSET) {
+            // Short list
+            return _decodeShortList(prefix - LONG_OFFSET, item);
+        } else {
+            // Long list
+            (offset, length) = _decodeLong(prefix - SHORT_LIST_OFFSET, item);
+            return (offset, length, ItemType.List);
+        }
+    }
+
+    /**
+     * @dev Decodes a short string (0-55 bytes). The first byte contains the length, and the rest is the payload.
+     * Reverts if the item cannot hold the payload, or if a one byte payload is below 128 (such a byte must be
+     * encoded as itself, without a prefix).
+     */
+    function _decodeShortString(
+        uint256 strLength,
+        Item memory item
+    ) private pure returns (uint256 offset, uint256 length, ItemType) {
+        if (item.length <= strLength) revert RLPInvalidDataRemainder(strLength, item.length);
+        if (strLength == 1 && _loadByte(_addOffset(item.ptr, 1), 0) < bytes1(SHORT_OFFSET)) revert();
+        return (1, strLength, ItemType.Data);
+    }
+
+    /// @dev Decodes a short list (0-55 bytes). The first byte contains the length of the entire list payload.
+    function _decodeShortList(
+        uint256 listLength,
+        Item memory item
+    ) private pure returns (uint256 offset, uint256 length, ItemType) {
+        if (item.length <= listLength) revert RLPInvalidDataRemainder(listLength, item.length);
+        return (1, listLength, ItemType.List);
+    }
+
+    /// @dev Decodes a long string or list (>55 bytes). The first byte indicates the length of the length, followed by the length itself.
+    /**
+     * @dev Reads the big-endian payload length that follows the prefix of a long item, and validates it.
+     *
+     * `lengthLength` is received as `prefix - base` (see {_decodeLength}), where 0 already stands for one length
+     * byte. Returns the offset of the payload within `item` (prefix + length bytes) and the payload length.
+     *
+     * Reverts if the length bytes are missing, if the length has a leading zero byte (non-minimal encoding), if
+     * the length would fit the short form (<= 55), or if `item` is too short to contain the announced payload.
+     * `item` is allowed to extend past the payload: exact size checks are left to the callers.
+     */
+    function _decodeLong(uint256 lengthLength, Item memory item) private pure returns (uint256 offset, uint256 length) {
+        lengthLength += 1; // number of bytes used by the length itself
+        require(item.length > lengthLength, RLPInvalidDataRemainder(lengthLength, item.length));
+        // No leading zeros: check the first byte of the length (offset 1), not the prefix (offset 0).
+        require(_loadByte(item.ptr, 1) != 0x00);
+
+        // Load a full word right after the prefix and keep its `lengthLength` most significant bytes.
+        uint256 len = uint256(_load(_addOffset(item.ptr, 1)) >> (256 - 8 * lengthLength));
+        require(len > SHORT_THRESHOLD, RLPInvalidDataRemainder(SHORT_THRESHOLD, len));
+        // The item must hold the prefix byte, the length bytes and the whole payload.
+        uint256 expectedLength = lengthLength + 1 + len;
+        require(item.length >= expectedLength, RLPContentLengthMismatch(expectedLength, item.length));
+        return (lengthLength + 1, len);
+    }
+
+    /// @dev Returns `ptr` moved forward by `offset` bytes.
+    function _addOffset(Memory.Pointer ptr, uint256 offset) private pure returns (Memory.Pointer) {
+        return bytes32(uint256(ptr.asBytes32()) + offset).asPointer();
+    }
+
+    /// @dev Copies `length` bytes of memory from `srcPtr` to `destPtr` (using `mcopy`).
+    function _copy(Memory.Pointer destPtr, Memory.Pointer srcPtr, uint256 length) private pure {
+        assembly ("memory-safe") {
+            mcopy(destPtr, srcPtr, length)
+        }
+    }
+
+    /**
+     * @dev Returns the byte at position `offset` of the 32-byte word stored at `ptr`. `offset` must be below 32,
+     * otherwise the result is zero.
+     */
+    function _loadByte(Memory.Pointer ptr, uint256 offset) private pure returns (bytes1 v) {
+        assembly ("memory-safe") {
+            // `byte` yields a right-aligned value, whereas `bytes1` lives in the most significant byte of its slot.
+            v := shl(248, byte(offset, mload(ptr)))
+        }
+    }
+
+    /// @dev Returns the 32-byte word stored at `ptr`.
+    function _load(Memory.Pointer ptr) private pure returns (bytes32 v) {
+        assembly ("memory-safe") {
+            v := mload(ptr)
+        }
+    }
+
+    /// @dev Returns the memory location of `value` (i.e. of its length word) as a pointer.
+    function _asPointer(bytes memory value) private pure returns (Memory.Pointer ptr) {
+        assembly ("memory-safe") {
+            ptr := value
+        }
+    }
+}
diff --git a/test/helpers/enums.js b/test/helpers/enums.js
index 6adbf64ad82..804262d4e7a 100644
--- a/test/helpers/enums.js
+++ b/test/helpers/enums.js
@@ -11,4 +11,5 @@ module.exports = {
   Rounding: EnumTyped('Floor', 'Ceil', 'Trunc', 'Expand'),
   OperationState: Enum('Unset', 'Waiting', 'Ready', 'Done'),
   RevertType: EnumTyped('None', 'RevertWithoutMessage', 'RevertWithMessage', 'RevertWithCustomError', 'Panic'),
+  ItemType: Enum('Data', 'List'),
 };
diff --git a/test/utils/RLP.test.js b/test/utils/RLP.test.js
new file
mode 100644 index 00000000000..6d915ab8fd6 --- /dev/null +++ b/test/utils/RLP.test.js @@ -0,0 +1,180 @@ +const { ethers } = require('hardhat'); +const { expect } = require('chai'); +const { loadFixture } = require('@nomicfoundation/hardhat-network-helpers'); + +async function fixture() { + const mock = await ethers.deployContract('$RLP'); + + // Resolve function overload ambiguities like in Math.test.js + mock.$encode_bytes = mock['$encode(bytes)']; + mock.$encode_list = mock['$encode(bytes[])']; + mock.$encode_string = mock['$encode(string)']; + mock.$encode_address = mock['$encode(address)']; + mock.$encode_uint256 = mock['$encode(uint256)']; + mock.$encode_bytes32 = mock['$encode(bytes32)']; + mock.$encode_bool = mock['$encode(bool)']; + mock.$decodeBytes_item = mock['$decodeBytes((uint256,bytes32))']; + mock.$decodeBytes_bytes = mock['$decodeBytes(bytes)']; + mock.$decodeList_item = mock['$decodeList((uint256,bytes32))']; + mock.$decodeList_bytes = mock['$decodeList(bytes)']; + + return { mock }; +} + +describe('RLP', function () { + beforeEach(async function () { + Object.assign(this, await loadFixture(fixture)); + }); + + describe('encoding', function () { + it('encodes zero', async function () { + await expect(this.mock.$encode_uint256(0)).to.eventually.equal('0x80'); + }); + + it('encodes single byte < 128', async function () { + await expect(this.mock.$encode_bytes('0x00')).to.eventually.equal('0x00'); + await expect(this.mock.$encode_bytes('0x01')).to.eventually.equal('0x01'); + await expect(this.mock.$encode_bytes('0x7f')).to.eventually.equal('0x7f'); + }); + + it('encodes single byte >= 128', async function () { + await expect(this.mock.$encode_bytes('0x80')).to.eventually.equal('0x8180'); + await expect(this.mock.$encode_bytes('0xff')).to.eventually.equal('0x81ff'); + }); + + it('encodes short strings (0-55 bytes)', async function () { + // 1 byte + await expect(this.mock.$encode_bytes('0xab')).to.eventually.equal('0x81ab'); + + // 2 bytes + await 
expect(this.mock.$encode_bytes('0x1234')).to.eventually.equal('0x821234'); + + // 55 bytes (maximum for short encoding) + const fiftyFiveBytes = '0x' + '00'.repeat(55); + const expectedShort = '0xb7' + '00'.repeat(55); + await expect(this.mock.$encode_bytes(fiftyFiveBytes)).to.eventually.equal(expectedShort); + }); + + it('encodes long strings (>55 bytes)', async function () { + // 56 bytes (minimum for long encoding) + const fiftySixBytes = '0x' + '00'.repeat(56); + const expectedLong = '0xb838' + '00'.repeat(56); + await expect(this.mock.$encode_bytes(fiftySixBytes)).to.eventually.equal(expectedLong); + + // 100 bytes + const hundredBytes = '0x' + '00'.repeat(100); + const expectedHundred = '0xb864' + '00'.repeat(100); + await expect(this.mock.$encode_bytes(hundredBytes)).to.eventually.equal(expectedHundred); + }); + + it('encodes strings', async function () { + await expect(this.mock.$encode_string('')).to.eventually.equal('0x80'); + await expect(this.mock.$encode_string('dog')).to.eventually.equal('0x83646f67'); + await expect( + this.mock.$encode_string('Lorem ipsum dolor sit amet, consectetur adipisicing elit'), + ).to.eventually.equal( + '0xb8384c6f72656d20697073756d20646f6c6f722073697420616d65742c20636f6e7365637465747572206164697069736963696e6720656c6974', + ); + }); + + it('encodes addresses', async function () { + const addr = '0x1234567890123456789012345678901234567890'; + await expect(this.mock.$encode_address(addr)).to.eventually.equal('0x941234567890123456789012345678901234567890'); + }); + + it('encodes uint256', async function () { + await expect(this.mock.$encode_uint256(0)).to.eventually.equal('0x80'); + await expect(this.mock.$encode_uint256(1)).to.eventually.equal('0x01'); + await expect(this.mock.$encode_uint256(127)).to.eventually.equal('0x7f'); + await expect(this.mock.$encode_uint256(128)).to.eventually.equal('0x8180'); + await expect(this.mock.$encode_uint256(256)).to.eventually.equal('0x820100'); + await 
expect(this.mock.$encode_uint256(1024)).to.eventually.equal('0x820400'); + await expect(this.mock.$encode_uint256(0xffffff)).to.eventually.equal('0x83ffffff'); + }); + + it('encodes bytes32', async function () { + await expect( + this.mock.$encode_bytes32('0x0000000000000000000000000000000000000000000000000000000000000000'), + ).to.eventually.equal('0x80'); + await expect( + this.mock.$encode_bytes32('0x0000000000000000000000000000000000000000000000000000000000000001'), + ).to.eventually.equal('0x01'); + await expect( + this.mock.$encode_bytes32('0x1000000000000000000000000000000000000000000000000000000000000000'), + ).to.eventually.equal('0xa01000000000000000000000000000000000000000000000000000000000000000'); + }); + + it('encodes booleans', async function () { + await expect(this.mock.$encode_bool(false)).to.eventually.equal('0x80'); // 0 + await expect(this.mock.$encode_bool(true)).to.eventually.equal('0x01'); // 1 + }); + + it('encodes strict booleans', async function () { + await expect(this.mock.$encodeStrict(false)).to.eventually.equal('0x80'); // empty + await expect(this.mock.$encodeStrict(true)).to.eventually.equal('0x01'); // 0x01 + }); + + const validTests = [ + // Basic string encoding + { name: 'empty string', input: '' }, + { name: 'dog', input: 'dog' }, + { + name: 'Lorem ipsum', + input: 'Lorem ipsum dolor sit amet, consectetur adipisicing elit', + }, + + // Numeric encoding + { name: 'small integer 1', input: 1 }, + { name: 'small integer 16', input: 16 }, + { name: 'small integer 79', input: 79 }, + { name: 'small integer 127', input: 127 }, + { name: 'medium integer 128', input: 128 }, + { name: 'medium integer 1000', input: 1000 }, + { name: 'medium integer 100000', input: 100000 }, + + // List encoding + { name: 'empty list', input: [] }, + { name: 'list of strings', input: ['dog', 'god', 'cat'] }, + ]; + + validTests.forEach(({ name, input }) => { + it(`encodes ${name}`, async function () { + let encoded; + let expected; + + if (typeof input 
=== 'string') { + encoded = await this.mock.$encode_string(input); + expected = ethers.encodeRlp(ethers.toUtf8Bytes(input)); + } else if (typeof input === 'number') { + encoded = await this.mock.$encode_uint256(input); + expected = ethers.encodeRlp(ethers.toBeHex(input)); + } else if (Array.isArray(input)) { + if (input.length === 0) { + encoded = await this.mock.$encode_list(input); + } else { + const encodedItems = input.map(item => ethers.encodeRlp(ethers.toUtf8Bytes(item))); + encoded = await this.mock.$encode_list(encodedItems); + } + expected = ethers.encodeRlp(input.map(item => ethers.toUtf8Bytes(item))); + } + + expect(encoded).to.equal(expected); + }); + }); + + // const invalidTests = [ + // { name: 'short string with invalid length', input: '0x8100' }, + // { name: 'long string with invalid length prefix', input: '0xb800' }, + // { name: 'list with invalid length', input: '0xc100' }, + // { name: 'truncated long string', input: '0xb838' }, + // { name: 'invalid single byte encoding (non-minimal)', input: '0x8100' }, + // ]; + + // invalidTests.forEach(({ name, input }) => { + // it(`encodes ${name} into invalid RLP`, async function () { + // const item = await this.mock.$toItem(input); + // await expect(this.mock.$decodeBytes_bytes(item)).to.be.reverted; + // }); + // }); + }); +});