Skip to content

Commit 5eb047a

Browse files
Amxx, Vectorized, arr00, ernestognw
authored
Add Base58 library (#5762)
Co-authored-by: Vectorized <[email protected]> Co-authored-by: Arr00 <[email protected]> Co-authored-by: ernestognw <[email protected]>
1 parent 92033fc commit 5eb047a

File tree

8 files changed

+347
-10
lines changed

8 files changed

+347
-10
lines changed

.changeset/loose-lamps-bake.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
'openzeppelin-solidity': minor
3+
---
4+
5+
`Base58`: Add a library for encoding bytes buffers into base58 strings and decoding base58 strings back into bytes buffers.

contracts/mocks/Stateless.sol

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import {Accumulators} from "../utils/structs/Accumulators.sol";
88
import {Address} from "../utils/Address.sol";
99
import {Arrays} from "../utils/Arrays.sol";
1010
import {AuthorityUtils} from "../access/manager/AuthorityUtils.sol";
11+
import {Base58} from "../utils/Base58.sol";
1112
import {Base64} from "../utils/Base64.sol";
1213
import {BitMaps} from "../utils/structs/BitMaps.sol";
1314
import {Blockhash} from "../utils/Blockhash.sol";

contracts/utils/Base58.sol

Lines changed: 239 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,239 @@
1+
// SPDX-License-Identifier: MIT
2+
3+
pragma solidity ^0.8.20;
4+
5+
/**
6+
* @dev Provides a set of functions to operate with Base58 strings.
7+
*
8+
* Base58 is an encoding scheme that converts binary data into a human-readable text format.
9+
* Similar to {Base64} but specifically designed for better human usability.
10+
*
11+
* 1. Human-friendly alphabet: Excludes visually similar characters to reduce human error:
12+
* * No 0 (zero) vs O (capital o) confusion
13+
* * No I (capital i) vs l (lowercase L) confusion
14+
* * No non-alphanumeric characters like + or =
15+
* 2. URL-safe: Contains only alphanumeric characters, making it safe for URLs without encoding.
16+
*
17+
* Initially based on https://github.com/storyicon/base58-solidity/commit/807428e5174e61867e4c606bdb26cba58a8c5cb1[storyicon's implementation] (MIT).
18+
* Based on the updated and improved https://github.com/Vectorized/solady/blob/208e4f31cfae26e4983eb95c3488a14fdc497ad7/src/utils/Base58.sol[Vectorized version] (MIT).
19+
*/
20+
library Base58 {
21+
/// @dev Unrecognized Base58 character on decoding. The `bytes1` argument is the offending character.
22+
error InvalidBase58Char(bytes1);
23+
24+
/**
25+
* @dev Encode a `bytes` buffer as a Base58 `string`.
*
* Each leading zero byte of `input` is encoded as a leading '1' character.
* An empty `input` produces an empty string.
26+
*/
27+
function encode(bytes memory input) internal pure returns (string memory) {
28+
return string(_encode(input));
29+
}
30+
31+
/**
32+
* @dev Decode a Base58 `string` into a `bytes` buffer.
*
* Each leading '1' character of `input` is decoded as a leading zero byte.
* An empty `input` produces an empty buffer.
*
* Reverts with {InvalidBase58Char} if `input` contains a character that is not part of the Base58 alphabet.
33+
*/
34+
function decode(string memory input) internal pure returns (bytes memory) {
35+
return _decode(bytes(input));
36+
}
37+
38+
/// @dev Encoding core used by {encode}. Returns an empty buffer for an empty `input`. Otherwise the input is
/// split into 31-byte limbs that are repeatedly divided by 58; each remainder is mapped to a character of the
/// Base58 alphabet, and every leading zero byte of `input` is emitted as a leading '1'.
function _encode(bytes memory input) private pure returns (bytes memory output) {
39+
uint256 inputLength = input.length;
40+
if (inputLength == 0) return "";
41+
42+
assembly ("memory-safe") {
43+
// Count number of zero bytes at the beginning of `input`. These are encoded using the same number of '1's
44+
// at the beginning of the encoded string.
45+
let inputLeadingZeros := 0
// `lt(byte, inBounds)` is only true when the byte is 0 and `inBounds` is 1, i.e. for a zero byte that is
// still inside the buffer.
46+
for {} lt(byte(0, mload(add(add(input, 0x20), inputLeadingZeros))), lt(inputLeadingZeros, inputLength)) {} {
47+
inputLeadingZeros := add(inputLeadingZeros, 1)
48+
}
49+
50+
// Start the output offset by an over-estimate of the length.
51+
// When converting from base-256 (bytes) to base-58, the theoretical length ratio is log(256)/log(58).
52+
// We use 9886/7239 ≈ 1.3657 as a rational approximation that slightly over-estimates to ensure
53+
// sufficient memory allocation.
54+
let outputLengthEstim := add(inputLeadingZeros, div(mul(sub(inputLength, inputLeadingZeros), 9886), 7239))
55+
56+
// This is going to be our "scratch" workspace. We leave enough room so that we can store length + encoded output at the FMP location.
57+
// 0x21 = 0x20 (32 bytes for result length prefix) + 0x1 (safety buffer for division truncation)
58+
let scratch := add(mload(0x40), add(outputLengthEstim, 0x21))
59+
60+
// Chunk input into 31-byte limbs (248 bits) for efficient batch processing.
61+
// Each limb fits safely in a 256-bit word with 8-bit overflow protection.
62+
// Memory layout: [output chars] [limb₁(248 bits)][limb₂(248 bits)][limb₃(248 bits)]...
63+
// ↑ scratch
64+
// ↑ ptr (moves right)
65+
let ptr := scratch
66+
for {
67+
// Handle partial first limb if input length isn't divisible by 31
68+
let i := mod(inputLength, 31)
69+
if i {
70+
// Right-shift so that the first `i` bytes of the input end up in the low bits of the 256-bit word
// (all higher bits are zero).
71+
mstore(ptr, shr(mul(sub(32, i), 8), mload(add(input, 0x20))))
72+
ptr := add(ptr, 0x20) // next limb
73+
}
74+
} lt(i, inputLength) {
75+
ptr := add(ptr, 0x20) // next limb
76+
i := add(i, 31) // move in buffer
77+
} {
78+
// Load 31 bytes from input, right-shift by 8 bits to leave 1 zero byte on the left.
79+
mstore(ptr, shr(8, mload(add(add(input, 0x20), i))))
80+
}
81+
82+
// Store the encoding table. This overlaps with the FMP that we are going to reset later anyway.
83+
// See https://datatracker.ietf.org/doc/html/draft-msporny-base58-03#section-2
84+
mstore(0x1f, "123456789ABCDEFGHJKLMNPQRSTUVWXY")
85+
mstore(0x3f, "Zabcdefghijkmnopqrstuvwxyz")
86+
87+
// Core Base58 encoding: repeated division by 58 on input limbs
88+
// Memory layout: [output chars] [limb₁(248 bits)][limb₂(248 bits)][limb₃(248 bits)]...
89+
// ↑ scratch ↑ ptr
90+
// ↑ output (moves left)
91+
// ↑ data (moves right)
92+
for {
93+
let data := scratch // Points to first non-zero limb
94+
output := scratch // Builds result right-to-left from scratch
95+
} 1 {} {
96+
// Skip zero limbs at the beginning (limbs become 0 after repeated divisions)
97+
for {} and(iszero(mload(data)), lt(data, ptr)) {
98+
data := add(data, 0x20)
99+
} {}
100+
// Exit when all limbs are zero (conversion complete)
101+
if eq(data, ptr) {
102+
break
103+
}
104+
105+
// Division by 58 across all remaining limbs
106+
let carry := 0
107+
for {
108+
let i := data
109+
} lt(i, ptr) {
110+
i := add(i, 0x20)
111+
} {
112+
let acc := add(shl(248, carry), mload(i)) // Combine carry from previous limb with current limb
113+
mstore(i, div(acc, 58)) // Store quotient back in limb
114+
carry := mod(acc, 58) // Remainder becomes next carry
115+
}
116+
117+
// Convert remainder (0-57) to Base58 character and store right-to-left in the output space
// `mload(carry)` reads the 32 bytes starting at address `carry`; `mstore8` keeps only the least significant
// byte of that word, i.e. the byte at address `carry + 0x1f`, which is the `carry`-th character of the
// table stored at 0x1f.
118+
output := sub(output, 1)
119+
mstore8(output, mload(carry))
120+
}
121+
122+
// Write the input leading zeros, as '1' characters, at the left of the encoded output (32 bytes at a time).
123+
// This may spill to the left into the "length" of the buffer.
124+
for {
125+
let i := 0
126+
} lt(i, inputLeadingZeros) {} {
127+
i := add(i, 0x20)
128+
mstore(sub(output, i), "11111111111111111111111111111111")
129+
}
130+
131+
// Move output pointer to account for inputLeadingZeros
132+
output := sub(output, add(inputLeadingZeros, 0x20))
133+
134+
// Store length and allocate (reserve) memory up to scratch.
135+
mstore(output, sub(scratch, add(output, 0x20))) // Overwrite spilled bytes
136+
mstore(0x40, scratch)
137+
}
138+
}
139+
140+
/// @dev Decoding core used by {decode}. Returns an empty buffer for an empty `input`. Otherwise each character
/// is validated and mapped to its Base58 value, then accumulated (multiply by 58 and add) into 31-byte limbs;
/// every leading '1' of `input` is emitted as a leading zero byte. Reverts with {InvalidBase58Char}, carrying
/// the offending character, when a character is not part of the Base58 alphabet.
function _decode(bytes memory input) private pure returns (bytes memory output) {
141+
bytes4 errorSelector = InvalidBase58Char.selector;
142+
143+
uint256 inputLength = input.length;
144+
if (inputLength == 0) return "";
145+
146+
assembly ("memory-safe") {
147+
let inputLeadingZeros := 0 // Number of leading '1' in `input`.
148+
// Count leading zeros. In base58, zeros are represented using '1' (chr(49)).
149+
for {} and(
150+
eq(byte(0, mload(add(add(input, 0x20), inputLeadingZeros))), 49),
151+
lt(inputLeadingZeros, inputLength)
152+
) {} {
153+
inputLeadingZeros := add(inputLeadingZeros, 1)
154+
}
155+
156+
// Estimate the output length using the base conversion ratio.
157+
// When converting from base-58 to base-256 (bytes), the theoretical length ratio is log(58)/log(256).
158+
// We use 6115/8351 ≈ 0.7322 as a rational approximation that slightly over-estimates to ensure
159+
// sufficient memory allocation.
160+
let outputLengthEstim := add(inputLeadingZeros, div(mul(sub(inputLength, inputLeadingZeros), 6115), 8351))
161+
162+
// This is going to be our "scratch" workspace. We leave enough room so that we can store length + decoded output at the FMP location.
163+
// 0x21 = 0x20 (32 bytes for result length prefix) + 0x1 (safety buffer for division truncation)
164+
let scratch := add(mload(0x40), add(outputLengthEstim, 0x21))
165+
166+
// Store the decoding table for character-to-value lookup. This overlaps with the FMP that we are going to reset later anyway.
167+
// Maps ASCII characters (minus 49) to their Base58 numeric values (0-57), with 0xff for invalid characters
// The order of the stores matters: the first store places its 10 significant bytes at 0x40-0x49 and zero
// padding at 0x2a-0x3f, and that padding is then overwritten by the store at 0x20. The table ends up
// occupying memory 0x00-0x49, one byte per character offset.
168+
mstore(0x2a, 0x30313233343536373839)
169+
mstore(0x20, 0x1718191a1b1c1d1e1f20ffffffffffff2122232425262728292a2bff2c2d2e2f)
170+
mstore(0x00, 0x000102030405060708ffffffffffffff090a0b0c0d0e0f10ff1112131415ff16)
171+
172+
// Core Base58 decoding: process each character and accumulate into 31-byte limbs
173+
// Memory layout: [output bytes] [limb₁(248 bits)][limb₂(248 bits)][limb₃(248 bits)]...
174+
// ↑ scratch
175+
// ↑ ptr (moves right as limbs are added)
176+
let ptr := scratch
177+
let mask := shr(8, not(0))
178+
for {
179+
let j := 0
180+
} lt(j, inputLength) {
181+
j := add(j, 1)
182+
} {
183+
// Decode each character: convert from ASCII to Base58 numeric value (0-57)
184+
let c := sub(byte(0, mload(add(add(input, 0x20), j))), 49) // Offset from '1' (ASCII 49)
185+
186+
// Validate character using bit manipulation: each bit in the bitmask represents a valid character offset
187+
// 0x3fff7ff03ffbeff01ff has bits set for all valid Base58 characters (excludes 0, O, I, l)
188+
// shl(c, 1) creates a single bit at position c, AND with bitmask checks if character is valid
// For characters below '1' the subtraction above wraps around to a value >= 256, for which `shl` returns 0,
// so those characters are rejected as well.
189+
// slither-disable-next-line incorrect-shift
190+
if iszero(and(shl(c, 1), 0x3fff7ff03ffbeff01ff)) {
191+
mstore(0, errorSelector)
// Restore the original character (c + 49) and left-align it as the `bytes1` error argument.
192+
mstore(4, shl(248, add(c, 49)))
193+
revert(0, 0x24)
194+
}
195+
let carry := byte(0, mload(c)) // Look up Base58 numeric value from decoding table
196+
197+
// Multiplication by 58 and addition across all existing limbs
198+
for {
199+
let i := scratch
200+
} lt(i, ptr) {
201+
i := add(i, 0x20)
202+
} {
203+
let acc := add(carry, mul(58, mload(i))) // Multiply limb by 58 and add carry
204+
mstore(i, and(mask, acc)) // Store lower 248 bits back in limb
205+
carry := shr(248, acc) // Upper bits become carry for next limb
206+
}
207+
// If carry remains, we need a new limb to store the overflow
208+
if carry {
209+
mstore(ptr, carry)
210+
ptr := add(ptr, 0x20) // Extend limbs array
211+
}
212+
}
213+
214+
// Copy and compact the uint248 limbs + remove any zeros at the beginning.
// Carries propagate towards higher addresses, so the limb at `scratch` is the least significant one.
// Walking the limbs left-to-right while writing right-to-left therefore produces big-endian output.
// Each 32-byte store puts the limb's low 31 bytes at [output, output + 31); its (zero) top byte lands at
// `output - 1` and is overwritten by the next limb's store.
215+
output := scratch
216+
for {
217+
let i := scratch
218+
} lt(i, ptr) {
219+
i := add(i, 0x20)
220+
} {
221+
output := sub(output, 31)
222+
mstore(sub(output, 1), mload(i))
223+
}
// Skip zero bytes at the start of the compacted output, without going past `scratch`.
224+
for {} lt(byte(0, mload(output)), lt(output, scratch)) {} {
225+
output := add(output, 1)
226+
}
227+
228+
// Add the zeros that were encoded in the input (prefix '1's)
// Copying from offset `calldatasize()` reads past the end of calldata, which yields zero bytes: this
// zero-fills `inputLeadingZeros` bytes of memory.
229+
calldatacopy(sub(output, inputLeadingZeros), calldatasize(), inputLeadingZeros)
230+
231+
// Move output pointer to account for inputLeadingZeros
232+
output := sub(output, add(inputLeadingZeros, 0x20))
233+
234+
// Store length and allocate (reserve) memory up to scratch.
235+
mstore(output, sub(scratch, add(output, 0x20)))
236+
mstore(0x40, scratch)
237+
}
238+
}
239+
}

contracts/utils/Base64.sol

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,17 +11,17 @@ import {SafeCast} from "./math/SafeCast.sol";
1111
library Base64 {
1212
using SafeCast for bool;
1313

14-
error InvalidBase64Digit(bytes1);
14+
error InvalidBase64Char(bytes1);
1515

1616
/**
17-
* @dev Converts a `bytes` to its Bytes64 `string` representation.
17+
* @dev Converts a `bytes` to its Base64 `string` representation.
1818
*/
1919
function encode(bytes memory data) internal pure returns (string memory) {
2020
return string(_encode(data, false));
2121
}
2222

2323
/**
24-
* @dev Converts a `bytes` to its Bytes64Url `string` representation.
24+
* @dev Converts a `bytes` to its Base64Url `string` representation.
2525
* Output is not padded with `=` as specified in https://www.rfc-editor.org/rfc/rfc4648[rfc4648].
2626
*/
2727
function encodeURL(bytes memory data) internal pure returns (string memory) {
@@ -142,7 +142,7 @@ library Base64 {
142142
* @dev Internal decoding
143143
*/
144144
function _decode(bytes memory data) private pure returns (bytes memory result) {
145-
bytes4 errorSelector = InvalidBase64Digit.selector;
145+
bytes4 errorSelector = InvalidBase64Char.selector;
146146

147147
uint256 dataLength = data.length;
148148
if (dataLength == 0) return "";

contracts/utils/README.adoc

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ Miscellaneous contracts and libraries containing utility functions you can use t
2424
* {MerkleTree}: A library with https://wikipedia.org/wiki/Merkle_Tree[Merkle Tree] data structures and helper functions.
2525
* {Address}: Collection of functions for overloading Solidity's https://docs.soliditylang.org/en/latest/types.html#address[`address`] type.
2626
* {Arrays}: Collection of functions that operate on https://docs.soliditylang.org/en/latest/types.html#arrays[`arrays`].
27+
* {Base58}: On-chain base58 encoding and decoding.
2728
* {Base64}: On-chain base64 and base64URL encoding according to https://datatracker.ietf.org/doc/html/rfc4648[RFC-4648].
2829
* {Blockhash}: A library for accessing historical block hashes beyond the standard 256 block limit utilizing EIP-2935's historical blockhash functionality.
2930
* {Bytes}: Common operations on bytes objects.
@@ -110,6 +111,8 @@ Ethereum contracts have no native concept of an interface, so applications must
110111

111112
{{Arrays}}
112113

114+
{{Base58}}
115+
113116
{{Base64}}
114117

115118
{{Blockhash}}

test/utils/Base58.t.sol

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
// SPDX-License-Identifier: MIT
2+
3+
pragma solidity ^0.8.26;
4+
5+
import {Test} from "forge-std/Test.sol";
6+
import {Base58} from "@openzeppelin/contracts/utils/Base58.sol";
7+
8+
// Round-trip tests for the Base58 library: every test checks that decode(encode(x)) returns x.
contract Base58Test is Test {
9+
// Empty buffer round-trips to an empty buffer.
function testEncodeDecodeEmpty() external pure {
10+
assertEq(Base58.decode(Base58.encode(hex"")), hex"");
11+
}
12+
13+
// Buffers made of (or starting with) zero bytes exercise the leading-zero handling.
function testEncodeDecodeZeros() external pure {
14+
bytes memory zeros = hex"0000000000000000";
15+
assertEq(Base58.decode(Base58.encode(zeros)), zeros);
16+
17+
bytes memory almostZeros = hex"00000000a400000000";
18+
assertEq(Base58.decode(Base58.encode(almostZeros)), almostZeros);
19+
}
20+
21+
// Round-trip on an arbitrary `input`.
// NOTE(review): presumably executed by Foundry as a fuzz test since it takes a parameter — confirm against the test setup.
function testEncodeDecode(bytes memory input) external pure {
22+
assertEq(Base58.decode(Base58.encode(input)), input);
23+
}
24+
}

test/utils/Base58.test.js

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
const { ethers } = require('hardhat');
2+
const { expect } = require('chai');
3+
const { loadFixture } = require('@nomicfoundation/hardhat-network-helpers');
4+
5+
// Deploys the `$Base58` mock contract and returns it as `{ mock }`; used through `loadFixture` in `beforeEach`.
async function fixture() {
6+
const mock = await ethers.deployContract('$Base58');
7+
return { mock };
8+
}
9+
10+
// Checks the on-chain Base58 mock against `ethers.encodeBase58`, fixed test vectors, and invalid input.
describe('Base58', function () {
11+
beforeEach(async function () {
12+
Object.assign(this, await loadFixture(fixture));
13+
});
14+
15+
describe('base58', function () {
16+
describe('encode/decode random buffers', function () {
17+
// Length 512 runs out of gas.
18+
// These checks are very slow when running coverage, causing CI to time out, so lengths above 32
// are tagged with '[skip-on-coverage]'.
19+
for (const length of [0, 1, 2, 3, 4, 32, 42, 128, 384])
20+
it(
21+
[length > 32 && '[skip-on-coverage]', `buffer of length ${length}`].filter(Boolean).join(' '),
22+
async function () {
23+
const buffer = ethers.randomBytes(length);
24+
const hex = ethers.hexlify(buffer);
25+
const b58 = ethers.encodeBase58(buffer);
26+
27+
await expect(this.mock.$encode(hex)).to.eventually.equal(b58);
28+
await expect(this.mock.$decode(b58)).to.eventually.equal(hex);
29+
},
30+
);
31+
});
32+
33+
// Test cases from section 5 of the (no longer active) Base58 Encoding Scheme Internet-Draft
34+
// https://datatracker.ietf.org/doc/html/draft-msporny-base58-03
35+
describe('test vectors', function () {
36+
for (const { raw, b58 } of [
37+
{ raw: 'Hello World!', b58: '2NEpo7TZRRrLZSi2U' },
38+
{
39+
raw: 'The quick brown fox jumps over the lazy dog.',
40+
b58: 'USm3fpXnKG5EUBx2ndxBDMPVciP5hGey2Jh4NDv6gmeo1LkMeiKrLJUUBk6Z',
41+
},
42+
{ raw: '0x0000287fb4cd', b58: '11233QC4' },
43+
])
44+
it(raw, async function () {
// Hex-string vectors are used as raw bytes; any other vector is UTF-8 encoded.
45+
const buffer = (ethers.isHexString(raw) ? ethers.getBytes : ethers.toUtf8Bytes)(raw);
46+
const hex = ethers.hexlify(buffer);
47+
48+
await expect(this.mock.$encode(hex)).to.eventually.equal(b58);
49+
await expect(this.mock.$decode(b58)).to.eventually.equal(hex);
50+
});
51+
});
52+
53+
describe('decode invalid format', function () {
54+
for (const chr of ['I', '-', '~'])
55+
it(`Invalid base58 char ${chr}`, async function () {
56+
const getHexCode = str => ethers.hexlify(ethers.toUtf8Bytes(str));
// `helper` only carries an interface declaring the custom error; it is what `revertedWithCustomError`
// is given to identify `InvalidBase58Char`.
57+
const helper = { interface: ethers.Interface.from(['error InvalidBase58Char(bytes1)']) };
58+
59+
await expect(this.mock.$decode(`VYRWKp${chr}pnN7`))
60+
.to.be.revertedWithCustomError(helper, 'InvalidBase58Char')
61+
.withArgs(getHexCode(chr));
62+
});
63+
});
64+
});
65+
});

0 commit comments

Comments
 (0)