Skip to content

Commit 17fa85a

Browse files
committed
Add Base64 test cases with and without inline assembly
Adds two implementations of Base64 encoding as specified in RFC 4648. Implementation (1) uses inline assembly, while Implementation (2) is written purely in Solidity. Assertions replicating the Base64 test vectors from the RFC are added to ensure that both implementations conform to the specification.
1 parent 198b705 commit 17fa85a

File tree

3 files changed

+196
-0
lines changed

3 files changed

+196
-0
lines changed
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
// SPDX-License-Identifier: MIT
2+
3+
pragma solidity ^0.8.0;
4+
5+
/**
6+
* @dev Provides a set of functions to operate with Base64 strings.
7+
*/
8+
library InlineAsmBase64 {
    /**
     * @dev Base64 encoding table: the standard alphabet of RFC 4648, section 4.
     */
    string internal constant _TABLE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    /**
     * @dev Converts a `bytes` to its Base64 `string` representation.
     *
     * The output uses the standard alphabet and is padded with `=` so that its
     * length is always a multiple of 4. Empty input yields the empty string.
     *
     * Inspired by the OpenZeppelin Base64 implementation:
     * https://github.com/OpenZeppelin/openzeppelin-contracts/pull/2884/commits/157c32b65a15cb0b58257543643cafa1cebf883a
     *
     * @param data The raw bytes to encode.
     * @return The Base64 encoding of `data`.
     */
    function encode(bytes memory data) internal pure returns (string memory) {
        if (data.length == 0) return "";

        // Loads the table into memory
        string memory table = _TABLE;

        // Encoding takes 3-byte chunks of binary data from the `bytes` data parameter
        // and splits each into 4 numbers of 6 bits.
        // The final Base64 length is the `bytes` data length multiplied by 4/3, rounded up:
        // - `data.length + 2`  -> Round up
        // - `/ 3`              -> Number of 3-byte chunks
        // - `4 *`              -> 4 characters for each chunk
        uint256 encodedLen = 4 * ((data.length + 2) / 3);

        // `new string` already stores `encodedLen` in the length slot.
        string memory result = new string(encodedLen);

        assembly {
            // Prepare the lookup table. `mload(tablePtr + i)` ends on the byte at
            // `table + 32 + i`, i.e. the i-th table character lands in the lowest
            // byte of the loaded word, which is the byte `mstore8` writes.
            let tablePtr := add(table, 1)

            // Prepare input pointer. After `dataPtr += 3`, `mload(dataPtr)` has the
            // current 3 input bytes in its 24 least significant bits.
            let dataPtr := data
            let endPtr := add(dataPtr, mload(data))

            // When the input length is not a multiple of 3, the last iteration reads
            // 1 or 2 bytes located after the end of the data. Those bytes must count
            // as zero, otherwise the last significant output character is corrupted.
            // Cache the word that follows the data, zero it for the duration of the
            // loop, and restore it afterwards.
            let afterPtr := add(endPtr, 0x20)
            let afterCache := mload(afterPtr)
            mstore(afterPtr, 0x00)

            // Prepare result pointer, jump over length
            let resultPtr := add(result, 32)

            // Run over the input, 3 bytes at a time
            for {

            } lt(dataPtr, endPtr) {

            } {
                // Advance 3 bytes
                dataPtr := add(dataPtr, 3)
                let input := mload(dataPtr)

                // To write each character, shift the 3-byte (24-bit) chunk 4
                // times in blocks of 6 bits for each character (18, 12, 6, 0)
                // and apply logical AND with 0x3F to extract the 6-bit group.
                // Add the 6-bit group to the table pointer to index into the
                // table and acquire the character to write. Finally, write
                // the character to the result pointer.

                mstore8(resultPtr, mload(add(tablePtr, and(shr(18, input), 0x3F))))
                resultPtr := add(resultPtr, 1) // Advance

                mstore8(resultPtr, mload(add(tablePtr, and(shr(12, input), 0x3F))))
                resultPtr := add(resultPtr, 1) // Advance

                mstore8(resultPtr, mload(add(tablePtr, and(shr(6, input), 0x3F))))
                resultPtr := add(resultPtr, 1) // Advance

                mstore8(resultPtr, mload(add(tablePtr, and(input, 0x3F))))
                resultPtr := add(resultPtr, 1) // Advance
            }

            // Put back the word that was zeroed before the loop
            mstore(afterPtr, afterCache)

            // When the data length is not a multiple of 3, the tail of the
            // output is replaced with `=` (0x3d) padding characters
            switch mod(mload(data), 3)
            case 1 {
                mstore8(sub(resultPtr, 1), 0x3d)
                mstore8(sub(resultPtr, 2), 0x3d)
            }
            case 2 {
                mstore8(sub(resultPtr, 1), 0x3d)
            }
        }

        return result;
    }
}
Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
// SPDX-License-Identifier: MIT
2+
3+
pragma solidity ^0.8.0;
4+
5+
/**
6+
* @dev Provides a set of functions to operate with Base64 strings.
7+
*/
8+
library NoAsmBase64 {
    // Standard Base64 alphabet; the character for a 6-bit value `v` is `TABLE[v]`.
    bytes private constant TABLE = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

    /**
     * @dev Converts a `bytes` to its Base64 `string` representation using
     * plain Solidity only (no inline assembly).
     *
     * Every group of up to 3 input bytes produces 4 output characters; an
     * incomplete final group is completed with `=` padding. Empty input
     * yields the empty string.
     *
     * @param data The raw bytes to encode.
     * @return The Base64 encoding of `data`.
     */
    function encode(bytes memory data) internal pure returns (string memory) {
        uint256 inputLen = data.length;
        if (inputLen == 0) return "";

        bytes memory alphabet = TABLE;
        bytes memory encoded = new bytes(4 * ((inputLen + 2) / 3));
        uint256 outIdx = 0;

        for (uint256 inIdx = 0; inIdx < inputLen; inIdx += 3) {
            // Assemble the 24-bit group; positions past the end of the input stay zero.
            uint24 group = uint24(uint8(data[inIdx])) << 16;
            if (inIdx + 1 < inputLen) {
                group += uint24(uint8(data[inIdx + 1])) << 8;
            }
            if (inIdx + 2 < inputLen) {
                group += uint24(uint8(data[inIdx + 2]));
            }

            // Emit the four 6-bit slices, most significant first.
            encoded[outIdx] = alphabet[uint8(group >> 18) & 0x3f];
            encoded[outIdx + 1] = alphabet[uint8(group >> 12) & 0x3f];
            encoded[outIdx + 2] = alphabet[uint8(group >> 6) & 0x3f];
            encoded[outIdx + 3] = alphabet[uint8(group) & 0x3f];
            outIdx += 4;
        }

        // Overwrite the characters that stem from missing input bytes with `=` (0x3d).
        uint256 leftover = inputLen % 3;
        if (leftover == 1) {
            encoded[outIdx - 1] = 0x3d;
            encoded[outIdx - 2] = 0x3d;
        } else if (leftover == 2) {
            encoded[outIdx - 1] = 0x3d;
        }

        return string(encoded);
    }
}
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
==== ExternalSource: _base64/base64_inline_asm.sol ====
2+
==== ExternalSource: _base64/base64_no_inline_asm.sol ====
3+
==== Source: base64.sol ====
4+
5+
import "_base64/base64_inline_asm.sol";
6+
import "_base64/base64_no_inline_asm.sol";
7+
8+
contract test {
    /// @dev Encodes `data` with the inline-assembly Base64 implementation.
    function encode_inline_asm(bytes memory data) external pure returns (string memory) {
        string memory encoded = InlineAsmBase64.encode(data);
        return encoded;
    }

    /// @dev Encodes `data` with the pure-Solidity Base64 implementation.
    function encode_no_asm(bytes memory data) external pure returns (string memory) {
        string memory encoded = NoAsmBase64.encode(data);
        return encoded;
    }

    /// @dev Runs the inline-assembly encoder 1000 times on "foo"; the result is discarded.
    function encode_inline_asm_large() external {
        for (uint256 round = 0; round < 1000; round++) {
            InlineAsmBase64.encode("foo");
        }
    }

    /// @dev Runs the pure-Solidity encoder 1000 times on "foo"; the result is discarded.
    function encode_no_asm_large() external {
        for (uint256 round = 0; round < 1000; round++) {
            NoAsmBase64.encode("foo");
        }
    }
}
29+
// Test cases derived from Base64 specification: RFC4648
30+
// https://datatracker.ietf.org/doc/html/rfc4648#section-10
31+
//
32+
// ====
33+
// EVMVersion: >=constantinople
34+
// compileViaYul: also
35+
// ----
36+
// constructor()
37+
// gas irOptimized: 450044
38+
// gas legacy: 766936
39+
// gas legacyOptimized: 543094
40+
// encode_inline_asm(bytes): 0x20, 0 -> 0x20, 0
41+
// encode_inline_asm(bytes): 0x20, 1, "f" -> 0x20, 4, "Zg=="
42+
// encode_inline_asm(bytes): 0x20, 2, "fo" -> 0x20, 4, "Zm8="
43+
// encode_inline_asm(bytes): 0x20, 3, "foo" -> 0x20, 4, "Zm9v"
44+
// encode_inline_asm(bytes): 0x20, 4, "foob" -> 0x20, 8, "Zm9vYg=="
45+
// encode_inline_asm(bytes): 0x20, 5, "fooba" -> 0x20, 8, "Zm9vYmE="
46+
// encode_inline_asm(bytes): 0x20, 6, "foobar" -> 0x20, 8, "Zm9vYmFy"
47+
// encode_no_asm(bytes): 0x20, 0 -> 0x20, 0
48+
// encode_no_asm(bytes): 0x20, 1, "f" -> 0x20, 4, "Zg=="
49+
// encode_no_asm(bytes): 0x20, 2, "fo" -> 0x20, 4, "Zm8="
50+
// encode_no_asm(bytes): 0x20, 3, "foo" -> 0x20, 4, "Zm9v"
51+
// encode_no_asm(bytes): 0x20, 4, "foob" -> 0x20, 8, "Zm9vYg=="
52+
// encode_no_asm(bytes): 0x20, 5, "fooba" -> 0x20, 8, "Zm9vYmE="
53+
// encode_no_asm(bytes): 0x20, 6, "foobar" -> 0x20, 8, "Zm9vYmFy"
54+
// encode_inline_asm_large()
55+
// gas irOptimized: 1385047
56+
// gas legacy: 1658033
57+
// gas legacyOptimized: 1210033
58+
// encode_no_asm_large()
59+
// gas irOptimized: 3335101
60+
// gas legacy: 4801077
61+
// gas legacyOptimized: 2929077

0 commit comments

Comments
 (0)