Skip to content

Commit 27dddf8

Browse files
ernestognwAmxx
andcommitted
Escape control characters in Strings.escapeJSON (#6344)
Co-authored-by: Hadrien Croubois <hadrien.croubois@gmail.com> Signed-off-by: Hadrien Croubois <hadrien.croubois@gmail.com>
1 parent f5cd8d8 commit 27dddf8

File tree

3 files changed

+54
-23
lines changed

3 files changed

+54
-23
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
### Breaking changes
1111

12+
- `Strings`: The `escapeJSON` function now escapes all control characters in the range U+0000 to U+001F per RFC-4627. Previously only backspace, tab, newline, form feed, carriage return, double quote, and backslash were escaped. Input strings containing any other control character (e.g. null `0x00`) or raw bytes in U+0001–U+001F will now produce different, longer output (e.g. `\u0000` for null). ([#6344](https://github.com/OpenZeppelin/openzeppelin-contracts/pull/6344))
1213
- `ERC1155`: Performing batch transfers with exactly one id/value in the batch no-longer calls `IERC1155Receiver.onERC1155Received`. `IERC1155Receiver.onERC1155BatchReceived` is called instead (with arrays of length one). ([#6170](https://github.com/OpenZeppelin/openzeppelin-contracts/pull/6170))
1314
- `ERC1967Proxy` and `TransparentUpgradeableProxy`: Mandate initialization during construction. Deployment now reverts with `ERC1967ProxyUninitialized` if an initialize call is not provided. Developers that rely on the previous behavior and want to disable this check can do so by overriding the internal `_unsafeAllowUninitialized` function to return true. ([#5906](https://github.com/OpenZeppelin/openzeppelin-contracts/pull/5906))
1415
- `ERC721` and `ERC1155`: Prevent setting an operator for `address(0)`. In the case of `ERC721` this type of operator allowance could lead to obfuscated mint permission. ([#6171](https://github.com/OpenZeppelin/openzeppelin-contracts/pull/6171))

contracts/utils/Strings.sol

Lines changed: 46 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -17,11 +17,7 @@ library Strings {
1717
bytes16 private constant HEX_DIGITS = "0123456789abcdef";
1818
uint8 private constant ADDRESS_LENGTH = 20;
1919
uint256 private constant SPECIAL_CHARS_LOOKUP =
20-
(1 << 0x08) | // backspace
21-
(1 << 0x09) | // tab
22-
(1 << 0x0a) | // newline
23-
(1 << 0x0c) | // form feed
24-
(1 << 0x0d) | // carriage return
20+
0xffffffff | // first 32 bits corresponding to the control characters (U+0000 to U+001F)
2521
(1 << 0x22) | // double quote
2622
(1 << 0x5c); // backslash
2723

@@ -457,37 +453,52 @@ library Strings {
457453
*
458454
* WARNING: This function should only be used in double quoted JSON strings. Single quotes are not escaped.
459455
*
460-
* NOTE: This function escapes all unicode characters, and not just the ones in ranges defined in section 2.5 of
461-
* RFC-4627 (U+0000 to U+001F, U+0022 and U+005C). ECMAScript's `JSON.parse` does recover escaped unicode
462-
* characters that are not in this range, but other tooling may provide different results.
456+
* NOTE: This function escapes backslashes (including those in \uXXXX sequences) and the characters in ranges
457+
* defined in section 2.5 of RFC-4627 (U+0000 to U+001F, U+0022 and U+005C). All control characters in U+0000
458+
* to U+001F are escaped (\b, \t, \n, \f, \r use short form; others use \u00XX). ECMAScript's `JSON.parse` does
459+
* recover escaped unicode characters that are not in this range, but other tooling may provide different results.
463460
*/
464461
function escapeJSON(string memory input) internal pure returns (string memory) {
465462
bytes memory buffer = bytes(input);
466-
bytes memory output = new bytes(2 * buffer.length); // worst case scenario
463+
464+
// Put output at the FMP. Memory will be reserved later when we figure out the actual length of the escaped
465+
// string. All write are done using _unsafeWriteBytesOffset, which avoid the (expensive) length checks for
466+
// each character written.
467+
bytes memory output;
468+
assembly ("memory-safe") {
469+
output := mload(0x40)
470+
}
467471
uint256 outputLength = 0;
468472

469473
for (uint256 i = 0; i < buffer.length; ++i) {
470-
bytes1 char = bytes1(_unsafeReadBytesOffset(buffer, i));
471-
if (((SPECIAL_CHARS_LOOKUP & (1 << uint8(char))) != 0)) {
472-
output[outputLength++] = "\\";
473-
if (char == 0x08) output[outputLength++] = "b";
474-
else if (char == 0x09) output[outputLength++] = "t";
475-
else if (char == 0x0a) output[outputLength++] = "n";
476-
else if (char == 0x0c) output[outputLength++] = "f";
477-
else if (char == 0x0d) output[outputLength++] = "r";
478-
else if (char == 0x5c) output[outputLength++] = "\\";
474+
uint8 char = uint8(bytes1(_unsafeReadBytesOffset(buffer, i)));
475+
if (((SPECIAL_CHARS_LOOKUP & (1 << char)) != 0)) {
476+
_unsafeWriteBytesOffset(output, outputLength++, "\\");
477+
if (char == 0x08) _unsafeWriteBytesOffset(output, outputLength++, "b");
478+
else if (char == 0x09) _unsafeWriteBytesOffset(output, outputLength++, "t");
479+
else if (char == 0x0a) _unsafeWriteBytesOffset(output, outputLength++, "n");
480+
else if (char == 0x0c) _unsafeWriteBytesOffset(output, outputLength++, "f");
481+
else if (char == 0x0d) _unsafeWriteBytesOffset(output, outputLength++, "r");
482+
else if (char == 0x5c) _unsafeWriteBytesOffset(output, outputLength++, "\\");
479483
else if (char == 0x22) {
480484
// solhint-disable-next-line quotes
481-
output[outputLength++] = '"';
485+
_unsafeWriteBytesOffset(output, outputLength++, '"');
486+
} else {
487+
// U+0000 to U+001F without short form: output \u00XX
488+
_unsafeWriteBytesOffset(output, outputLength++, "u");
489+
_unsafeWriteBytesOffset(output, outputLength++, "0");
490+
_unsafeWriteBytesOffset(output, outputLength++, "0");
491+
_unsafeWriteBytesOffset(output, outputLength++, HEX_DIGITS[char >> 4]);
492+
_unsafeWriteBytesOffset(output, outputLength++, HEX_DIGITS[char & 0x0f]);
482493
}
483494
} else {
484-
output[outputLength++] = char;
495+
_unsafeWriteBytesOffset(output, outputLength++, bytes1(char));
485496
}
486497
}
487-
// write the actual length and deallocate unused memory
498+
// write the actual length and reserve memory
488499
assembly ("memory-safe") {
489500
mstore(output, outputLength)
490-
mstore(0x40, add(output, shl(5, shr(5, add(outputLength, 63)))))
501+
mstore(0x40, add(output, add(outputLength, 0x20)))
491502
}
492503

493504
return string(output);
@@ -505,4 +516,17 @@ library Strings {
505516
value := mload(add(add(buffer, 0x20), offset))
506517
}
507518
}
519+
520+
/**
521+
* @dev Write a bytes1 to a bytes array without bounds checking.
522+
*
523+
* NOTE: making this function internal would mean it could be used with memory unsafe offset, and marking the
524+
* assembly block as such would prevent some optimizations.
525+
*/
526+
function _unsafeWriteBytesOffset(bytes memory buffer, uint256 offset, bytes1 value) private pure {
527+
// This is not memory safe in the general case, but all calls to this private function are within bounds.
528+
assembly ("memory-safe") {
529+
mstore8(add(add(buffer, 0x20), offset), shr(248, value))
530+
}
531+
}
508532
}

test/utils/Strings.test.js

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,13 @@ describe('Strings', function () {
352352
});
353353

354354
describe('Escape JSON string', function () {
355-
for (const input of ['', 'a', '{"a":"b/c"}', 'a\tb\nc\\d"e\rf/g\fh\bi'])
355+
for (const input of [
356+
'',
357+
'a',
358+
'{"a":"b/c"}',
359+
'a\tb\nc\\d"e\rf/g\fh\bi',
360+
'\x00\x01\x02\x03\x04\x05\x06\x07\x08\x09\x0a\x0b\x0c\x0d\x0e\x0f\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1a\x1b\x1c\x1d\x1e\x1f',
361+
])
356362
it(`escape ${JSON.stringify(input)}`, async function () {
357363
await expect(this.mock.$escapeJSON(input)).to.eventually.equal(JSON.stringify(input).slice(1, -1));
358364
});

0 commit comments

Comments
 (0)