Skip to content

Commit f56928d

Browse files
authored
Merge pull request Tencent#1744 from lklein53/improve-surrogate-handling
Improve surrogate handling (Tencent#1738)
2 parents 88bd956 + 6694c99 commit f56928d

File tree

2 files changed

+19
-8
lines changed

2 files changed

+19
-8
lines changed

include/rapidjson/reader.h

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,15 +1023,23 @@ class GenericReader {
10231023
is.Take();
10241024
unsigned codepoint = ParseHex4(is, escapeOffset);
10251025
RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1026-
if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDBFF)) {
1027-
// Handle UTF-16 surrogate pair
1028-
if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
1029-
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1030-
unsigned codepoint2 = ParseHex4(is, escapeOffset);
1031-
RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1032-
if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
1026+
if (RAPIDJSON_UNLIKELY(codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
1027+
// high surrogate, check if followed by valid low surrogate
1028+
if (RAPIDJSON_LIKELY(codepoint <= 0xDBFF)) {
1029+
// Handle UTF-16 surrogate pair
1030+
if (RAPIDJSON_UNLIKELY(!Consume(is, '\\') || !Consume(is, 'u')))
1031+
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1032+
unsigned codepoint2 = ParseHex4(is, escapeOffset);
1033+
RAPIDJSON_PARSE_ERROR_EARLY_RETURN_VOID;
1034+
if (RAPIDJSON_UNLIKELY(codepoint2 < 0xDC00 || codepoint2 > 0xDFFF))
1035+
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1036+
codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
1037+
}
1038+
// single low surrogate
1039+
else
1040+
{
10331041
RAPIDJSON_PARSE_ERROR(kParseErrorStringUnicodeSurrogateInvalid, escapeOffset);
1034-
codepoint = (((codepoint - 0xD800) << 10) | (codepoint2 - 0xDC00)) + 0x10000;
1042+
}
10351043
}
10361044
TEncoding::Encode(os, codepoint);
10371045
}

test/unittest/readertest.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -944,6 +944,9 @@ TEST(Reader, ParseString_Error) {
944944
TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800X\"]", 2u, 8u);
945945
TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\uD800\\uFFFF\"]", 2u, 14u);
946946

947+
// A lone low surrogate (one not preceded by a high surrogate) in a string is invalid.
948+
TEST_STRING_ERROR(kParseErrorStringUnicodeSurrogateInvalid, "[\"\\udc4d\"]", 2u, 8u);
949+
947950
// Missing a closing quotation mark in string.
948951
TEST_STRING_ERROR(kParseErrorStringMissQuotationMark, "[\"Test]", 7u, 7u);
949952

0 commit comments

Comments
 (0)