Skip to content

Commit b8f0414

Browse files
authored
Merge pull request Tencent#907 from almavi/master
Fixed bug on space hexadecimal encoding
2 parents 430e8d4 + 85500e8 commit b8f0414

File tree

4 files changed

+14
-10
lines changed

4 files changed

+14
-10
lines changed

include/rapidjson/reader.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -955,7 +955,7 @@ class GenericReader {
955955
if (c == '\0')
956956
RAPIDJSON_PARSE_ERROR(kParseErrorStringMissQuotationMark, is.Tell());
957957
else
958-
RAPIDJSON_PARSE_ERROR(kParseErrorStringEscapeInvalid, is.Tell());
958+
RAPIDJSON_PARSE_ERROR(kParseErrorStringInvalidEncoding, is.Tell());
959959
}
960960
else {
961961
size_t offset = is.Tell();
@@ -990,7 +990,7 @@ class GenericReader {
990990
// The rest of string using SIMD
991991
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
992992
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
993-
static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
993+
static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
994994
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
995995
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
996996
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
@@ -999,7 +999,7 @@ class GenericReader {
999999
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
10001000
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
10011001
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1002-
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
1002+
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
10031003
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
10041004
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
10051005
if (RAPIDJSON_UNLIKELY(r != 0)) { // at least one character is a quote, backslash, or control character (< 0x20)
@@ -1053,7 +1053,7 @@ class GenericReader {
10531053
// The rest of string using SIMD
10541054
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
10551055
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1056-
static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
1056+
static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
10571057
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
10581058
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
10591059
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
@@ -1062,7 +1062,7 @@ class GenericReader {
10621062
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
10631063
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
10641064
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1065-
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
1065+
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
10661066
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
10671067
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
10681068
if (RAPIDJSON_UNLIKELY(r != 0)) { // at least one character is a quote, backslash, or control character (< 0x20)
@@ -1101,7 +1101,7 @@ class GenericReader {
11011101
// The rest of string using SIMD
11021102
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
11031103
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
1104-
static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
1104+
static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
11051105
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
11061106
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
11071107
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
@@ -1110,7 +1110,7 @@ class GenericReader {
11101110
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
11111111
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
11121112
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
1113-
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
1113+
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
11141114
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
11151115
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
11161116
if (RAPIDJSON_UNLIKELY(r != 0)) { // at least one character is a quote, backslash, or control character (< 0x20)

include/rapidjson/writer.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -585,7 +585,7 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
585585
// The rest of string using SIMD
586586
static const char dquote[16] = { '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"', '\"' };
587587
static const char bslash[16] = { '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\', '\\' };
588-
static const char space[16] = { 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19, 0x19 };
588+
static const char space[16] = { 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F, 0x1F };
589589
const __m128i dq = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&dquote[0]));
590590
const __m128i bs = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&bslash[0]));
591591
const __m128i sp = _mm_loadu_si128(reinterpret_cast<const __m128i *>(&space[0]));
@@ -594,7 +594,7 @@ inline bool Writer<StringBuffer>::ScanWriteUnescapedString(StringStream& is, siz
594594
const __m128i s = _mm_load_si128(reinterpret_cast<const __m128i *>(p));
595595
const __m128i t1 = _mm_cmpeq_epi8(s, dq);
596596
const __m128i t2 = _mm_cmpeq_epi8(s, bs);
597-
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x19) == 0x19
597+
const __m128i t3 = _mm_cmpeq_epi8(_mm_max_epu8(s, sp), sp); // s < 0x20 <=> max(s, 0x1F) == 0x1F
598598
const __m128i x = _mm_or_si128(_mm_or_si128(t1, t2), t3);
599599
unsigned short r = static_cast<unsigned short>(_mm_movemask_epi8(x));
600600
if (RAPIDJSON_UNLIKELY(r != 0)) { // at least one character is a quote, backslash, or control character (< 0x20)

test/unittest/readertest.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -725,6 +725,8 @@ TEST(Reader, ParseString_Error) {
725725

726726
// Malformed ASCII sequence
727727
TEST_STRINGENCODING_ERROR(ASCII<>, UTF8<>, char, ARRAY('[', '\"', char(0x80u), '\"', ']', '\0'));
728+
TEST_STRINGENCODING_ERROR(ASCII<>, UTF8<>, char, ARRAY('[', '\"', char(0x01u), '\"', ']', '\0'));
729+
TEST_STRINGENCODING_ERROR(ASCII<>, UTF8<>, char, ARRAY('[', '\"', char(0x1Cu), '\"', ']', '\0'));
728730

729731
#undef ARRAY
730732
#undef TEST_STRINGARRAY_ERROR

test/unittest/writertest.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -412,8 +412,10 @@ TEST(Writer, ValidateEncoding) {
412412
EXPECT_TRUE(writer.String("\xC2\xA2")); // Cents sign U+00A2
413413
EXPECT_TRUE(writer.String("\xE2\x82\xAC")); // Euro sign U+20AC
414414
EXPECT_TRUE(writer.String("\xF0\x9D\x84\x9E")); // G clef sign U+1D11E
415+
EXPECT_TRUE(writer.String("\x01")); // SOH control U+0001
416+
EXPECT_TRUE(writer.String("\x1B")); // Escape control U+001B
415417
writer.EndArray();
416-
EXPECT_STREQ("[\"\x24\",\"\xC2\xA2\",\"\xE2\x82\xAC\",\"\xF0\x9D\x84\x9E\"]", buffer.GetString());
418+
EXPECT_STREQ("[\"\x24\",\"\xC2\xA2\",\"\xE2\x82\xAC\",\"\xF0\x9D\x84\x9E\",\"\\u0001\",\"\\u001B\"]", buffer.GetString());
417419
}
418420

419421
// Fail in decoding invalid UTF-8 sequence http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt

0 commit comments

Comments
 (0)