@@ -139,6 +139,7 @@ class JSON_Parser
139
139
140
140
virtual bool CompleteComment (Token& token);
141
141
virtual bool CompleteStringLiteral (Token& token);
142
+ int convert_unicode_to_code_point ();
142
143
bool handle_unescape_char (Token& token);
143
144
144
145
private:
@@ -652,7 +653,15 @@ bool JSON_StringParser<CharType>::CompleteComment(typename JSON_Parser<CharType>
652
653
return true ;
653
654
}
654
655
655
- void convert_append_unicode_code_unit (JSON_Parser<wchar_t >::Token& token, utf16char value)
656
+ void convert_append_unicode_code_unit (JSON_Parser<utf16char>::Token& token, utf16string value)
657
+ {
658
+ token.string_val .append (value);
659
+ }
660
+ void convert_append_unicode_code_unit (JSON_Parser<char >::Token& token, utf16string value)
661
+ {
662
+ token.string_val .append (::utility::conversions::utf16_to_utf8 (value));
663
+ }
664
+ void convert_append_unicode_code_unit (JSON_Parser<utf16char>::Token& token, utf16char value)
656
665
{
657
666
token.string_val .push_back (value);
658
667
}
@@ -662,6 +671,37 @@ void convert_append_unicode_code_unit(JSON_Parser<char>::Token& token, utf16char
662
671
token.string_val .append (::utility::conversions::utf16_to_utf8 (utf16));
663
672
}
664
673
674
+ template <typename CharType>
675
+ int JSON_Parser<CharType>::convert_unicode_to_code_point()
676
+ {
677
+ // A four-hexdigit Unicode character.
678
+ // Transform into a 16 bit code point.
679
+ int decoded = 0 ;
680
+ for (int i = 0 ; i < 4 ; ++i)
681
+ {
682
+ auto ch = NextCharacter ();
683
+ int ch_int = static_cast <int >(ch);
684
+ if (ch_int < 0 || ch_int > 127 ) return -1 ;
685
+ #ifdef _WIN32
686
+ const int isxdigitResult = _isxdigit_l (ch_int, utility::details::scoped_c_thread_locale::c_locale ());
687
+ #else
688
+ const int isxdigitResult = isxdigit (ch_int);
689
+ #endif
690
+ if (!isxdigitResult) return -1 ;
691
+
692
+ int val = _hexval[static_cast <size_t >(ch_int)];
693
+
694
+ _ASSERTE (val != -1 );
695
+
696
+ // Add the input char to the decoded number
697
+ decoded |= (val << (4 * (3 - i)));
698
+ }
699
+ return decoded;
700
+ }
701
+
702
// Inclusive bounds of the UTF-16 high-surrogate range. A \u escape that decodes
// to a value in this range must be followed by a second \u escape; the two
// values are then appended together as one UTF-16 sequence.
+ #define H_SURROGATE_START 0xD800
703
+ #define H_SURROGATE_END 0xDBFF
704
+
665
705
template <typename CharType>
666
706
inline bool JSON_Parser<CharType>::handle_unescape_char(Token& token)
667
707
{
@@ -682,26 +722,31 @@ inline bool JSON_Parser<CharType>::handle_unescape_char(Token& token)
682
722
case ' t' : token.string_val .push_back (' \t ' ); return true ;
683
723
case ' u' :
684
724
{
685
- // A four-hexdigit Unicode character.
686
- // Transform into a 16 bit code point.
687
- int decoded = 0 ;
688
- for (int i = 0 ; i < 4 ; ++i)
725
+ int decoded = convert_unicode_to_code_point ();
726
+ if (decoded == -1 )
689
727
{
690
- ch = NextCharacter ();
691
- int ch_int = static_cast <int >(ch);
692
- if (ch_int < 0 || ch_int > 127 ) return false ;
693
- #ifdef _WIN32
694
- const int isxdigitResult = _isxdigit_l (ch_int, utility::details::scoped_c_thread_locale::c_locale ());
695
- #else
696
- const int isxdigitResult = isxdigit (ch_int);
697
- #endif
698
- if (!isxdigitResult) return false ;
728
+ return false ;
729
+ }
730
+
731
+ // handle multi-block characters that start with a high-surrogate
732
+ if (decoded >= H_SURROGATE_START && decoded <= H_SURROGATE_END)
733
+ {
734
+ // skip escape character '\u'
735
+ if (NextCharacter () != ' \\ ' || NextCharacter () != ' u' )
736
+ {
737
+ return false ;
738
+ }
739
+ int decoded2 = convert_unicode_to_code_point ();
740
+
741
+ if (decoded2 == -1 )
742
+ {
743
+ return false ;
744
+ }
699
745
700
- int val = _hexval[ static_cast <size_t >(ch_int)] ;
701
- _ASSERTE (val != - 1 );
746
+ utf16string compoundUTF16 = { static_cast <utf16char>(decoded), static_cast <utf16char>(decoded2)} ;
747
+ convert_append_unicode_code_unit (token, compoundUTF16 );
702
748
703
- // Add the input char to the decoded number
704
- decoded |= (val << (4 * (3 - i)));
749
+ return true ;
705
750
}
706
751
707
752
// Construct the character based on the decoded number
@@ -1015,9 +1060,13 @@ std::unique_ptr<web::json::details::_Value> JSON_Parser<CharType>::_ParseValue(
1015
1060
{
1016
1061
switch (tkn.kind )
1017
1062
{
1018
- case JSON_Parser<CharType>::Token::TKN_OpenBrace: { return _ParseObject (tkn);
1063
+ case JSON_Parser<CharType>::Token::TKN_OpenBrace:
1064
+ {
1065
+ return _ParseObject (tkn);
1019
1066
}
1020
- case JSON_Parser<CharType>::Token::TKN_OpenBracket: { return _ParseArray (tkn);
1067
+ case JSON_Parser<CharType>::Token::TKN_OpenBracket:
1068
+ {
1069
+ return _ParseArray (tkn);
1021
1070
}
1022
1071
case JSON_Parser<CharType>::Token::TKN_StringLiteral:
1023
1072
{
0 commit comments