@@ -264,7 +264,8 @@ static String toHex16Bit(unsigned int x) {
264264 return result;
265265}
266266
267- static String valueToQuotedStringN (const char * value, unsigned length) {
267+ static String valueToQuotedStringN (const char * value, unsigned length,
268+ bool emitUTF8 = false ) {
268269 if (value == nullptr )
269270 return " " ;
270271
@@ -310,21 +311,31 @@ static String valueToQuotedStringN(const char* value, unsigned length) {
310311 // Should add a flag to allow this compatibility mode and prevent this
311312 // sequence from occurring.
312313 default : {
313- unsigned int cp = utf8ToCodepoint (c, end);
314- // don't escape non-control characters
315- // (short escape sequence are applied above)
316- if (cp < 0x80 && cp >= 0x20 )
317- result += static_cast <char >(cp);
318- else if (cp < 0x10000 ) { // codepoint is in Basic Multilingual Plane
319- result += " \\ u" ;
320- result += toHex16Bit (cp);
321- } else { // codepoint is not in Basic Multilingual Plane
322- // convert to surrogate pair first
323- cp -= 0x10000 ;
324- result += " \\ u" ;
325- result += toHex16Bit ((cp >> 10 ) + 0xD800 );
326- result += " \\ u" ;
327- result += toHex16Bit ((cp & 0x3FF ) + 0xDC00 );
314+ if (emitUTF8) {
315+ result += *c;
316+ } else {
317+ unsigned int codepoint = utf8ToCodepoint (c, end);
318+ const unsigned int FIRST_NON_CONTROL_CODEPOINT = 0x20 ;
319+ const unsigned int LAST_NON_CONTROL_CODEPOINT = 0x7F ;
320+ const unsigned int FIRST_SURROGATE_PAIR_CODEPOINT = 0x10000 ;
321+ // don't escape non-control characters
322+ // (short escape sequence are applied above)
323+ if (FIRST_NON_CONTROL_CODEPOINT <= codepoint &&
324+ codepoint <= LAST_NON_CONTROL_CODEPOINT) {
325+ result += static_cast <char >(codepoint);
326+ } else if (codepoint <
327+ FIRST_SURROGATE_PAIR_CODEPOINT) { // codepoint is in Basic
328+ // Multilingual Plane
329+ result += " \\ u" ;
330+ result += toHex16Bit (codepoint);
331+ } else { // codepoint is not in Basic Multilingual Plane
332+ // convert to surrogate pair first
333+ codepoint -= FIRST_SURROGATE_PAIR_CODEPOINT;
334+ result += " \\ u" ;
335+ result += toHex16Bit ((codepoint >> 10 ) + 0xD800 );
336+ result += " \\ u" ;
337+ result += toHex16Bit ((codepoint & 0x3FF ) + 0xDC00 );
338+ }
328339 }
329340 } break ;
330341 }
@@ -864,7 +875,8 @@ struct BuiltStyledStreamWriter : public StreamWriter {
864875 BuiltStyledStreamWriter (String indentation, CommentStyle::Enum cs,
865876 String colonSymbol, String nullSymbol,
866877 String endingLineFeedSymbol, bool useSpecialFloats,
867- unsigned int precision, PrecisionType precisionType);
878+ bool emitUTF8, unsigned int precision,
879+ PrecisionType precisionType);
868880 int write (Value const & root, OStream* sout) override ;
869881
870882private:
@@ -893,19 +905,20 @@ struct BuiltStyledStreamWriter : public StreamWriter {
893905 bool addChildValues_ : 1 ;
894906 bool indented_ : 1 ;
895907 bool useSpecialFloats_ : 1 ;
908+ bool emitUTF8_ : 1 ;
896909 unsigned int precision_;
897910 PrecisionType precisionType_;
898911};
// Constructor for BuiltStyledStreamWriter (shown here as diff rows: rows
// marked '-' are the pre-change text, rows marked '+' are the replacement).
//
// The String arguments (indentation, colonSymbol, nullSymbol,
// endingLineFeedSymbol) are taken by value and moved into the matching
// members. rightMargin_ is fixed at 74, and addChildValues_ / indented_ start
// out false.
//
// emitUTF8 is the parameter this change adds, placed between useSpecialFloats
// and precision; positional callers must pass it in that slot. It is stored in
// emitUTF8_ and later handed to valueToQuotedStringN when string values and
// object member names are written. With the flag set, that function's default
// case appends the current byte unchanged instead of producing a \u escape.
//
// NOTE(review): the default case previously \u-escaped code points below 0x20,
// so control bytes appear to reach it; with emitUTF8 set they would now be
// appended raw. Confirm they are still escaped, since JSON strings must not
// contain unescaped control characters.
899912BuiltStyledStreamWriter::BuiltStyledStreamWriter (
900913 String indentation, CommentStyle::Enum cs, String colonSymbol,
901914 String nullSymbol, String endingLineFeedSymbol, bool useSpecialFloats,
902- unsigned int precision, PrecisionType precisionType)
915+ bool emitUTF8, unsigned int precision, PrecisionType precisionType)
903916 : rightMargin_(74 ), indentation_(std::move(indentation)), cs_(cs),
904917 colonSymbol_(std::move(colonSymbol)), nullSymbol_(std::move(nullSymbol)),
905918 endingLineFeedSymbol_(std::move(endingLineFeedSymbol)),
906919 addChildValues_(false ), indented_(false ),
907- useSpecialFloats_(useSpecialFloats), precision_(precision ),
908- precisionType_(precisionType) {}
920+ useSpecialFloats_(useSpecialFloats), emitUTF8_(emitUTF8 ),
921+ precision_(precision), precisionType_(precisionType) {}
909922int BuiltStyledStreamWriter::write (Value const & root, OStream* sout) {
910923 sout_ = sout;
911924 addChildValues_ = false ;
@@ -942,7 +955,8 @@ void BuiltStyledStreamWriter::writeValue(Value const& value) {
942955 char const * end;
943956 bool ok = value.getString (&str, &end);
944957 if (ok)
945- pushValue (valueToQuotedStringN (str, static_cast <unsigned >(end - str)));
958+ pushValue (valueToQuotedStringN (str, static_cast <unsigned >(end - str),
959+ emitUTF8_));
946960 else
947961 pushValue (" " );
948962 break ;
@@ -966,7 +980,7 @@ void BuiltStyledStreamWriter::writeValue(Value const& value) {
966980 Value const & childValue = value[name];
967981 writeCommentBeforeValue (childValue);
968982 writeWithIndent (valueToQuotedStringN (
969- name.data (), static_cast <unsigned >(name.length ())));
983+ name.data (), static_cast <unsigned >(name.length ()), emitUTF8_ ));
970984 *sout_ << colonSymbol_;
971985 writeValue (childValue);
972986 if (++it == members.end ()) {
@@ -1142,12 +1156,13 @@ StreamWriter::Factory::~Factory() = default;
// Default constructor: populates settings_ by calling setDefaults on it, so a
// freshly built StreamWriterBuilder starts from the default option values.
11421156StreamWriterBuilder::StreamWriterBuilder () { setDefaults (&settings_); }
// Destructor is explicitly defaulted.
11431157StreamWriterBuilder::~StreamWriterBuilder () = default ;
11441158StreamWriter* StreamWriterBuilder::newStreamWriter () const {
1145- String indentation = settings_[" indentation" ].asString ();
1146- String cs_str = settings_[" commentStyle" ].asString ();
1147- String pt_str = settings_[" precisionType" ].asString ();
1148- bool eyc = settings_[" enableYAMLCompatibility" ].asBool ();
1149- bool dnp = settings_[" dropNullPlaceholders" ].asBool ();
1150- bool usf = settings_[" useSpecialFloats" ].asBool ();
1159+ const String indentation = settings_[" indentation" ].asString ();
1160+ const String cs_str = settings_[" commentStyle" ].asString ();
1161+ const String pt_str = settings_[" precisionType" ].asString ();
1162+ const bool eyc = settings_[" enableYAMLCompatibility" ].asBool ();
1163+ const bool dnp = settings_[" dropNullPlaceholders" ].asBool ();
1164+ const bool usf = settings_[" useSpecialFloats" ].asBool ();
1165+ const bool emitUTF8 = settings_[" emitUTF8" ].asBool ();
11511166 unsigned int pre = settings_[" precision" ].asUInt ();
11521167 CommentStyle::Enum cs = CommentStyle::All;
11531168 if (cs_str == " All" ) {
@@ -1179,7 +1194,7 @@ StreamWriter* StreamWriterBuilder::newStreamWriter() const {
11791194 pre = 17 ;
11801195 String endingLineFeedSymbol;
11811196 return new BuiltStyledStreamWriter (indentation, cs, colonSymbol, nullSymbol,
1182- endingLineFeedSymbol, usf, pre ,
1197+ endingLineFeedSymbol, usf, emitUTF8, pre ,
11831198 precisionType);
11841199}
11851200static void getValidWriterKeys (std::set<String>* valid_keys) {
@@ -1189,6 +1204,7 @@ static void getValidWriterKeys(std::set<String>* valid_keys) {
11891204 valid_keys->insert (" enableYAMLCompatibility" );
11901205 valid_keys->insert (" dropNullPlaceholders" );
11911206 valid_keys->insert (" useSpecialFloats" );
1207+ valid_keys->insert (" emitUTF8" );
11921208 valid_keys->insert (" precision" );
11931209 valid_keys->insert (" precisionType" );
11941210}
@@ -1220,6 +1236,7 @@ void StreamWriterBuilder::setDefaults(Json::Value* settings) {
12201236 (*settings)[" enableYAMLCompatibility" ] = false ;
12211237 (*settings)[" dropNullPlaceholders" ] = false ;
12221238 (*settings)[" useSpecialFloats" ] = false ;
1239+ (*settings)[" emitUTF8" ] = false ;
12231240 (*settings)[" precision" ] = 17 ;
12241241 (*settings)[" precisionType" ] = " significant" ;
12251242 // ! [StreamWriterBuilderDefaults]
0 commit comments