Skip to content

Commit c82d312

Browse files
authored
Add support for JSON-compatible string escapes (jbeder#485)
For completeness, I've also implemented escaping for characters outside the Basic Multilingual Plane (as UTF-16 surrogate pairs), but that code path is not used yet, because no EscapeAsAsciiJson emitter option is implemented.
1 parent 370acee commit c82d312

File tree

6 files changed

+97
-18
lines changed

6 files changed

+97
-18
lines changed

include/yaml-cpp/emittermanip.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ enum EMITTER_MANIP {
1919
// output character set
2020
EmitNonAscii,
2121
EscapeNonAscii,
22+
EscapeAsJson,
2223

2324
// string manipulators
2425
// Auto, // duplicate

src/emitter.cpp

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -686,14 +686,27 @@ void Emitter::StartedScalar() { m_pState->StartedScalar(); }
686686
// *******************************************************************************************
687687
// overloads of Write
688688

689+
// Translates the emitter's output-charset manipulator into the escaping style
// handed to the Utils::Write* helpers:
//   EscapeNonAscii -> StringEscaping::NonAscii
//   EscapeAsJson   -> StringEscaping::JSON
//   anything else  -> StringEscaping::None
StringEscaping::value GetStringEscapingStyle(const EMITTER_MANIP emitterManip) {
  switch (emitterManip) {
    case EscapeNonAscii:
      return StringEscaping::NonAscii;
    case EscapeAsJson:
      return StringEscaping::JSON;
    default:
      // Every arm returns, so no trailing `break` is needed (the original one
      // was unreachable).
      return StringEscaping::None;
  }
}
700+
689701
Emitter& Emitter::Write(const std::string& str) {
690702
if (!good())
691703
return *this;
692704

693-
const bool escapeNonAscii = m_pState->GetOutputCharset() == EscapeNonAscii;
705+
StringEscaping::value stringEscaping = GetStringEscapingStyle(m_pState->GetOutputCharset());
706+
694707
const StringFormat::value strFormat =
695708
Utils::ComputeStringFormat(str, m_pState->GetStringFormat(),
696-
m_pState->CurGroupFlowType(), escapeNonAscii);
709+
m_pState->CurGroupFlowType(), stringEscaping == StringEscaping::NonAscii);
697710

698711
if (strFormat == StringFormat::Literal)
699712
m_pState->SetMapKeyFormat(YAML::LongKey, FmtScope::Local);
@@ -708,7 +721,7 @@ Emitter& Emitter::Write(const std::string& str) {
708721
Utils::WriteSingleQuotedString(m_stream, str);
709722
break;
710723
case StringFormat::DoubleQuoted:
711-
Utils::WriteDoubleQuotedString(m_stream, str, escapeNonAscii);
724+
Utils::WriteDoubleQuotedString(m_stream, str, stringEscaping);
712725
break;
713726
case StringFormat::Literal:
714727
Utils::WriteLiteralString(m_stream, str,
@@ -814,8 +827,10 @@ Emitter& Emitter::Write(char ch) {
814827
if (!good())
815828
return *this;
816829

830+
831+
817832
PrepareNode(EmitterNodeType::Scalar);
818-
Utils::WriteChar(m_stream, ch);
833+
Utils::WriteChar(m_stream, ch, GetStringEscapingStyle(m_pState->GetOutputCharset()));
819834
StartedScalar();
820835

821836
return *this;

src/emitterstate.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -231,6 +231,7 @@ bool EmitterState::SetOutputCharset(EMITTER_MANIP value,
231231
switch (value) {
232232
case EmitNonAscii:
233233
case EscapeNonAscii:
234+
case EscapeAsJson:
234235
_Set(m_charset, value, scope);
235236
return true;
236237
default:

src/emitterutils.cpp

Lines changed: 32 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -218,20 +218,34 @@ bool IsValidLiteralScalar(const std::string& str, FlowType::value flowType,
218218
});
219219
}
220220

221-
void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint) {
221+
// Splits a supplementary-plane code point (U+10000..U+10FFFF) into its UTF-16
// surrogate pair.
//
// Returns {lead (high) surrogate, trail (low) surrogate}.
// The result is only meaningful for code points above U+FFFF; callers must
// not pass BMP code points.
std::pair<uint16_t, uint16_t> EncodeUTF16SurrogatePair(int codePoint) {
  // 0xD800 with the "- 0x10000" bias already folded in (0xD7C0), so the lead
  // surrogate can be computed directly from the unbiased code point.
  const uint32_t leadOffset = 0xD800 - (0x10000 >> 10);
  const uint32_t cp = static_cast<uint32_t>(codePoint);

  // The lead surrogate must be formed by addition: the set bits of leadOffset
  // overlap those of (cp >> 10), so a bitwise OR would drop the carry and
  // produce a wrong (non-surrogate) value. The trail surrogate can use OR
  // because 0xDC00 has its low 10 bits clear.
  return {
      static_cast<uint16_t>(leadOffset + (cp >> 10)),
      static_cast<uint16_t>(0xDC00 | (cp & 0x3FF)),
  };
}
229+
230+
void WriteDoubleQuoteEscapeSequence(ostream_wrapper& out, int codePoint, StringEscaping::value stringEscapingStyle) {
222231
static const char hexDigits[] = "0123456789abcdef";
223232

224233
out << "\\";
225234
int digits = 8;
226-
if (codePoint < 0xFF) {
235+
if (codePoint < 0xFF && stringEscapingStyle != StringEscaping::JSON) {
227236
out << "x";
228237
digits = 2;
229238
} else if (codePoint < 0xFFFF) {
230239
out << "u";
231240
digits = 4;
232-
} else {
241+
} else if (stringEscapingStyle != StringEscaping::JSON) {
233242
out << "U";
234243
digits = 8;
244+
} else {
245+
auto surrogatePair = EncodeUTF16SurrogatePair(codePoint);
246+
WriteDoubleQuoteEscapeSequence(out, surrogatePair.first, stringEscapingStyle);
247+
WriteDoubleQuoteEscapeSequence(out, surrogatePair.second, stringEscapingStyle);
248+
return;
235249
}
236250

237251
// Write digits into the escape sequence
@@ -303,7 +317,7 @@ bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str) {
303317
}
304318

305319
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
306-
bool escapeNonAscii) {
320+
StringEscaping::value stringEscaping) {
307321
out << "\"";
308322
int codePoint;
309323
for (std::string::const_iterator i = str.begin();
@@ -327,16 +341,19 @@ bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
327341
case '\b':
328342
out << "\\b";
329343
break;
344+
case '\f':
345+
out << "\\f";
346+
break;
330347
default:
331348
if (codePoint < 0x20 ||
332349
(codePoint >= 0x80 &&
333350
codePoint <= 0xA0)) { // Control characters and non-breaking space
334-
WriteDoubleQuoteEscapeSequence(out, codePoint);
351+
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
335352
} else if (codePoint == 0xFEFF) { // Byte order marks (ZWNS) should be
336353
// escaped (YAML 1.2, sec. 5.2)
337-
WriteDoubleQuoteEscapeSequence(out, codePoint);
338-
} else if (escapeNonAscii && codePoint > 0x7E) {
339-
WriteDoubleQuoteEscapeSequence(out, codePoint);
354+
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
355+
} else if (stringEscaping == StringEscaping::NonAscii && codePoint > 0x7E) {
356+
WriteDoubleQuoteEscapeSequence(out, codePoint, stringEscaping);
340357
} else {
341358
WriteCodePoint(out, codePoint);
342359
}
@@ -362,7 +379,7 @@ bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
362379
return true;
363380
}
364381

365-
bool WriteChar(ostream_wrapper& out, char ch) {
382+
bool WriteChar(ostream_wrapper& out, char ch, StringEscaping::value stringEscapingStyle) {
366383
if (('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')) {
367384
out << ch;
368385
} else if (ch == '\"') {
@@ -373,13 +390,17 @@ bool WriteChar(ostream_wrapper& out, char ch) {
373390
out << R"("\n")";
374391
} else if (ch == '\b') {
375392
out << R"("\b")";
393+
} else if (ch == '\r') {
394+
out << R"("\r")";
395+
} else if (ch == '\f') {
396+
out << R"("\f")";
376397
} else if (ch == '\\') {
377398
out << R"("\\")";
378399
} else if (0x20 <= ch && ch <= 0x7e) {
379400
out << "\"" << ch << "\"";
380401
} else {
381402
out << "\"";
382-
WriteDoubleQuoteEscapeSequence(out, ch);
403+
WriteDoubleQuoteEscapeSequence(out, ch, stringEscapingStyle);
383404
out << "\"";
384405
}
385406
return true;
@@ -469,7 +490,7 @@ bool WriteTagWithPrefix(ostream_wrapper& out, const std::string& prefix,
469490

470491
bool WriteBinary(ostream_wrapper& out, const Binary& binary) {
471492
WriteDoubleQuotedString(out, EncodeBase64(binary.data(), binary.size()),
472-
false);
493+
StringEscaping::None);
473494
return true;
474495
}
475496
} // namespace Utils

src/emitterutils.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ struct StringFormat {
2424
enum value { Plain, SingleQuoted, DoubleQuoted, Literal };
2525
};
2626

27+
// Escaping style passed to the double-quoted string / character writers.
//   None     - neither of the styles below was requested.
//   NonAscii - selected by the EscapeNonAscii manipulator; code points above
//              0x7E are written as escape sequences.
//   JSON     - selected by the EscapeAsJson manipulator; escape sequences are
//              limited to JSON-compatible forms (no \x or \U sequences).
struct StringEscaping {
28+
enum value { None, NonAscii, JSON };
29+
};
30+
2731
namespace Utils {
2832
StringFormat::value ComputeStringFormat(const std::string& str,
2933
EMITTER_MANIP strFormat,
@@ -32,10 +36,11 @@ StringFormat::value ComputeStringFormat(const std::string& str,
3236

3337
bool WriteSingleQuotedString(ostream_wrapper& out, const std::string& str);
3438
bool WriteDoubleQuotedString(ostream_wrapper& out, const std::string& str,
35-
bool escapeNonAscii);
39+
StringEscaping::value stringEscaping);
3640
bool WriteLiteralString(ostream_wrapper& out, const std::string& str,
3741
std::size_t indent);
38-
bool WriteChar(ostream_wrapper& out, char ch);
42+
bool WriteChar(ostream_wrapper& out, char ch,
43+
StringEscaping::value stringEscapingStyle);
3944
bool WriteComment(ostream_wrapper& out, const std::string& str,
4045
std::size_t postCommentIndent);
4146
bool WriteAlias(ostream_wrapper& out, const std::string& str);

test/integration/emitter_test.cpp

Lines changed: 37 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -813,7 +813,43 @@ TEST_F(EmitterTest, Unicode) {
813813

814814
TEST_F(EmitterTest, DoubleQuotedUnicode) {
815815
out << DoubleQuoted << "\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2";
816-
ExpectEmit("\"\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"");
816+
ExpectEmit("\"\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"");
817+
}
818+
819+
// With DoubleQuoted format and the EscapeAsJson charset, a string holding a
// quote, a backslash, the control characters 0x01-0x1F and 1- to 4-byte UTF-8
// sequences must be emitted with control characters as \b, \t, \n, \f, \r or
// \u00XX, while the non-ASCII characters are passed through unescaped.
TEST_F(EmitterTest, EscapedJsonString) {
820+
out.SetStringFormat(DoubleQuoted);
821+
out.SetOutputCharset(EscapeAsJson);
822+
out << "\" \\ "
823+
"\x01 \x02 \x03 \x04 \x05 \x06 \x07 \x08 \x09 \x0A \x0B \x0C \x0D \x0E \x0F "
824+
"\x10 \x11 \x12 \x13 \x14 \x15 \x16 \x17 \x18 \x19 \x1A \x1B \x1C \x1D \x1E \x1F "
825+
"\x24 \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2";
826+
827+
ExpectEmit(R"("\" \\ \u0001 \u0002 \u0003 \u0004 \u0005 \u0006 \u0007 \b \t )"
828+
R"(\n \u000b \f \r \u000e \u000f \u0010 \u0011 \u0012 \u0013 )"
829+
R"(\u0014 \u0015 \u0016 \u0017 \u0018 \u0019 \u001a \u001b )"
830+
R"(\u001c \u001d \u001e \u001f )"
831+
"$ \xC2\xA2 \xE2\x82\xAC \xF0\xA4\xAD\xA2\"");
832+
}
833+
834+
// With the default output charset, single characters 0x00, 0x0C and 0x0D
// written into a sequence must be emitted as the double-quoted escapes
// "\x00", "\f" and "\r" respectively.
TEST_F(EmitterTest, EscapedCharacters) {
835+
out << BeginSeq
836+
<< '\x00'
837+
<< '\x0C'
838+
<< '\x0D'
839+
<< EndSeq;
840+
841+
ExpectEmit("- \"\\x00\"\n- \"\\f\"\n- \"\\r\"");
842+
}
843+
844+
// Same characters as the default-charset case, but with EscapeAsJson set:
// 0x00 must be emitted as "\u0000" (not "\x00"), while 0x0C and 0x0D keep
// their short forms "\f" and "\r".
TEST_F(EmitterTest, CharactersEscapedAsJson) {
845+
out.SetOutputCharset(EscapeAsJson);
846+
out << BeginSeq
847+
<< '\x00'
848+
<< '\x0C'
849+
<< '\x0D'
850+
<< EndSeq;
851+
852+
ExpectEmit("- \"\\u0000\"\n- \"\\f\"\n- \"\\r\"");
817853
}
818854

819855
TEST_F(EmitterTest, DoubleQuotedString) {

0 commit comments

Comments
 (0)