Skip to content

Commit aa91b31

Browse files
authored
Fix Formatter UTF8 encoding (#2944)
1 parent 22e244c commit aa91b31

File tree

3 files changed

+12
-10
lines changed

3 files changed

+12
-10
lines changed

Sming/Core/Data/Format/Formatter.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -58,14 +58,14 @@ unsigned escapeControls(String& value, Options options)
5858
if(escapeChar(c, options)) {
5959
extra += 1; // "\"
6060
} else if(options[Option::unicode]) {
61-
if(uint8_t(c) < 0x20 || (c & 0x80)) {
61+
if(uint8_t(c) < 0x20) {
6262
extra += 5; // "\uNNNN"
6363
}
6464
} else if(uint8_t(c) < 0x20) {
6565
extra += 3; // "\xnn"
6666
} else if((c & 0x80) && options[Option::utf8]) {
67-
// Characters such as £ (0xa3) are escaped to 0xc2 0xa3 in UTF-8
68-
extra += 1; // 0xc2
67+
// Each input byte 0x80-0xff is treated as code point U+0080-U+00FF, which takes two bytes in UTF-8
68+
extra += 1;
6969
}
7070
}
7171
if(extra == 0) {
@@ -86,7 +86,7 @@ unsigned escapeControls(String& value, Options options)
8686
*out++ = '\\';
8787
c = esc;
8888
} else if(options[Option::unicode]) {
89-
if(uint8_t(c) < 0x20 || (c & 0x80)) {
89+
if(uint8_t(c) < 0x20) {
9090
*out++ = '\\';
9191
*out++ = 'u';
9292
*out++ = '0';
@@ -100,7 +100,8 @@ unsigned escapeControls(String& value, Options options)
100100
*out++ = hexchar(uint8_t(c) >> 4);
101101
c = hexchar(uint8_t(c) & 0x0f);
102102
} else if((c & 0x80) && options[Option::utf8]) {
103-
*out++ = 0xc2;
103+
*out++ = 0xc0 | (c >> 6);
104+
c = 0x80 | (c & 0x3f);
104105
}
105106
*out++ = c;
106107
}

Sming/Core/Data/Format/Json.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,11 +18,12 @@ namespace Format
1818
Json json;
1919

2020
/*
21-
* Check for invalid characters and replace them - can break browser
22-
* operation otherwise.
21+
* JSON requires control characters, quotes and reverse solidus (backslash) to be escaped.
2322
*
24-
* This can occur if filenames become corrupted, so here we just
25-
* substitute an underscore _ for anything which fails to match UTF8.
23+
* All other byte values from 0x20 to 0xff are left unchanged.
24+
* This is typically UTF8 but it could be binary or some other application-defined encoding.
25+
*
26+
* Therefore no validation is performed on the data.
2627
*/
2728
void Json::escape(String& value) const
2829
{

tests/HostTests/modules/Formatter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ class FormatterTest : public TestGroup
1717
TEST_CASE("JSON")
1818
{
1919
DEFINE_FSTR_LOCAL(text1b, "A JSON\\ntest string\\twith escapes\\u0012\\u0000\\n"
20-
"Worth \\\"maybe\\\" \\u00a3 0.53. Yen \\u00a5 5bn.")
20+
"Worth \\\"maybe\\\" \xa3 0.53. Yen \xa5 5bn.")
2121

2222
Serial << text1 << endl;
2323
String s(text1);

0 commit comments

Comments
 (0)