Skip to content

Commit a12de94

Browse files
authored
[Strings] Unescape in JSON parsing, so StringLifting can read escaped strings (#7410)
1 parent 43d635c commit a12de94

File tree

4 files changed

+111
-9
lines changed

4 files changed

+111
-9
lines changed

src/support/json.h

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
#include "support/istring.h"
4141
#include "support/safe_integer.h"
42+
#include "support/string.h"
4243

4344
namespace json {
4445

@@ -260,11 +261,17 @@ struct Value {
260261
skip();
261262
if (*curr == '"') {
262263
// String
263-
curr++;
264-
char* close = strchr(curr, '"');
264+
// Start |close| at the opening ", and in the loop below we will always
265+
// begin looking at the first character after.
266+
char* close = curr;
267+
// Skip escaped "
268+
do {
269+
close = strchr(close + 1, '"');
270+
} while (*(close - 1) == '\\');
265271
assert(close);
266272
*close = 0; // end this string, and reuse it straight from the input
267-
setString(curr);
273+
char* raw = curr + 1;
274+
unescapeAndSetString(raw);
268275
curr = close + 1;
269276
} else if (*curr == '[') {
270277
// Array
@@ -403,6 +410,23 @@ struct Value {
403410
assert(isObject());
404411
return obj->count(x) > 0;
405412
}
413+
414+
private:
415+
// If the string has no escaped characters, setString() the char* directly. If
416+
// it does require escaping, do that and intern a new string with those
417+
// contents.
418+
void unescapeAndSetString(char* str) {
419+
if (!strchr(str, '\\')) {
420+
// No escaping slash.
421+
setString(str);
422+
return;
423+
}
424+
425+
auto unescaped = wasm::String::unescapeJSONToWTF8(str);
426+
427+
setString(
428+
IString(std::string_view(unescaped.data(), unescaped.size()), false));
429+
}
406430
};
407431

408432
using Ref = Value::Ref;

src/support/string.cpp

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -432,4 +432,65 @@ bool isUTF8(std::string_view str) {
432432
return true;
433433
}
434434

435+
std::vector<char> unescapeJSONToWTF8(const char* str) {
436+
std::vector<char> unescaped;
437+
size_t i = 0;
438+
while (str[i]) {
439+
if (str[i] != '\\') {
440+
// Normal character.
441+
unescaped.push_back(str[i]);
442+
i++;
443+
continue;
444+
}
445+
446+
// Escaped character.
447+
char c = str[i + 1];
448+
if (c != 'u') {
449+
switch (c) {
450+
case 'b':
451+
c = '\b';
452+
break;
453+
case 'f':
454+
c = '\f';
455+
break;
456+
case 'n':
457+
c = '\n';
458+
break;
459+
case 'r':
460+
c = '\r';
461+
break;
462+
case 't':
463+
c = '\t';
464+
break;
465+
case 0:
466+
Fatal() << "Invalid escaped JSON ends in slash";
467+
}
468+
unescaped.push_back(c);
469+
i += 2;
470+
continue;
471+
}
472+
473+
// \uXXXX, 4-digit hex number. First, read the hex.
474+
unsigned int x;
475+
std::stringstream unhex;
476+
if (!str[i + 2] || !str[i + 3] || !str[i + 4] || !str[i + 5]) {
477+
Fatal() << "Invalid escaped JSON \\uXXXX";
478+
}
479+
unhex << std::hex << std::string_view(str + i + 2, 4);
480+
unhex >> x;
481+
482+
// Write out the results.
483+
unescaped.push_back(x & 0xff);
484+
x >>= 8;
485+
if (x) {
486+
unescaped.push_back(x);
487+
}
488+
// TODO UTF stuff
489+
490+
i += 6;
491+
}
492+
493+
return unescaped;
494+
}
495+
435496
} // namespace wasm::String

src/support/string.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ bool convertUTF16ToUTF8(std::ostream& os, std::string_view str);
102102
// Whether the string is valid UTF-8.
103103
bool isUTF8(std::string_view str);
104104

105+
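// Given the NUL-terminated contents of a properly-escaped JSON string,
// unescape it. The result is returned as a vector of bytes, which may contain
// embedded zero bytes (for example from a \u0000 escape).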
106+
std::vector<char> unescapeJSONToWTF8(const char* str);
107+
105108
} // namespace wasm::String
106109

107110
#endif // wasm_support_string_h

test/lit/passes/string-lifting-section.wast

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,9 @@
77
(module
88
;; CHECK: (type $0 (array (mut i16)))
99

10-
;; CHECK: (type $1 (func (param externref externref) (result i32)))
10+
;; CHECK: (type $1 (func))
1111

12-
;; CHECK: (type $2 (func))
12+
;; CHECK: (type $2 (func (param externref externref) (result i32)))
1313

1414
;; CHECK: (type $3 (func (param (ref null $0) i32 i32) (result (ref extern))))
1515

@@ -29,6 +29,8 @@
2929

3030
;; CHECK: (import "string.const" "1" (global $"string.const_\"foo\"" (ref extern)))
3131

32+
;; CHECK: (import "string.const" "2" (global $"string.const_\"needs\\tescaping\\00.\\\'#%\\\"\"" (ref extern)))
33+
3234
;; CHECK: (import "wasm:js-string" "fromCharCodeArray" (func $fromCharCodeArray (type $3) (param (ref null $0) i32 i32) (result (ref extern))))
3335

3436
;; CHECK: (import "wasm:js-string" "fromCodePoint" (func $fromCodePoint (type $4) (param i32) (result (ref extern))))
@@ -37,17 +39,17 @@
3739

3840
;; CHECK: (import "wasm:js-string" "intoCharCodeArray" (func $intoCharCodeArray (type $6) (param externref (ref null $0) i32) (result i32)))
3941

40-
;; CHECK: (import "wasm:js-string" "equals" (func $equals (type $1) (param externref externref) (result i32)))
42+
;; CHECK: (import "wasm:js-string" "equals" (func $equals (type $2) (param externref externref) (result i32)))
4143

42-
;; CHECK: (import "wasm:js-string" "compare" (func $compare (type $1) (param externref externref) (result i32)))
44+
;; CHECK: (import "wasm:js-string" "compare" (func $compare (type $2) (param externref externref) (result i32)))
4345

4446
;; CHECK: (import "wasm:js-string" "length" (func $length (type $7) (param externref) (result i32)))
4547

4648
;; CHECK: (import "wasm:js-string" "charCodeAt" (func $charCodeAt (type $8) (param externref i32) (result i32)))
4749

4850
;; CHECK: (import "wasm:js-string" "substring" (func $substring (type $9) (param externref i32 i32) (result (ref extern))))
4951

50-
;; CHECK: (func $consts (type $2)
52+
;; CHECK: (func $consts (type $1)
5153
;; CHECK-NEXT: (drop
5254
;; CHECK-NEXT: (string.const "foo")
5355
;; CHECK-NEXT: )
@@ -71,7 +73,19 @@
7173
(drop
7274
(string.const "foo")
7375
)
74-
;; TODO: test utf-8 etc.
76+
)
77+
78+
;; CHECK: (func $tricky-consts (type $1)
79+
;; CHECK-NEXT: (drop
80+
;; CHECK-NEXT: (string.const "needs\tescaping\00.\'#%\"")
81+
;; CHECK-NEXT: )
82+
;; CHECK-NEXT: )
83+
(func $tricky-consts
84+
;; This tricky string should remain exactly the same after lowering and
85+
;; lifting.
86+
(drop
87+
(string.const "needs\tescaping\00.'#%\"")
88+
)
7589
)
7690
)
7791

0 commit comments

Comments
 (0)