Skip to content

Commit 2eb60c2

Browse files
authored
Fix decoding of HTML entities in TSX/JSX (microsoft#35739)
1 parent cafa175 commit 2eb60c2

File tree

6 files changed: 45 additions (+45) and 22 deletions (-22)

src/compiler/scanner.ts

Lines changed: 21 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1337,20 +1337,6 @@ namespace ts {
13371337
return utf16EncodeAsString(escapedValue);
13381338
}
13391339

1340-
// Derived from the 10.1.1 UTF16Encoding of the ES6 Spec.
1341-
function utf16EncodeAsString(codePoint: number): string {
1342-
Debug.assert(0x0 <= codePoint && codePoint <= 0x10FFFF);
1343-
1344-
if (codePoint <= 65535) {
1345-
return String.fromCharCode(codePoint);
1346-
}
1347-
1348-
const codeUnit1 = Math.floor((codePoint - 65536) / 1024) + 0xD800;
1349-
const codeUnit2 = ((codePoint - 65536) % 1024) + 0xDC00;
1350-
1351-
return String.fromCharCode(codeUnit1, codeUnit2);
1352-
}
1353-
13541340
// Current character is known to be a backslash. Check for Unicode escape of the form '\uXXXX'
13551341
// and return code point value if valid Unicode escape is found. Otherwise return -1.
13561342
function peekUnicodeEscape(): number {
@@ -2339,4 +2325,25 @@ namespace ts {
23392325
}
23402326
return 1;
23412327
}
2328+
2329+
// Derived from the 10.1.1 UTF16Encoding of the ES6 Spec.
2330+
function utf16EncodeAsStringFallback(codePoint: number) {
2331+
Debug.assert(0x0 <= codePoint && codePoint <= 0x10FFFF);
2332+
2333+
if (codePoint <= 65535) {
2334+
return String.fromCharCode(codePoint);
2335+
}
2336+
2337+
const codeUnit1 = Math.floor((codePoint - 65536) / 1024) + 0xD800;
2338+
const codeUnit2 = ((codePoint - 65536) % 1024) + 0xDC00;
2339+
2340+
return String.fromCharCode(codeUnit1, codeUnit2);
2341+
}
2342+
2343+
const utf16EncodeAsStringWorker: (codePoint: number) => string = (String as any).fromCodePoint ? codePoint => String.fromCodePoint(codePoint) : utf16EncodeAsStringFallback;
2344+
2345+
/* @internal */
2346+
export function utf16EncodeAsString(codePoint: number) {
2347+
return utf16EncodeAsStringWorker(codePoint);
2348+
}
23422349
}

src/compiler/transformers/jsx.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -253,15 +253,15 @@ namespace ts {
253253
function decodeEntities(text: string): string {
254254
return text.replace(/&((#((\d+)|x([\da-fA-F]+)))|(\w+));/g, (match, _all, _number, _digits, decimal, hex, word) => {
255255
if (decimal) {
256-
return String.fromCharCode(parseInt(decimal, 10));
256+
return utf16EncodeAsString(parseInt(decimal, 10));
257257
}
258258
else if (hex) {
259-
return String.fromCharCode(parseInt(hex, 16));
259+
return utf16EncodeAsString(parseInt(hex, 16));
260260
}
261261
else {
262262
const ch = entities.get(word);
263263
// If this is not a valid entity, then just use `match` (replace it with itself, i.e. don't replace)
264-
return ch ? String.fromCharCode(ch) : match;
264+
return ch ? utf16EncodeAsString(ch) : match;
265265
}
266266
});
267267
}

tests/baselines/reference/tsxReactEmitEntities.js

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,9 @@ declare var React: any;
1818
// Does not happen for a string literal that happens to be inside an attribute (and escapes then work)
1919
<div attr={"&#0123;&hellip;&#x7D;\""}></div>;
2020
// Preserves single quotes
21-
<div attr='"'></div>
22-
21+
<div attr='"'></div>;
22+
// https://github.com/microsoft/TypeScript/issues/35732
23+
<div>&#x1F408;&#x1F415;&#128007;&#128017;</div>;
2324

2425
//// [file.js]
2526
React.createElement("div", null, "Dot goes here: \u00B7 &notAnEntity; ");
@@ -33,3 +34,5 @@ React.createElement("div", { attr: "{\u2026}\\" });
3334
React.createElement("div", { attr: "&#0123;&hellip;&#x7D;\"" });
3435
// Preserves single quotes
3536
React.createElement("div", { attr: '"' });
37+
// https://github.com/microsoft/TypeScript/issues/35732
38+
React.createElement("div", null, "\uD83D\uDC08\uD83D\uDC15\uD83D\uDC07\uD83D\uDC11");

tests/baselines/reference/tsxReactEmitEntities.symbols

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,13 @@ declare var React: any;
4545
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
4646

4747
// Preserves single quotes
48-
<div attr='"'></div>
48+
<div attr='"'></div>;
4949
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
5050
>attr : Symbol(attr, Decl(file.tsx, 19, 4))
5151
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
5252

53+
// https://github.com/microsoft/TypeScript/issues/35732
54+
<div>&#x1F408;&#x1F415;&#128007;&#128017;</div>;
55+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
56+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
57+

tests/baselines/reference/tsxReactEmitEntities.types

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,9 +46,15 @@ declare var React: any;
4646
>div : any
4747

4848
// Preserves single quotes
49-
<div attr='"'></div>
49+
<div attr='"'></div>;
5050
><div attr='"'></div> : JSX.Element
5151
>div : any
5252
>attr : string
5353
>div : any
5454

55+
// https://github.com/microsoft/TypeScript/issues/35732
56+
<div>&#x1F408;&#x1F415;&#128007;&#128017;</div>;
57+
><div>&#x1F408;&#x1F415;&#128007;&#128017;</div> : JSX.Element
58+
>div : any
59+
>div : any
60+

tests/cases/conformance/jsx/tsxReactEmitEntities.tsx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,4 +19,6 @@ declare var React: any;
1919
// Does not happen for a string literal that happens to be inside an attribute (and escapes then work)
2020
<div attr={"&#0123;&hellip;&#x7D;\""}></div>;
2121
// Preserves single quotes
22-
<div attr='"'></div>
22+
<div attr='"'></div>;
23+
// https://github.com/microsoft/TypeScript/issues/35732
24+
<div>&#x1F408;&#x1F415;&#128007;&#128017;</div>;

0 commit comments

Comments
 (0)