Skip to content

Commit 2ad7162

Browse files
author
Andy
authored
Merge pull request #10747 from Microsoft/react_emit_entities_2
When emitting react code, replace HTML numeric entities with their encoded characters
2 parents d248358 + a8eb4a2 commit 2ad7162

File tree

8 files changed

+134
-12
lines changed

8 files changed

+134
-12
lines changed

src/compiler/emitter.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2030,7 +2030,7 @@ const _super = (function (geti, seti) {
20302030
emitTrailingCommentsOfPosition(commentRange.pos);
20312031
}
20322032

2033-
emitExpression(node.initializer);
2033+
emitExpression(initializer);
20342034
}
20352035

20362036
function emitShorthandPropertyAssignment(node: ShorthandPropertyAssignment) {

src/compiler/parser.ts

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -905,6 +905,10 @@ namespace ts {
905905
return currentToken = scanner.scanJsxToken();
906906
}
907907

908+
/** Has the scanner scan a JSX attribute value, stores the resulting token kind in `currentToken`, and returns it. */
function scanJsxAttributeValue(): SyntaxKind {
909+
return currentToken = scanner.scanJsxAttributeValue();
910+
}
911+
908912
function speculationHelper<T>(callback: () => T, isLookAhead: boolean): T {
909913
// Keep track of the state we'll need to rollback to if lookahead fails (or if the
910914
// caller asked us to always reset our state).
@@ -3831,8 +3835,8 @@ namespace ts {
38313835
scanJsxIdentifier();
38323836
const node = <JsxAttribute>createNode(SyntaxKind.JsxAttribute);
38333837
node.name = parseIdentifierName();
3834-
if (parseOptional(SyntaxKind.EqualsToken)) {
3835-
switch (token()) {
3838+
if (token() === SyntaxKind.EqualsToken) {
3839+
switch (scanJsxAttributeValue()) {
38363840
case SyntaxKind.StringLiteral:
38373841
node.initializer = parseLiteralNode();
38383842
break;

src/compiler/scanner.ts

Lines changed: 18 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ namespace ts {
2727
reScanSlashToken(): SyntaxKind;
2828
reScanTemplateToken(): SyntaxKind;
2929
scanJsxIdentifier(): SyntaxKind;
30+
scanJsxAttributeValue(): SyntaxKind;
3031
reScanJsxToken(): SyntaxKind;
3132
scanJsxToken(): SyntaxKind;
3233
scanJSDocToken(): SyntaxKind;
@@ -817,6 +818,7 @@ namespace ts {
817818
reScanSlashToken,
818819
reScanTemplateToken,
819820
scanJsxIdentifier,
821+
scanJsxAttributeValue,
820822
reScanJsxToken,
821823
scanJsxToken,
822824
scanJSDocToken,
@@ -911,7 +913,7 @@ namespace ts {
911913
return value;
912914
}
913915

914-
function scanString(): string {
916+
function scanString(allowEscapes = true): string {
915917
const quote = text.charCodeAt(pos);
916918
pos++;
917919
let result = "";
@@ -929,7 +931,7 @@ namespace ts {
929931
pos++;
930932
break;
931933
}
932-
if (ch === CharacterCodes.backslash) {
934+
if (ch === CharacterCodes.backslash && allowEscapes) {
933935
result += text.substring(start, pos);
934936
result += scanEscapeSequence();
935937
start = pos;
@@ -1737,6 +1739,20 @@ namespace ts {
17371739
return token;
17381740
}
17391741

1742+
/**
 * Scans a JSX attribute value beginning at the current position (`startPos` is reset to `pos`).
 *
 * When the next character is a double or single quote, the value is scanned as a string with
 * escape processing turned off (`allowEscapes` is false), stored in `tokenValue`, and reported
 * as a `StringLiteral` token. Anything else is handed to the regular `scan()`.
 */
function scanJsxAttributeValue(): SyntaxKind {
1743+
startPos = pos;
1744+
1745+
switch (text.charCodeAt(pos)) {
1746+
case CharacterCodes.doubleQuote:
1747+
case CharacterCodes.singleQuote:
1748+
tokenValue = scanString(/*allowEscapes*/ false);
1749+
return token = SyntaxKind.StringLiteral;
1750+
default:
1751+
// If this scans anything other than `{`, it's a parse error.
1752+
return scan();
1753+
}
1754+
}
1755+
17401756
function scanJSDocToken(): SyntaxKind {
17411757
if (pos >= end) {
17421758
return token = SyntaxKind.EndOfFileToken;

src/compiler/transformers/jsx.ts

Lines changed: 20 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,8 @@ namespace ts {
140140
return createLiteral(true);
141141
}
142142
else if (node.kind === SyntaxKind.StringLiteral) {
143-
return node;
143+
const decoded = tryDecodeEntities((<StringLiteral>node).text);
144+
return decoded ? createLiteral(decoded, /*location*/ node) : node;
144145
}
145146
else if (node.kind === SyntaxKind.JsxExpression) {
146147
return visitJsxExpression(<JsxExpression>node);
@@ -210,19 +211,31 @@ namespace ts {
210211
}
211212

212213
/**
 * Replaces character entity references such as "&nbsp;", "&#123;", and "&#x1F600;" with the
 * characters they encode.
 * See https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
 *
 * - Decimal (`&#NNN;`) and hexadecimal (`&#xHHH;`) references are decoded as Unicode code points;
 *   code points above U+FFFF are emitted as a UTF-16 surrogate pair.
 * - Numeric references above U+10FFFF (e.g. "&#xDEADBEEF;") do not name a character and are left
 *   in the text unchanged.
 * - Named references are looked up in `entities`; unknown names are left in the text unchanged.
 *
 * @param text The raw JSX text or attribute string.
 * @returns `text` with every recognized entity reference replaced.
 */
function decodeEntities(text: string): string {
    /**
     * UTF-16 encodes a single code point using only `String.fromCharCode`.
     * Returns `undefined` when the value lies beyond the Unicode range.
     */
    function encodeCodePoint(codePoint: number): string | undefined {
        // Checked first so that huge values never reach the bitwise operations below.
        if (codePoint > 0x10FFFF) {
            return undefined;
        }
        if (codePoint <= 0xFFFF) {
            return String.fromCharCode(codePoint);
        }
        // `String.fromCharCode` truncates to 16 bits, so build the surrogate pair by hand.
        const offset = codePoint - 0x10000;
        return String.fromCharCode(0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
    }

    // Capture groups: 1 = decimal digits, 2 = hex digits (after "#x"), 3 = entity name.
    return text.replace(/&(?:#(?:(\d+)|x([\da-fA-F]+))|(\w+));/g, (match: string, decimal: string, hex: string, word: string) => {
        if (decimal || hex) {
            const encoded = encodeCodePoint(decimal ? parseInt(decimal, 10) : parseInt(hex, 16));
            // An out-of-range reference is not a valid entity: replace it with itself.
            return encoded === undefined ? match : encoded;
        }
        const ch = entities[word];
        // If this is not a valid entity, then just use `match` (replace it with itself, i.e. don't replace)
        return ch ? String.fromCharCode(ch) : match;
    });
}
225232

233+
/**
 * Like `decodeEntities`, but returns `undefined` when decoding leaves `text` unchanged
 * (the decoded result is strictly equal to the input), so a caller can tell that there
 * was nothing to replace.
 */
234+
function tryDecodeEntities(text: string): string | undefined {
235+
const decoded = decodeEntities(text);
236+
return decoded === text ? undefined : decoded;
237+
}
238+
226239
function getTagName(node: JsxElement | JsxOpeningLikeElement): Expression {
227240
if (node.kind === SyntaxKind.JsxElement) {
228241
return getTagName((<JsxElement>node).openingElement);

tests/baselines/reference/tsxReactEmitEntities.js

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,27 @@ declare var React: any;
99

1010
<div>Dot goes here: &middot; &notAnEntity; </div>;
1111
<div>Be careful of &quot;-ed strings!</div>;
12+
<div>&#0123;&#123;braces&#x7d;&#x7D;</div>;
13+
// Escapes do nothing
14+
<div>\n</div>;
15+
16+
// Also works in string literal attributes
17+
<div attr="&#0123;&hellip;&#x7D;\"></div>;
18+
// Does not happen for a string literal that happens to be inside an attribute (and escapes then work)
19+
<div attr={"&#0123;&hellip;&#x7D;\""}></div>;
20+
// Preserves single quotes
21+
<div attr='"'></div>
1222

1323

1424
//// [file.js]
1525
React.createElement("div", null, "Dot goes here: \u00B7 &notAnEntity; ");
1626
React.createElement("div", null, "Be careful of \"-ed strings!");
27+
React.createElement("div", null, "{{braces}}");
28+
// Escapes do nothing
29+
React.createElement("div", null, "\\n");
30+
// Also works in string literal attributes
31+
React.createElement("div", { attr: "{\u2026}\\" });
32+
// Does not happen for a string literal that happens to be inside an attribute (and escapes then work)
33+
React.createElement("div", { attr: "&#0123;&hellip;&#x7D;\"" });
34+
// Preserves single quotes
35+
React.createElement("div", { attr: '"' });

tests/baselines/reference/tsxReactEmitEntities.symbols

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,3 +23,30 @@ declare var React: any;
2323
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
2424
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
2525

26+
<div>&#0123;&#123;braces&#x7d;&#x7D;</div>;
27+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
28+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
29+
30+
// Escapes do nothing
31+
<div>\n</div>;
32+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
33+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
34+
35+
// Also works in string literal attributes
36+
<div attr="&#0123;&hellip;&#x7D;\"></div>;
37+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
38+
>attr : Symbol(unknown)
39+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
40+
41+
// Does not happen for a string literal that happens to be inside an attribute (and escapes then work)
42+
<div attr={"&#0123;&hellip;&#x7D;\""}></div>;
43+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
44+
>attr : Symbol(unknown)
45+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
46+
47+
// Preserves single quotes
48+
<div attr='"'></div>
49+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
50+
>attr : Symbol(unknown)
51+
>div : Symbol(JSX.IntrinsicElements, Decl(file.tsx, 1, 22))
52+

tests/baselines/reference/tsxReactEmitEntities.types

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,36 @@ declare var React: any;
2525
>div : any
2626
>div : any
2727

28+
<div>&#0123;&#123;braces&#x7d;&#x7D;</div>;
29+
><div>&#0123;&#123;braces&#x7d;&#x7D;</div> : JSX.Element
30+
>div : any
31+
>div : any
32+
33+
// Escapes do nothing
34+
<div>\n</div>;
35+
><div>\n</div> : JSX.Element
36+
>div : any
37+
>div : any
38+
39+
// Also works in string literal attributes
40+
<div attr="&#0123;&hellip;&#x7D;\"></div>;
41+
><div attr="&#0123;&hellip;&#x7D;\"></div> : JSX.Element
42+
>div : any
43+
>attr : any
44+
>div : any
45+
46+
// Does not happen for a string literal that happens to be inside an attribute (and escapes then work)
47+
<div attr={"&#0123;&hellip;&#x7D;\""}></div>;
48+
><div attr={"&#0123;&hellip;&#x7D;\""}></div> : JSX.Element
49+
>div : any
50+
>attr : any
51+
>"&#0123;&hellip;&#x7D;\"" : string
52+
>div : any
53+
54+
// Preserves single quotes
55+
<div attr='"'></div>
56+
><div attr='"'></div> : JSX.Element
57+
>div : any
58+
>attr : any
59+
>div : any
60+

tests/cases/conformance/jsx/tsxReactEmitEntities.tsx

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,3 +10,13 @@ declare var React: any;
1010

1111
<div>Dot goes here: &middot; &notAnEntity; </div>;
1212
<div>Be careful of &quot;-ed strings!</div>;
13+
<div>&#0123;&#123;braces&#x7d;&#x7D;</div>;
14+
// Escapes do nothing
15+
<div>\n</div>;
16+
17+
// Also works in string literal attributes
18+
<div attr="&#0123;&hellip;&#x7D;\"></div>;
19+
// Does not happen for a string literal that happens to be inside an attribute (and escapes then work)
20+
<div attr={"&#0123;&hellip;&#x7D;\""}></div>;
21+
// Preserves single quotes
22+
<div attr='"'></div>

0 commit comments

Comments
 (0)