Skip to content

Commit f333684

Browse files
authored
Fix unicode escapes in jsx identifiers and extended unicode characters in jsdoc (microsoft#32716)
* Fix unicode escapes in jsx identifiers and extended unicode characters in jsdoc
* Support unicode escapes in JSDoc
* Add tests for extended escapes
1 parent 480b739 commit f333684

20 files changed: +455 -13 lines changed

src/compiler/parser.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7305,10 +7305,14 @@ namespace ts {
73057305
return createMissingNode<Identifier>(SyntaxKind.Identifier, /*reportAtCurrentPosition*/ !message, message || Diagnostics.Identifier_expected);
73067306
}
73077307

7308+
identifierCount++;
73087309
const pos = scanner.getTokenPos();
73097310
const end = scanner.getTextPos();
73107311
const result = <Identifier>createNode(SyntaxKind.Identifier, pos);
7311-
result.escapedText = escapeLeadingUnderscores(scanner.getTokenText());
7312+
if (token() !== SyntaxKind.Identifier) {
7313+
result.originalKeywordKind = token();
7314+
}
7315+
result.escapedText = escapeLeadingUnderscores(internIdentifier(scanner.getTokenValue()));
73127316
finishNode(result, end);
73137317

73147318
nextTokenJSDoc();

src/compiler/scanner.ts

Lines changed: 38 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1015,7 +1015,7 @@ namespace ts {
10151015
}
10161016

10171017
function checkForIdentifierStartAfterNumericLiteral(numericStart: number, isScientific?: boolean) {
1018-
if (!isIdentifierStart(text.charCodeAt(pos), languageVersion)) {
1018+
if (!isIdentifierStart(codePointAt(text, pos), languageVersion)) {
10191019
return;
10201020
}
10211021

@@ -2063,17 +2063,22 @@ namespace ts {
20632063
// they allow dashes
20642064
function scanJsxIdentifier(): SyntaxKind {
20652065
if (tokenIsIdentifierOrKeyword(token)) {
2066-
const firstCharPosition = pos;
2066+
// An identifier or keyword has already been parsed - check for a `-` and then append it and everything after it to the token
2067+
// Do note that this means that `scanJsxIdentifier` effectively _mutates_ the visible token without advancing to a new token
2068+
// Any caller should be expecting this behavior and should only read the pos or token value after calling it.
20672069
while (pos < end) {
20682070
const ch = text.charCodeAt(pos);
2069-
if (ch === CharacterCodes.minus || ((firstCharPosition === pos) ? isIdentifierStart(ch, languageVersion) : isIdentifierPart(ch, languageVersion))) {
2071+
if (ch === CharacterCodes.minus) {
2072+
tokenValue += "-";
20702073
pos++;
2074+
continue;
20712075
}
2072-
else {
2076+
const oldPos = pos;
2077+
tokenValue += scanIdentifierParts(); // reuse `scanIdentifierParts` so unicode escapes are handled
2078+
if (pos === oldPos) {
20732079
break;
20742080
}
20752081
}
2076-
tokenValue += text.substring(firstCharPosition, pos);
20772082
}
20782083
return token;
20792084
}
@@ -2099,8 +2104,8 @@ namespace ts {
20992104
return token = SyntaxKind.EndOfFileToken;
21002105
}
21012106

2102-
const ch = text.charCodeAt(pos);
2103-
pos++;
2107+
const ch = codePointAt(text, pos);
2108+
pos += charSize(ch);
21042109
switch (ch) {
21052110
case CharacterCodes.tab:
21062111
case CharacterCodes.verticalTab:
@@ -2138,13 +2143,34 @@ namespace ts {
21382143
return token = SyntaxKind.DotToken;
21392144
case CharacterCodes.backtick:
21402145
return token = SyntaxKind.BacktickToken;
2141-
}
2146+
case CharacterCodes.backslash:
2147+
pos--;
2148+
const extendedCookedChar = peekExtendedUnicodeEscape();
2149+
if (extendedCookedChar >= 0 && isIdentifierStart(extendedCookedChar, languageVersion)) {
2150+
pos += 3;
2151+
tokenFlags |= TokenFlags.ExtendedUnicodeEscape;
2152+
tokenValue = scanExtendedUnicodeEscape() + scanIdentifierParts();
2153+
return token = getIdentifierToken();
2154+
}
21422155

2143-
if (isIdentifierStart(ch, ScriptTarget.Latest)) {
2144-
while (isIdentifierPart(text.charCodeAt(pos), ScriptTarget.Latest) && pos < end) {
2156+
const cookedChar = peekUnicodeEscape();
2157+
if (cookedChar >= 0 && isIdentifierStart(cookedChar, languageVersion)) {
2158+
pos += 6;
2159+
tokenValue = String.fromCharCode(cookedChar) + scanIdentifierParts();
2160+
return token = getIdentifierToken();
2161+
}
2162+
error(Diagnostics.Invalid_character);
21452163
pos++;
2146-
}
2164+
return token = SyntaxKind.Unknown;
2165+
}
2166+
2167+
if (isIdentifierStart(ch, languageVersion)) {
2168+
let char = ch;
2169+
while (pos < end && isIdentifierPart(char = codePointAt(text, pos), languageVersion)) pos += charSize(char);
21472170
tokenValue = text.substring(tokenPos, pos);
2171+
if (char === CharacterCodes.backslash) {
2172+
tokenValue += scanIdentifierParts();
2173+
}
21482174
return token = getIdentifierToken();
21492175
}
21502176
else {
@@ -2265,7 +2291,7 @@ namespace ts {
22652291

22662292
/* @internal */
22672293
function charSize(ch: number) {
2268-
if (ch > 0x10000) {
2294+
if (ch >= 0x10000) {
22692295
return 2;
22702296
}
22712297
return 1;

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.leadingAsterisk.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 13,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "TypeKeyword",
2021
"escapedText": "type"
2122
},
2223
"typeExpression": {

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.noLeadingAsterisk.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 13,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "TypeKeyword",
2021
"escapedText": "type"
2122
},
2223
"typeExpression": {

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.noReturnType.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 15,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "ReturnKeyword",
2021
"escapedText": "return"
2122
}
2223
},

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.returnTag1.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 15,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "ReturnKeyword",
2021
"escapedText": "return"
2122
},
2223
"typeExpression": {

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.returnTag2.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 15,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "ReturnKeyword",
2021
"escapedText": "return"
2122
},
2223
"typeExpression": {

tests/baselines/reference/JSDocParsing/DocComments.parsesCorrectly.typeTag.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
"end": 13,
1818
"modifierFlagsCache": 0,
1919
"transformFlags": 0,
20+
"originalKeywordKind": "TypeKeyword",
2021
"escapedText": "type"
2122
},
2223
"typeExpression": {
Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
//// [file.js]
2+
/**
3+
* Adds
4+
* @param {number} 𝑚
5+
* @param {number} 𝑀
6+
*/
7+
function foo(𝑚, 𝑀) {
8+
console.log(𝑀 + 𝑚);
9+
}
10+
11+
//// [file.js]
12+
/**
13+
* Adds
14+
* @param {number} 𝑚
15+
* @param {number} 𝑀
16+
*/
17+
function foo(𝑚, 𝑀) {
18+
console.log(𝑀 + 𝑚);
19+
}
Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
=== tests/cases/compiler/file.js ===
2+
/**
3+
* Adds
4+
* @param {number} 𝑚
5+
* @param {number} 𝑀
6+
*/
7+
function foo(𝑚, 𝑀) {
8+
>foo : Symbol(foo, Decl(file.js, 0, 0))
9+
>𝑚 : Symbol(𝑚, Decl(file.js, 5, 13))
10+
>𝑀 : Symbol(𝑀, Decl(file.js, 5, 16))
11+
12+
console.log(𝑀 + 𝑚);
13+
>console.log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
14+
>console : Symbol(console, Decl(lib.dom.d.ts, --, --))
15+
>log : Symbol(Console.log, Decl(lib.dom.d.ts, --, --))
16+
>𝑀 : Symbol(𝑀, Decl(file.js, 5, 16))
17+
>𝑚 : Symbol(𝑚, Decl(file.js, 5, 13))
18+
}

0 commit comments

Comments
 (0)