Skip to content

Commit 3ab3e24

Browse files
committed
✨ allow surrogate pairs in capture group names (fixes #10)
1 parent b4a2ad2 commit 3ab3e24

File tree

4 files changed: 523 additions and 24 deletions.

src/unicode/index.ts

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,3 +119,15 @@ export function digitToInt(code: number): number {
119119
}
120120
return code - DigitZero
121121
}
122+
123+
/**
 * Check whether the given value is a UTF-16 lead (high) surrogate.
 *
 * @param code The code unit / code point to check.
 * @returns `true` if `code` is in the inclusive range U+D800..U+DBFF.
 */
export function isLeadSurrogate(code: number): boolean {
    return code >= 0xd800 && code <= 0xdbff
}
126+
127+
/**
 * Check whether the given value is a UTF-16 trail (low) surrogate.
 *
 * @param code The code unit / code point to check.
 * @returns `true` if `code` is in the inclusive range U+DC00..U+DFFF.
 */
export function isTrailSurrogate(code: number): boolean {
    return code >= 0xdc00 && code <= 0xdfff
}
130+
131+
/**
 * Combine a UTF-16 surrogate pair into the supplementary code point it
 * encodes.
 *
 * The arguments are not validated: the arithmetic is applied as-is, so the
 * result is only a meaningful code point when `lead` is in U+D800..U+DBFF
 * and `trail` is in U+DC00..U+DFFF.
 *
 * @param lead The lead (high) surrogate.
 * @param trail The trail (low) surrogate.
 * @returns The combined code point (U+10000..U+10FFFF for a valid pair).
 */
export function combineSurrogatePair(lead: number, trail: number): number {
    // The lead surrogate carries the upper 10 bits and the trail surrogate
    // the lower 10 bits of the offset from U+10000.
    return (lead - 0xd800) * 0x400 + (trail - 0xdc00) + 0x10000
}

src/validator.ts

Lines changed: 51 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -20,16 +20,6 @@ import {
2020
FullStop,
2121
GreaterThanSign,
2222
HyphenMinus,
23-
isDecimalDigit,
24-
isHexDigit,
25-
isIdContinue,
26-
isIdStart,
27-
isLatinLetter,
28-
isLineTerminator,
29-
isOctalDigit,
30-
isValidLoneUnicodeProperty,
31-
isValidUnicodeProperty,
32-
isValidUnicode,
3323
LatinCapitalLetterB,
3424
LatinCapitalLetterD,
3525
LatinCapitalLetterP,
@@ -70,6 +60,19 @@ import {
7060
VerticalLine,
7161
ZeroWidthJoiner,
7262
ZeroWidthNonJoiner,
63+
combineSurrogatePair,
64+
isDecimalDigit,
65+
isHexDigit,
66+
isIdContinue,
67+
isIdStart,
68+
isLatinLetter,
69+
isLeadSurrogate,
70+
isLineTerminator,
71+
isOctalDigit,
72+
isTrailSurrogate,
73+
isValidLoneUnicodeProperty,
74+
isValidUnicodeProperty,
75+
isValidUnicode,
7376
} from "./unicode"
7477

7578
function isSyntaxCharacter(cp: number): boolean {
@@ -1861,18 +1864,31 @@ export class RegExpValidator {
18611864
* UnicodeIDStart
18621865
* `$`
18631866
* `_`
1864-
* `\` RegExpUnicodeEscapeSequence[?U]
1867+
* `\` RegExpUnicodeEscapeSequence[+U]
1868+
* [~U] UnicodeLeadSurrogate UnicodeTrailSurrogate
18651869
* ```
18661870
* @returns `true` if it ate the next characters successfully.
18671871
*/
18681872
private eatRegExpIdentifierStart(): boolean {
18691873
const start = this.index
1874+
const forceUFlag = !this._uFlag && this.ecmaVersion >= 2020
18701875
let cp = this.currentCodePoint
18711876
this.advance()
18721877

1873-
if (cp === ReverseSolidus && this.eatRegExpUnicodeEscapeSequence()) {
1878+
if (
1879+
cp === ReverseSolidus &&
1880+
this.eatRegExpUnicodeEscapeSequence(forceUFlag)
1881+
) {
18741882
cp = this._lastIntValue
1883+
} else if (
1884+
forceUFlag &&
1885+
isLeadSurrogate(cp) &&
1886+
isTrailSurrogate(this.currentCodePoint)
1887+
) {
1888+
cp = combineSurrogatePair(cp, this.currentCodePoint)
1889+
this.advance()
18751890
}
1891+
18761892
if (isRegExpIdentifierStart(cp)) {
18771893
this._lastIntValue = cp
18781894
return true
@@ -1893,20 +1909,33 @@ export class RegExpValidator {
18931909
* UnicodeIDContinue
18941910
* `$`
18951911
* `_`
1896-
* `\` RegExpUnicodeEscapeSequence[?U]
1912+
* `\` RegExpUnicodeEscapeSequence[+U]
1913+
* [~U] UnicodeLeadSurrogate UnicodeTrailSurrogate
18971914
* <ZWNJ>
18981915
* <ZWJ>
18991916
* ```
19001917
* @returns `true` if it ate the next characters successfully.
19011918
*/
19021919
private eatRegExpIdentifierPart(): boolean {
19031920
const start = this.index
1921+
const forceUFlag = !this._uFlag && this.ecmaVersion >= 2020
19041922
let cp = this.currentCodePoint
19051923
this.advance()
19061924

1907-
if (cp === ReverseSolidus && this.eatRegExpUnicodeEscapeSequence()) {
1925+
if (
1926+
cp === ReverseSolidus &&
1927+
this.eatRegExpUnicodeEscapeSequence(forceUFlag)
1928+
) {
19081929
cp = this._lastIntValue
1930+
} else if (
1931+
forceUFlag &&
1932+
isLeadSurrogate(cp) &&
1933+
isTrailSurrogate(this.currentCodePoint)
1934+
) {
1935+
cp = combineSurrogatePair(cp, this.currentCodePoint)
1936+
this.advance()
19091937
}
1938+
19101939
if (isRegExpIdentifierPart(cp)) {
19111940
this._lastIntValue = cp
19121941
return true
@@ -2027,19 +2056,19 @@ export class RegExpValidator {
20272056
* ```
20282057
* @returns `true` if it ate the next characters successfully.
20292058
*/
2030-
private eatRegExpUnicodeEscapeSequence(): boolean {
2059+
private eatRegExpUnicodeEscapeSequence(forceUFlag = false): boolean {
20312060
const start = this.index
2061+
const uFlag = forceUFlag || this._uFlag
20322062

20332063
if (this.eat(LatinSmallLetterU)) {
20342064
if (
2035-
(this._uFlag && this.eatRegExpUnicodeSurrogatePairEscape()) ||
2065+
(uFlag && this.eatRegExpUnicodeSurrogatePairEscape()) ||
20362066
this.eatFixedHexDigits(4) ||
2037-
(this._uFlag && this.eatRegExpUnicodeCodePointEscape())
2067+
(uFlag && this.eatRegExpUnicodeCodePointEscape())
20382068
) {
20392069
return true
20402070
}
2041-
2042-
if (this.strict || this._uFlag) {
2071+
if (this.strict || uFlag) {
20432072
this.raise("Invalid unicode escape")
20442073
}
20452074
this.rewind(start)
@@ -2062,16 +2091,14 @@ export class RegExpValidator {
20622091
if (this.eatFixedHexDigits(4)) {
20632092
const lead = this._lastIntValue
20642093
if (
2065-
lead >= 0xd800 &&
2066-
lead <= 0xdbff &&
2094+
isLeadSurrogate(lead) &&
20672095
this.eat(ReverseSolidus) &&
20682096
this.eat(LatinSmallLetterU) &&
20692097
this.eatFixedHexDigits(4)
20702098
) {
20712099
const trail = this._lastIntValue
2072-
if (trail >= 0xdc00 && trail <= 0xdfff) {
2073-
this._lastIntValue =
2074-
(lead - 0xd800) * 0x400 + (trail - 0xdc00) + 0x10000
2100+
if (isTrailSurrogate(trail)) {
2101+
this._lastIntValue = combineSurrogatePair(lead, trail)
20752102
return true
20762103
}
20772104
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
{
2+
"options": {
3+
"ecmaVersion": 2020,
4+
"strict": false
5+
},
6+
"patterns": {
7+
"/(?<\\ud83d\\ude80>.)/": {
8+
"error": {
9+
"message": "Invalid regular expression: /(?<\\ud83d\\ude80>.)/: Invalid capture group name",
10+
"index": 4
11+
}
12+
},
13+
"/(?<\\ud83d\\ude80>.)/u": {
14+
"error": {
15+
"message": "Invalid regular expression: /(?<\\ud83d\\ude80>.)/u: Invalid capture group name",
16+
"index": 4
17+
}
18+
},
19+
"/(?<\\u{1f680}>.)/": {
20+
"error": {
21+
"message": "Invalid regular expression: /(?<\\u{1f680}>.)/: Invalid capture group name",
22+
"index": 4
23+
}
24+
},
25+
"/(?<\\u{1f680}>.)/u": {
26+
"error": {
27+
"message": "Invalid regular expression: /(?<\\u{1f680}>.)/u: Invalid capture group name",
28+
"index": 4
29+
}
30+
},
31+
"/(?<🚀>.)/": {
32+
"error": {
33+
"message": "Invalid regular expression: /(?<🚀>.)/: Invalid capture group name",
34+
"index": 4
35+
}
36+
},
37+
"/(?<🚀>.)/u": {
38+
"error": {
39+
"message": "Invalid regular expression: /(?<🚀>.)/u: Invalid capture group name",
40+
"index": 4
41+
}
42+
}
43+
}
44+
}

0 commit comments

Comments
 (0)