@@ -20,16 +20,6 @@ import {
20
20
FullStop ,
21
21
GreaterThanSign ,
22
22
HyphenMinus ,
23
- isDecimalDigit ,
24
- isHexDigit ,
25
- isIdContinue ,
26
- isIdStart ,
27
- isLatinLetter ,
28
- isLineTerminator ,
29
- isOctalDigit ,
30
- isValidLoneUnicodeProperty ,
31
- isValidUnicodeProperty ,
32
- isValidUnicode ,
33
23
LatinCapitalLetterB ,
34
24
LatinCapitalLetterD ,
35
25
LatinCapitalLetterP ,
@@ -70,6 +60,19 @@ import {
70
60
VerticalLine ,
71
61
ZeroWidthJoiner ,
72
62
ZeroWidthNonJoiner ,
63
+ combineSurrogatePair ,
64
+ isDecimalDigit ,
65
+ isHexDigit ,
66
+ isIdContinue ,
67
+ isIdStart ,
68
+ isLatinLetter ,
69
+ isLeadSurrogate ,
70
+ isLineTerminator ,
71
+ isOctalDigit ,
72
+ isTrailSurrogate ,
73
+ isValidLoneUnicodeProperty ,
74
+ isValidUnicodeProperty ,
75
+ isValidUnicode ,
73
76
} from "./unicode"
74
77
75
78
function isSyntaxCharacter ( cp : number ) : boolean {
@@ -1861,18 +1864,31 @@ export class RegExpValidator {
1861
1864
* UnicodeIDStart
1862
1865
* `$`
1863
1866
* `_`
1864
- * `\` RegExpUnicodeEscapeSequence[?U]
1867
+ * `\` RegExpUnicodeEscapeSequence[+U]
1868
+ * [~U] UnicodeLeadSurrogate UnicodeTrailSurrogate
1865
1869
* ```
1866
1870
* @returns `true` if it ate the next characters successfully.
1867
1871
*/
1868
1872
private eatRegExpIdentifierStart ( ) : boolean {
1869
1873
const start = this . index
1874
+ const forceUFlag = ! this . _uFlag && this . ecmaVersion >= 2020
1870
1875
let cp = this . currentCodePoint
1871
1876
this . advance ( )
1872
1877
1873
- if ( cp === ReverseSolidus && this . eatRegExpUnicodeEscapeSequence ( ) ) {
1878
+ if (
1879
+ cp === ReverseSolidus &&
1880
+ this . eatRegExpUnicodeEscapeSequence ( forceUFlag )
1881
+ ) {
1874
1882
cp = this . _lastIntValue
1883
+ } else if (
1884
+ forceUFlag &&
1885
+ isLeadSurrogate ( cp ) &&
1886
+ isTrailSurrogate ( this . currentCodePoint )
1887
+ ) {
1888
+ cp = combineSurrogatePair ( cp , this . currentCodePoint )
1889
+ this . advance ( )
1875
1890
}
1891
+
1876
1892
if ( isRegExpIdentifierStart ( cp ) ) {
1877
1893
this . _lastIntValue = cp
1878
1894
return true
@@ -1893,20 +1909,33 @@ export class RegExpValidator {
1893
1909
* UnicodeIDContinue
1894
1910
* `$`
1895
1911
* `_`
1896
- * `\` RegExpUnicodeEscapeSequence[?U]
1912
+ * `\` RegExpUnicodeEscapeSequence[+U]
1913
+ * [~U] UnicodeLeadSurrogate UnicodeTrailSurrogate
1897
1914
* <ZWNJ>
1898
1915
* <ZWJ>
1899
1916
* ```
1900
1917
* @returns `true` if it ate the next characters successfully.
1901
1918
*/
1902
1919
private eatRegExpIdentifierPart ( ) : boolean {
1903
1920
const start = this . index
1921
+ const forceUFlag = ! this . _uFlag && this . ecmaVersion >= 2020
1904
1922
let cp = this . currentCodePoint
1905
1923
this . advance ( )
1906
1924
1907
- if ( cp === ReverseSolidus && this . eatRegExpUnicodeEscapeSequence ( ) ) {
1925
+ if (
1926
+ cp === ReverseSolidus &&
1927
+ this . eatRegExpUnicodeEscapeSequence ( forceUFlag )
1928
+ ) {
1908
1929
cp = this . _lastIntValue
1930
+ } else if (
1931
+ forceUFlag &&
1932
+ isLeadSurrogate ( cp ) &&
1933
+ isTrailSurrogate ( this . currentCodePoint )
1934
+ ) {
1935
+ cp = combineSurrogatePair ( cp , this . currentCodePoint )
1936
+ this . advance ( )
1909
1937
}
1938
+
1910
1939
if ( isRegExpIdentifierPart ( cp ) ) {
1911
1940
this . _lastIntValue = cp
1912
1941
return true
@@ -2027,19 +2056,19 @@ export class RegExpValidator {
2027
2056
* ```
2028
2057
* @returns `true` if it ate the next characters successfully.
2029
2058
*/
2030
- private eatRegExpUnicodeEscapeSequence ( ) : boolean {
2059
+ private eatRegExpUnicodeEscapeSequence ( forceUFlag = false ) : boolean {
2031
2060
const start = this . index
2061
+ const uFlag = forceUFlag || this . _uFlag
2032
2062
2033
2063
if ( this . eat ( LatinSmallLetterU ) ) {
2034
2064
if (
2035
- ( this . _uFlag && this . eatRegExpUnicodeSurrogatePairEscape ( ) ) ||
2065
+ ( uFlag && this . eatRegExpUnicodeSurrogatePairEscape ( ) ) ||
2036
2066
this . eatFixedHexDigits ( 4 ) ||
2037
- ( this . _uFlag && this . eatRegExpUnicodeCodePointEscape ( ) )
2067
+ ( uFlag && this . eatRegExpUnicodeCodePointEscape ( ) )
2038
2068
) {
2039
2069
return true
2040
2070
}
2041
-
2042
- if ( this . strict || this . _uFlag ) {
2071
+ if ( this . strict || uFlag ) {
2043
2072
this . raise ( "Invalid unicode escape" )
2044
2073
}
2045
2074
this . rewind ( start )
@@ -2062,16 +2091,14 @@ export class RegExpValidator {
2062
2091
if ( this . eatFixedHexDigits ( 4 ) ) {
2063
2092
const lead = this . _lastIntValue
2064
2093
if (
2065
- lead >= 0xd800 &&
2066
- lead <= 0xdbff &&
2094
+ isLeadSurrogate ( lead ) &&
2067
2095
this . eat ( ReverseSolidus ) &&
2068
2096
this . eat ( LatinSmallLetterU ) &&
2069
2097
this . eatFixedHexDigits ( 4 )
2070
2098
) {
2071
2099
const trail = this . _lastIntValue
2072
- if ( trail >= 0xdc00 && trail <= 0xdfff ) {
2073
- this . _lastIntValue =
2074
- ( lead - 0xd800 ) * 0x400 + ( trail - 0xdc00 ) + 0x10000
2100
+ if ( isTrailSurrogate ( trail ) ) {
2101
+ this . _lastIntValue = combineSurrogatePair ( lead , trail )
2075
2102
return true
2076
2103
}
2077
2104
}
0 commit comments