4
4
/*
 * External token kinds handled by this scanner.
 *
 * The enumerators are used both as indices into the `valid_symbols` array
 * passed to external_scanner_scan() and as the values stored in
 * `lexer->result_symbol`.
 * NOTE(review): the order presumably has to mirror the `externals` list of
 * the grammar -- confirm against the grammar definition before reordering.
 */
enum TokenType {
  AUTOMATIC_SEMICOLON,                    /* produced by scan_automatic_semicolon() */
  TEMPLATE_CHARS,                         /* produced by scan_template_chars() */
  TERNARY_QMARK,                          /* produced by scan_ternary_qmark() */
  BINARY_OPERATORS,                       /* only read via valid_symbols (expression vs. type hint) */
  FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON, /* only read via valid_symbols */
};
10
11
11
12
/* Move the lexer forward by one character, passing `skip = false`. */
static void advance(TSLexer *lexer) {
  lexer->advance(lexer, false);
}
13
+ static void skip (TSLexer * lexer ) { lexer -> advance (lexer , true); }
14
+
15
+ static bool scan_template_chars (TSLexer * lexer ) {
16
+ lexer -> result_symbol = TEMPLATE_CHARS ;
17
+ for (bool has_content = false;; has_content = true) {
18
+ lexer -> mark_end (lexer );
19
+ switch (lexer -> lookahead ) {
20
+ case '`' :
21
+ return has_content ;
22
+ case '\0' :
23
+ return false;
24
+ case '$' :
25
+ advance (lexer );
26
+ if (lexer -> lookahead == '{' ) return has_content ;
27
+ break ;
28
+ case '\\' :
29
+ return has_content ;
30
+ default :
31
+ advance (lexer );
32
+ }
33
+ }
34
+ }
12
35
13
36
static bool scan_whitespace_and_comments (TSLexer * lexer ) {
14
37
for (;;) {
15
38
while (iswspace (lexer -> lookahead )) {
16
- advance (lexer );
39
+ skip (lexer );
17
40
}
18
41
19
42
if (lexer -> lookahead == '/' ) {
20
- advance (lexer );
43
+ skip (lexer );
21
44
22
45
if (lexer -> lookahead == '/' ) {
23
- advance (lexer );
46
+ skip (lexer );
24
47
while (lexer -> lookahead != 0 && lexer -> lookahead != '\n' ) {
25
- advance (lexer );
48
+ skip (lexer );
26
49
}
27
50
} else if (lexer -> lookahead == '*' ) {
28
- advance (lexer );
51
+ skip (lexer );
29
52
while (lexer -> lookahead != 0 ) {
30
53
if (lexer -> lookahead == '*' ) {
31
- advance (lexer );
54
+ skip (lexer );
32
55
if (lexer -> lookahead == '/' ) {
33
- advance (lexer );
56
+ skip (lexer );
34
57
break ;
35
58
}
36
59
} else {
37
- advance (lexer );
60
+ skip (lexer );
38
61
}
39
62
}
40
63
} else {
@@ -46,121 +69,149 @@ static bool scan_whitespace_and_comments(TSLexer *lexer) {
46
69
}
47
70
}
48
71
49
- static inline bool external_scanner_scan (void * payload , TSLexer * lexer , const bool * valid_symbols ) {
50
- if (valid_symbols [TEMPLATE_CHARS ]) {
51
- if (valid_symbols [AUTOMATIC_SEMICOLON ]) return false;
52
- lexer -> result_symbol = TEMPLATE_CHARS ;
53
- for (bool notfirst = false;; notfirst = true) {
54
- lexer -> mark_end (lexer );
55
- switch (lexer -> lookahead ) {
56
- case '`' :
57
- return notfirst ;
58
- case '\0' :
59
- return false;
60
- case '$' :
61
- advance (lexer );
62
- if (lexer -> lookahead == '{' ) return notfirst ;
63
- break ;
64
- case '\\' :
65
- advance (lexer );
66
- advance (lexer );
67
- break ;
68
- default :
69
- advance (lexer );
70
- }
72
+ static bool scan_automatic_semicolon (TSLexer * lexer , const bool * valid_symbols ){
73
+ lexer -> result_symbol = AUTOMATIC_SEMICOLON ;
74
+ lexer -> mark_end (lexer );
75
+
76
+ for (;;) {
77
+ if (lexer -> lookahead == 0 ) return true;
78
+ if (lexer -> lookahead == '}' ) {
79
+ // Automatic semicolon insertion breaks detection of object patterns
80
+ // in a typed context:
81
+ // type F = ({a}: {a: number}) => number;
82
+ // Therefore, disable automatic semicolons when followed by typing
83
+ do {
84
+ skip (lexer );
85
+ } while (iswspace (lexer -> lookahead ));
86
+ if (lexer -> lookahead == ':' ) return false;
87
+ return true;
71
88
}
72
- } else if (
73
- valid_symbols [AUTOMATIC_SEMICOLON ] ||
74
- valid_symbols [FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON ]
75
- ) {
76
- lexer -> result_symbol = AUTOMATIC_SEMICOLON ;
77
- lexer -> mark_end (lexer );
89
+ if (!iswspace (lexer -> lookahead )) return false;
90
+ if (lexer -> lookahead == '\n' ) break ;
91
+ skip (lexer );
92
+ }
78
93
79
- for (;;) {
80
- if (lexer -> lookahead == 0 ) return true;
81
- if (lexer -> lookahead == '}' ) {
82
- // Automatic semicolon insertion breaks detection of object patterns
83
- // in a typed context:
84
- // type F = ({a}: {a: number}) => number;
85
- // Therefore, disable automatic semicolons when followed by typing
86
- do {
87
- advance (lexer );
88
- } while (iswspace (lexer -> lookahead ));
89
- if (lexer -> lookahead == ':' ) return false;
90
- return true;
91
- }
92
- if (!iswspace (lexer -> lookahead )) return false;
93
- if (lexer -> lookahead == '\n' ) break ;
94
- advance (lexer );
94
+ skip (lexer );
95
+
96
+ if (!scan_whitespace_and_comments (lexer )) return false;
97
+
98
+ switch (lexer -> lookahead ) {
99
+ case ',' :
100
+ case '.' :
101
+ case ';' :
102
+ case '*' :
103
+ case '%' :
104
+ case '>' :
105
+ case '<' :
106
+ case '=' :
107
+ case '?' :
108
+ case '^' :
109
+ case '|' :
110
+ case '&' :
111
+ case '/' :
112
+ case ':' :
113
+ return false;
114
+
115
+ case '{' :
116
+ if (valid_symbols [FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON ]) return false;
117
+ break ;
118
+
119
+ // Don't insert a semicolon before a '[' or '(', unless we're parsing
120
+ // a type. Detect whether we're parsing a type or an expression using
121
+ // the validity of a binary operator token.
122
+ case '(' :
123
+ case '[' :
124
+ if (valid_symbols [BINARY_OPERATORS ]) return false;
125
+ break ;
126
+
127
+ // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
128
+ case '+' :
129
+ skip (lexer );
130
+ return lexer -> lookahead == '+' ;
131
+ case '-' :
132
+ skip (lexer );
133
+ return lexer -> lookahead == '-' ;
134
+
135
+ // Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
136
+ case '!' :
137
+ skip (lexer );
138
+ return lexer -> lookahead != '=' ;
139
+
140
+ // Don't insert a semicolon before `in` or `instanceof`, but do insert one
141
+ // before an identifier.
142
+ case 'i' :
143
+ skip (lexer );
144
+
145
+ if (lexer -> lookahead != 'n' ) return true;
146
+ skip (lexer );
147
+
148
+ if (!iswalpha (lexer -> lookahead )) return false;
149
+
150
+ for (unsigned i = 0 ; i < 8 ; i ++ ) {
151
+ if (lexer -> lookahead != "stanceof" [i ]) return true;
152
+ skip (lexer );
95
153
}
96
154
97
- advance (lexer );
155
+ if (!iswalpha (lexer -> lookahead )) return false;
156
+ break ;
157
+ }
98
158
99
- if (!scan_whitespace_and_comments (lexer )) return false;
159
+ return true;
160
+ }
100
161
101
- switch (lexer -> lookahead ) {
102
- case ',' :
103
- case '.' :
104
- case ';' :
105
- case '*' :
106
- case '%' :
107
- case '>' :
108
- case '<' :
109
- case '=' :
110
- case '?' :
111
- case '^' :
112
- case '|' :
113
- case '&' :
114
- case '/' :
115
- case ':' :
116
- return false;
162
+ static bool scan_ternary_qmark (TSLexer * lexer ) {
163
+ for (;;) {
164
+ if (!iswspace (lexer -> lookahead )) break ;
165
+ skip (lexer );
166
+ }
117
167
118
- case '{' :
119
- if (valid_symbols [FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON ]) return false;
120
- break ;
121
-
122
- // Don't insert a semicolon before a '[' or '(', unless we're parsing
123
- // a type. Detect whether we're parsing a type or an expression using
124
- // the validity of a binary operator token.
125
- case '(' :
126
- case '[' :
127
- if (valid_symbols [BINARY_OPERATORS ]) return false;
128
- break ;
129
-
130
- // Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
131
- case '+' :
132
- advance (lexer );
133
- return lexer -> lookahead == '+' ;
134
- case '-' :
135
- advance (lexer );
136
- return lexer -> lookahead == '-' ;
137
-
138
- // Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
139
- case '!' :
140
- advance (lexer );
141
- return lexer -> lookahead != '=' ;
142
-
143
- // Don't insert a semicolon before `in` or `instanceof`, but do insert one
144
- // before an identifier.
145
- case 'i' :
146
- advance (lexer );
147
-
148
- if (lexer -> lookahead != 'n' ) return true;
149
- advance (lexer );
150
-
151
- if (!iswalpha (lexer -> lookahead )) return false;
152
-
153
- for (unsigned i = 0 ; i < 8 ; i ++ ) {
154
- if (lexer -> lookahead != "stanceof" [i ]) return true;
155
- advance (lexer );
156
- }
168
+ if (lexer -> lookahead == '?' ) {
169
+ advance (lexer );
170
+
171
+ if (lexer -> lookahead == '?' ) return false;
172
+ /* Optional chaining. */
173
+ if (lexer -> lookahead == '.' ) return false;
157
174
158
- if (!iswalpha (lexer -> lookahead )) return false;
159
- break ;
175
+ /* TypeScript optional arguments contain the ?: sequence, possibly
176
+ with whitespace. */
177
+ for (;;) {
178
+ if (!iswspace (lexer -> lookahead )) break ;
179
+ skip (lexer );
160
180
}
181
+ if (lexer -> lookahead == ':' ) return false;
182
+ if (lexer -> lookahead == ')' ) return false;
183
+ if (lexer -> lookahead == ',' ) return false;
184
+
185
+ lexer -> mark_end (lexer );
186
+ lexer -> result_symbol = TERNARY_QMARK ;
161
187
188
+ if (lexer -> lookahead == '.' ) {
189
+ advance (lexer );
190
+ if (iswdigit (lexer -> lookahead )) return true;
191
+ return false;
192
+ }
162
193
return true;
163
- } else {
164
- return false;
165
194
}
195
+ return false;
196
+ }
197
+
198
+ static inline bool external_scanner_scan (void * payload , TSLexer * lexer , const bool * valid_symbols ) {
199
+ if (valid_symbols [TEMPLATE_CHARS ]) {
200
+ if (valid_symbols [AUTOMATIC_SEMICOLON ]) return false;
201
+ return scan_template_chars (lexer );
202
+ } else if (
203
+ valid_symbols [AUTOMATIC_SEMICOLON ] ||
204
+ valid_symbols [FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON ]
205
+ ) {
206
+ bool ret = scan_automatic_semicolon (lexer , valid_symbols );
207
+ if (!ret && valid_symbols [TERNARY_QMARK ] && lexer -> lookahead == '?' )
208
+ return scan_ternary_qmark (lexer );
209
+ return ret ;
210
+ }
211
+ if (valid_symbols [TERNARY_QMARK ]) {
212
+ return scan_ternary_qmark (lexer );
213
+ }
214
+
215
+ return false;
216
+
166
217
}
0 commit comments