Skip to content

Commit b3b5d84

Browse files
committed
Support ternary_qmark and reorganize external scanner
1 parent 1517cff commit b3b5d84

File tree

2 files changed

+134
-97
lines changed

2 files changed

+134
-97
lines changed

common/scanner.h

Lines changed: 133 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,13 @@
44
enum TokenType {
55
AUTOMATIC_SEMICOLON,
66
TEMPLATE_CHARS,
7+
TERNARY_QMARK,
78
BINARY_OPERATORS,
89
FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON,
910
};
1011

1112
static void advance(TSLexer *lexer) { lexer->advance(lexer, false); }
13+
static void skip(TSLexer *lexer) { lexer->advance(lexer, true); }
1214

1315
static bool scan_template_chars(TSLexer *lexer) {
1416
lexer->result_symbol = TEMPLATE_CHARS;
@@ -34,28 +36,28 @@ static bool scan_template_chars(TSLexer *lexer) {
3436
static bool scan_whitespace_and_comments(TSLexer *lexer) {
3537
for (;;) {
3638
while (iswspace(lexer->lookahead)) {
37-
advance(lexer);
39+
skip(lexer);
3840
}
3941

4042
if (lexer->lookahead == '/') {
41-
advance(lexer);
43+
skip(lexer);
4244

4345
if (lexer->lookahead == '/') {
44-
advance(lexer);
46+
skip(lexer);
4547
while (lexer->lookahead != 0 && lexer->lookahead != '\n') {
46-
advance(lexer);
48+
skip(lexer);
4749
}
4850
} else if (lexer->lookahead == '*') {
49-
advance(lexer);
51+
skip(lexer);
5052
while (lexer->lookahead != 0) {
5153
if (lexer->lookahead == '*') {
52-
advance(lexer);
54+
skip(lexer);
5355
if (lexer->lookahead == '/') {
54-
advance(lexer);
56+
skip(lexer);
5557
break;
5658
}
5759
} else {
58-
advance(lexer);
60+
skip(lexer);
5961
}
6062
}
6163
} else {
@@ -67,102 +69,137 @@ static bool scan_whitespace_and_comments(TSLexer *lexer) {
6769
}
6870
}
6971

70-
static inline bool external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
71-
if (valid_symbols[TEMPLATE_CHARS]) {
72-
if (valid_symbols[AUTOMATIC_SEMICOLON]) return false;
73-
return scan_template_chars(lexer);
74-
} else if (
75-
valid_symbols[AUTOMATIC_SEMICOLON] ||
76-
valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON]
77-
) {
78-
lexer->result_symbol = AUTOMATIC_SEMICOLON;
79-
lexer->mark_end(lexer);
72+
static bool scan_automatic_semicolon(TSLexer *lexer, const bool *valid_symbols){
73+
lexer->result_symbol = AUTOMATIC_SEMICOLON;
74+
lexer->mark_end(lexer);
8075

81-
for (;;) {
82-
if (lexer->lookahead == 0) return true;
83-
if (lexer->lookahead == '}') {
84-
// Automatic semicolon insertion breaks detection of object patterns
85-
// in a typed context:
86-
// type F = ({a}: {a: number}) => number;
87-
// Therefore, disable automatic semicolons when followed by typing
88-
do {
89-
advance(lexer);
90-
} while (iswspace(lexer->lookahead));
91-
if (lexer->lookahead == ':') return false;
92-
return true;
93-
}
94-
if (!iswspace(lexer->lookahead)) return false;
95-
if (lexer->lookahead == '\n') break;
96-
advance(lexer);
76+
for (;;) {
77+
if (lexer->lookahead == 0) return true;
78+
if (lexer->lookahead == '}') {
79+
// Automatic semicolon insertion breaks detection of object patterns
80+
// in a typed context:
81+
// type F = ({a}: {a: number}) => number;
82+
// Therefore, disable automatic semicolons when followed by typing
83+
do {
84+
skip(lexer);
85+
} while (iswspace(lexer->lookahead));
86+
if (lexer->lookahead == ':') return false;
87+
return true;
9788
}
89+
if (!iswspace(lexer->lookahead)) return false;
90+
if (lexer->lookahead == '\n') break;
91+
skip(lexer);
92+
}
9893

99-
advance(lexer);
94+
skip(lexer);
95+
96+
if (!scan_whitespace_and_comments(lexer)) return false;
97+
98+
switch (lexer->lookahead) {
99+
case ',':
100+
case '.':
101+
case ';':
102+
case '*':
103+
case '%':
104+
case '>':
105+
case '<':
106+
case '=':
107+
case '?':
108+
case '^':
109+
case '|':
110+
case '&':
111+
case '/':
112+
case ':':
113+
return false;
100114

101-
if (!scan_whitespace_and_comments(lexer)) return false;
115+
case '{':
116+
if (valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON]) return false;
117+
break;
118+
119+
// Don't insert a semicolon before a '[' or '(', unless we're parsing
120+
// a type. Detect whether we're parsing a type or an expression using
121+
// the validity of a binary operator token.
122+
case '(':
123+
case '[':
124+
if (valid_symbols[BINARY_OPERATORS]) return false;
125+
break;
126+
127+
// Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
128+
case '+':
129+
skip(lexer);
130+
return lexer->lookahead == '+';
131+
case '-':
132+
skip(lexer);
133+
return lexer->lookahead == '-';
134+
135+
// Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
136+
case '!':
137+
skip(lexer);
138+
return lexer->lookahead != '=';
139+
140+
// Don't insert a semicolon before `in` or `instanceof`, but do insert one
141+
// before an identifier.
142+
case 'i':
143+
skip(lexer);
144+
145+
if (lexer->lookahead != 'n') return true;
146+
skip(lexer);
147+
148+
if (!iswalpha(lexer->lookahead)) return false;
149+
150+
for (unsigned i = 0; i < 8; i++) {
151+
if (lexer->lookahead != "stanceof"[i]) return true;
152+
skip(lexer);
153+
}
102154

103-
switch (lexer->lookahead) {
104-
case ',':
105-
case '.':
106-
case ';':
107-
case '*':
108-
case '%':
109-
case '>':
110-
case '<':
111-
case '=':
112-
case '?':
113-
case '^':
114-
case '|':
115-
case '&':
116-
case '/':
117-
case ':':
118-
return false;
155+
if (!iswalpha(lexer->lookahead)) return false;
156+
break;
157+
}
119158

120-
case '{':
121-
if (valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON]) return false;
122-
break;
123-
124-
// Don't insert a semicolon before a '[' or '(', unless we're parsing
125-
// a type. Detect whether we're parsing a type or an expression using
126-
// the validity of a binary operator token.
127-
case '(':
128-
case '[':
129-
if (valid_symbols[BINARY_OPERATORS]) return false;
130-
break;
131-
132-
// Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
133-
case '+':
134-
advance(lexer);
135-
return lexer->lookahead == '+';
136-
case '-':
137-
advance(lexer);
138-
return lexer->lookahead == '-';
139-
140-
// Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
141-
case '!':
142-
advance(lexer);
143-
return lexer->lookahead != '=';
144-
145-
// Don't insert a semicolon before `in` or `instanceof`, but do insert one
146-
// before an identifier.
147-
case 'i':
148-
advance(lexer);
149-
150-
if (lexer->lookahead != 'n') return true;
151-
advance(lexer);
152-
153-
if (!iswalpha(lexer->lookahead)) return false;
154-
155-
for (unsigned i = 0; i < 8; i++) {
156-
if (lexer->lookahead != "stanceof"[i]) return true;
157-
advance(lexer);
158-
}
159+
return true;
160+
}
159161

160-
if (!iswalpha(lexer->lookahead)) return false;
161-
break;
162-
}
162+
static bool scan_ternary_qmark(TSLexer *lexer) {
163+
for(;;) {
164+
if (!iswspace(lexer->lookahead)) break;
165+
skip(lexer);
166+
}
167+
168+
if (lexer->lookahead == '?') {
169+
advance(lexer);
170+
171+
if (lexer->lookahead == '?') return false;
172+
173+
lexer->mark_end(lexer);
174+
lexer->result_symbol = TERNARY_QMARK;
163175

176+
if (lexer->lookahead == '.') {
177+
advance(lexer);
178+
if (iswdigit(lexer->lookahead)) return true;
179+
return false;
180+
}
164181
return true;
165-
} else {
166-
return false;
167182
}
183+
return false;
184+
}
185+
186+
static inline bool external_scanner_scan(void *payload, TSLexer *lexer, const bool *valid_symbols) {
187+
if (valid_symbols[TEMPLATE_CHARS]) {
188+
if (valid_symbols[AUTOMATIC_SEMICOLON]) return false;
189+
return scan_template_chars(lexer);
190+
} else if (
191+
valid_symbols[AUTOMATIC_SEMICOLON] ||
192+
valid_symbols[FUNCTION_SIGNATURE_AUTOMATIC_SEMICOLON]
193+
) {
194+
bool ret = scan_automatic_semicolon(lexer, valid_symbols);
195+
if (!ret && valid_symbols[TERNARY_QMARK] && lexer->lookahead == '?')
196+
return scan_ternary_qmark(lexer);
197+
return ret;
198+
}
199+
if (valid_symbols[TERNARY_QMARK]) {
200+
return scan_ternary_qmark(lexer);
201+
}
202+
203+
return false;
204+
168205
}

package.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@
1919
"main": "./bindings/node",
2020
"devDependencies": {
2121
"tree-sitter-cli": "^0.20.0",
22-
"tree-sitter-javascript": "github:tree-sitter/tree-sitter-javascript#2cc5803"
22+
"tree-sitter-javascript": "github:tree-sitter/tree-sitter-javascript#e3a5149"
2323
},
2424
"scripts": {
2525
"build": "npm run build-typescript && npm run build-tsx",

0 commit comments

Comments
 (0)