Skip to content

Commit 410c940

Browse files
fix: treat +/- after newline as prefix operators (insert ASI) (#233)
* fix: accept unterminated block comments at EOF in scanner

  The scanner previously rejected unterminated /* comments at EOF, causing tree-sitter to parse the comment delimiters as operators (multiplicative_expression, spread_expression). Accepting them matches JetBrains PSI behavior, which recognizes an unclosed /* as a BLOCK_COMMENT token.

  Fixes 4 cross-validation fixtures (BlockCommentAtBeginningOfFile 1-4), improving match rate from 97/124 (78.2%) to 101/126 (80.2%).

* fix: treat +/- after newline as prefix operators (insert ASI)

  In Kotlin, + and - at the start of a new line are always prefix operators, not binary continuation. Binary operations across lines require the operator at the end of the previous line (e.g. `a +\nb`).

  The scanner previously only inserted ASI before ++ / -- and +/-digit, but not before standalone + or - followed by an identifier. This caused `a\n+ b` to parse as additive_expression(a, b) instead of two separate statements: `a` and `+b` (prefix_expression).

  The grammar ensures AUTOMATIC_SEMICOLON is only valid where a statement boundary is possible, so this doesn't affect + or - inside parentheses or brackets, where newlines don't terminate statements.

  Fixes EOLsInComments and NewlinesInParentheses cross-validation fixtures. Match rate: 101/126 (80.2%) → 103/126 (81.7%).
1 parent b136726 commit 410c940

File tree

4 files changed

+280
-12
lines changed

4 files changed

+280
-12
lines changed

src/scanner.c

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -383,17 +383,19 @@ static bool scan_automatic_semicolon(TSLexer *lexer, const bool *valid_symbols)
383383
if (lexer->lookahead == '/' || lexer->lookahead == '*') return true;
384384
return false;
385385

386-
// Insert a semicolon before `--` and `++`, but not before binary `+` or `-`.
387-
// Insert before +/-Float
386+
// In Kotlin, `+` and `-` after a newline are always prefix operators,
387+
// not binary continuation. If a binary operation is intended, the
388+
// operator must be placed at the end of the previous line:
389+
// a + // binary: a + b
390+
// b
391+
// a // prefix: a; +b
392+
// + b
393+
// The grammar ensures AUTOMATIC_SEMICOLON is only valid where a
394+
// statement could end, so this won't fire inside () or [] where
395+
// newlines don't terminate statements.
388396
case '+':
389-
skip(lexer);
390-
if (lexer->lookahead == '+') return true;
391-
return iswdigit(lexer->lookahead);
392-
393397
case '-':
394-
skip(lexer);
395-
if (lexer->lookahead == '-') return true;
396-
return iswdigit(lexer->lookahead);
398+
return true;
397399

398400
// Don't insert a semicolon before `!=`, but do insert one before a unary `!`.
399401
case '!':
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
==================
2+
EOLsInComments
3+
==================
4+
5+
// COMPILATION_ERRORS
6+
7+
fun foo() {
8+
a
9+
+ b
10+
a
11+
/** */+ b
12+
a
13+
/* */+ b
14+
a /*
15+
*/ + b
16+
a
17+
/*
18+
*/ + b
19+
a /**
20+
*/ + b
21+
a //
22+
+ b
23+
a //
24+
+ b
25+
}
26+
---
27+
28+
(source_file
29+
(line_comment)
30+
(function_declaration
31+
(simple_identifier)
32+
(function_value_parameters)
33+
(function_body
34+
(statements
35+
(simple_identifier)
36+
(prefix_expression
37+
(simple_identifier))
38+
(simple_identifier)
39+
(multiline_comment)
40+
(prefix_expression
41+
(simple_identifier))
42+
(simple_identifier)
43+
(multiline_comment)
44+
(prefix_expression
45+
(simple_identifier))
46+
(additive_expression
47+
(simple_identifier)
48+
(multiline_comment)
49+
(simple_identifier))
50+
(simple_identifier)
51+
(multiline_comment)
52+
(prefix_expression
53+
(simple_identifier))
54+
(additive_expression
55+
(simple_identifier)
56+
(multiline_comment)
57+
(simple_identifier))
58+
(simple_identifier)
59+
(line_comment)
60+
(prefix_expression
61+
(simple_identifier))
62+
(simple_identifier)
63+
(line_comment)
64+
(prefix_expression
65+
(simple_identifier))))))
Lines changed: 203 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,203 @@
1+
==================
2+
NewlinesInParentheses
3+
==================
4+
5+
// COMPILATION_ERRORS
6+
7+
fun foo() {
8+
val a = a + b
9+
val a = a +
10+
b
11+
val a = a
12+
+ b
13+
val a = (a
14+
+ b)
15+
val a = ({a
16+
+ b})
17+
val a = ({a
18+
+ b}
19+
+ b)
20+
21+
val a = b[c
22+
+ d]
23+
val a = b[{c
24+
+ d}]
25+
val a = b[{c
26+
+ d}
27+
+ d]
28+
29+
when (e) {
30+
is T
31+
<X>
32+
-> a
33+
in f
34+
() -> a
35+
!is T
36+
<X> -> a
37+
!in f
38+
() -> a
39+
f
40+
() -> a
41+
}
42+
val f = a is T
43+
<X>
44+
}
45+
---
46+
47+
(source_file
48+
(line_comment)
49+
(function_declaration
50+
(simple_identifier)
51+
(function_value_parameters)
52+
(function_body
53+
(statements
54+
(property_declaration
55+
(binding_pattern_kind)
56+
(variable_declaration
57+
(simple_identifier))
58+
(additive_expression
59+
(simple_identifier)
60+
(simple_identifier)))
61+
(property_declaration
62+
(binding_pattern_kind)
63+
(variable_declaration
64+
(simple_identifier))
65+
(additive_expression
66+
(simple_identifier)
67+
(simple_identifier)))
68+
(property_declaration
69+
(binding_pattern_kind)
70+
(variable_declaration
71+
(simple_identifier))
72+
(simple_identifier))
73+
(prefix_expression
74+
(simple_identifier))
75+
(property_declaration
76+
(binding_pattern_kind)
77+
(variable_declaration
78+
(simple_identifier))
79+
(parenthesized_expression
80+
(additive_expression
81+
(simple_identifier)
82+
(simple_identifier))))
83+
(property_declaration
84+
(binding_pattern_kind)
85+
(variable_declaration
86+
(simple_identifier))
87+
(parenthesized_expression
88+
(lambda_literal
89+
(statements
90+
(simple_identifier)
91+
(prefix_expression
92+
(simple_identifier))))))
93+
(property_declaration
94+
(binding_pattern_kind)
95+
(variable_declaration
96+
(simple_identifier))
97+
(parenthesized_expression
98+
(additive_expression
99+
(lambda_literal
100+
(statements
101+
(simple_identifier)
102+
(prefix_expression
103+
(simple_identifier))))
104+
(simple_identifier))))
105+
(property_declaration
106+
(binding_pattern_kind)
107+
(variable_declaration
108+
(simple_identifier))
109+
(indexing_expression
110+
(simple_identifier)
111+
(indexing_suffix
112+
(additive_expression
113+
(simple_identifier)
114+
(simple_identifier)))))
115+
(property_declaration
116+
(binding_pattern_kind)
117+
(variable_declaration
118+
(simple_identifier))
119+
(indexing_expression
120+
(simple_identifier)
121+
(indexing_suffix
122+
(lambda_literal
123+
(statements
124+
(simple_identifier)
125+
(prefix_expression
126+
(simple_identifier)))))))
127+
(property_declaration
128+
(binding_pattern_kind)
129+
(variable_declaration
130+
(simple_identifier))
131+
(indexing_expression
132+
(simple_identifier)
133+
(indexing_suffix
134+
(additive_expression
135+
(lambda_literal
136+
(statements
137+
(simple_identifier)
138+
(prefix_expression
139+
(simple_identifier))))
140+
(simple_identifier)))))
141+
(when_expression
142+
(when_subject
143+
(simple_identifier))
144+
(when_entry
145+
(when_condition
146+
(type_test
147+
(user_type
148+
(type_identifier)
149+
(type_arguments
150+
(type_projection
151+
(user_type
152+
(type_identifier)))))))
153+
(control_structure_body
154+
(simple_identifier)))
155+
(when_entry
156+
(when_condition
157+
(range_test
158+
(call_expression
159+
(simple_identifier)
160+
(call_suffix
161+
(value_arguments)))))
162+
(control_structure_body
163+
(simple_identifier)))
164+
(when_entry
165+
(when_condition
166+
(type_test
167+
(user_type
168+
(type_identifier)
169+
(type_arguments
170+
(type_projection
171+
(user_type
172+
(type_identifier)))))))
173+
(control_structure_body
174+
(simple_identifier)))
175+
(when_entry
176+
(when_condition
177+
(range_test
178+
(call_expression
179+
(simple_identifier)
180+
(call_suffix
181+
(value_arguments)))))
182+
(control_structure_body
183+
(simple_identifier)))
184+
(when_entry
185+
(when_condition
186+
(call_expression
187+
(simple_identifier)
188+
(call_suffix
189+
(value_arguments))))
190+
(control_structure_body
191+
(simple_identifier))))
192+
(property_declaration
193+
(binding_pattern_kind)
194+
(variable_declaration
195+
(simple_identifier))
196+
(check_expression
197+
(simple_identifier)
198+
(user_type
199+
(type_identifier)
200+
(type_arguments
201+
(type_projection
202+
(user_type
203+
(type_identifier)))))))))))

tools/cross-validation/excluded.txt

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@
2020
# 3. Run: npm test (to verify the new corpus test passes)
2121

2222
# =============================================================================
23-
# MISMATCH: Grammar produces wrong AST structure (24 files)
23+
# MISMATCH: Grammar produces wrong AST structure (22 files)
2424
# =============================================================================
2525

2626
# --- duplicate_accessor (error recovery difference) ---
@@ -30,8 +30,6 @@ DuplicateAccessor
3030
TypealiasIsKeyword
3131

3232
# --- prefix_vs_binary (newline sensitivity — tree-sitter limitation) ---
33-
EOLsInComments
34-
NewlinesInParentheses
3533
NewLinesValidOperations
3634

3735
# --- generic_vs_comparison (<> ambiguity — tree-sitter limitation) ---

0 commit comments

Comments
 (0)