From 9de233eca8a5af1c08f7a9030cbd1efccacf09e6 Mon Sep 17 00:00:00 2001 From: Vladimir Makaev Date: Thu, 19 Feb 2026 19:24:46 +0000 Subject: [PATCH] fix: accept unterminated block comments at EOF in scanner The scanner previously rejected unterminated /* comments at EOF, causing tree-sitter to parse the comment delimiters as operators (multiplicative_expression, spread_expression). This matches JetBrains PSI behavior which recognizes unclosed /* as a BLOCK_COMMENT token. Fixes 4 cross-validation fixtures (BlockCommentAtBeginningOfFile 1-4), improving match rate from 97/124 (78.2%) to 101/126 (80.2%). --- src/scanner.c | 10 ++++++++++ .../jetbrains/BlockCommentAtBeginningOfFile1.txt | 12 ++++++++++++ .../jetbrains/BlockCommentAtBeginningOfFile2.txt | 13 +++++++++++++ .../jetbrains/BlockCommentAtBeginningOfFile3.txt | 14 ++++++++++++++ .../jetbrains/BlockCommentAtBeginningOfFile4.txt | 16 ++++++++++++++++ tools/cross-validation/excluded.txt | 8 +------- 6 files changed, 66 insertions(+), 7 deletions(-) create mode 100644 test/corpus/jetbrains/BlockCommentAtBeginningOfFile1.txt create mode 100644 test/corpus/jetbrains/BlockCommentAtBeginningOfFile2.txt create mode 100644 test/corpus/jetbrains/BlockCommentAtBeginningOfFile3.txt create mode 100644 test/corpus/jetbrains/BlockCommentAtBeginningOfFile4.txt diff --git a/src/scanner.c b/src/scanner.c index caed63a2..03243643 100644 --- a/src/scanner.c +++ b/src/scanner.c @@ -224,6 +224,16 @@ static bool scan_multiline_comment(TSLexer *lexer) { } break; case '\0': + // Accept unterminated block comments at EOF rather than rejecting them. + // This matches JetBrains PSI behavior which recognizes unclosed /* as a + // BLOCK_COMMENT token (plus an error element). Without this, the scanner + // returns false and tree-sitter tries to parse the comment delimiters + // as operators/expressions. + if (lexer->eof(lexer)) { + lexer->result_symbol = MULTILINE_COMMENT; + lexer->mark_end(lexer); + return true; + } return false; default: advance(lexer); diff --git a/test/corpus/jetbrains/BlockCommentAtBeginningOfFile1.txt b/test/corpus/jetbrains/BlockCommentAtBeginningOfFile1.txt new file mode 100644 index 00000000..01b300f9 --- /dev/null +++ b/test/corpus/jetbrains/BlockCommentAtBeginningOfFile1.txt @@ -0,0 +1,12 @@ +================== +BlockCommentAtBeginningOfFile1 +================== + +// COMPILATION_ERRORS + +/* +--- + +(source_file + (line_comment) + (multiline_comment)) diff --git a/test/corpus/jetbrains/BlockCommentAtBeginningOfFile2.txt b/test/corpus/jetbrains/BlockCommentAtBeginningOfFile2.txt new file mode 100644 index 00000000..3e8c6a15 --- /dev/null +++ b/test/corpus/jetbrains/BlockCommentAtBeginningOfFile2.txt @@ -0,0 +1,13 @@ +================== +BlockCommentAtBeginningOfFile2 +================== + +// COMPILATION_ERRORS + +/* +/* +--- + +(source_file + (line_comment) + (multiline_comment)) diff --git a/test/corpus/jetbrains/BlockCommentAtBeginningOfFile3.txt b/test/corpus/jetbrains/BlockCommentAtBeginningOfFile3.txt new file mode 100644 index 00000000..5f58f03b --- /dev/null +++ b/test/corpus/jetbrains/BlockCommentAtBeginningOfFile3.txt @@ -0,0 +1,14 @@ +================== +BlockCommentAtBeginningOfFile3 +================== + +// COMPILATION_ERRORS + +/* + +fooo +--- + +(source_file + (line_comment) + (multiline_comment)) diff --git a/test/corpus/jetbrains/BlockCommentAtBeginningOfFile4.txt b/test/corpus/jetbrains/BlockCommentAtBeginningOfFile4.txt new file mode 100644 index 00000000..5bea7b59 --- /dev/null +++ b/test/corpus/jetbrains/BlockCommentAtBeginningOfFile4.txt @@ -0,0 +1,16 @@ +================== +BlockCommentAtBeginningOfFile4 +================== + +// COMPILATION_ERRORS + +/* + +/*foo*/ + +asdfas +--- + +(source_file + (line_comment) + (multiline_comment)) diff --git a/tools/cross-validation/excluded.txt b/tools/cross-validation/excluded.txt index aa7d8b0a..6980b3af 100644 --- a/tools/cross-validation/excluded.txt +++ b/tools/cross-validation/excluded.txt @@ -20,13 +20,9 @@ # 3. Run: npm test (to verify the new corpus test passes) # ============================================================================= -# MISMATCH: Grammar produces wrong AST structure (26 files) +# MISMATCH: Grammar produces wrong AST structure (24 files) # ============================================================================= -# --- unterminated_block_comment (tree-sitter limitation) --- -BlockCommentAtBeginningOfFile3 -BlockCommentAtBeginningOfFile4 - # --- duplicate_accessor (error recovery difference) --- DuplicateAccessor @@ -88,8 +84,6 @@ AbsentInnerType AnnotatedIntersections AssertNotNull BackslashInString -BlockCommentAtBeginningOfFile1 -BlockCommentAtBeginningOfFile2 CallsInWhen CollectionLiterals CommentsBinding