@@ -1687,4 +1687,202 @@ mod tests {
16871687 assert_eq ! ( * token. r#type( ) , TokenType :: RightBrace ) ;
16881688 }
16891689 }
1690+
1691+ #[ test]
1692+ fn comment_coalescing_adjacent_regular_comments ( ) {
1693+ // Test that adjacent regular comments are coalesced
1694+ let input = "// First comment\n // Second comment\n identifier" ;
1695+ let mut lexer = Lexer :: default_for_input ( input) ;
1696+
1697+ let first_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1698+ if let TokenType :: Comment ( content) = first_token. r#type ( ) {
1699+ assert ! ( content. contains( " First comment" ) ) ;
1700+ assert ! ( content. contains( " Second comment" ) ) ;
1701+ // Should be joined with newline
1702+ assert ! ( content. contains( '\n' ) ) ;
1703+ } else {
1704+ panic ! (
1705+ "Expected coalesced comment token, got: {:?}" ,
1706+ first_token. r#type( )
1707+ ) ;
1708+ }
1709+
1710+ let second_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1711+ assert_eq ! (
1712+ * second_token. r#type( ) ,
1713+ TokenType :: Identifier ( "identifier" . to_string( ) )
1714+ ) ;
1715+ }
1716+
1717+ #[ test]
1718+ fn comment_coalescing_adjacent_doc_comments ( ) {
1719+ // Test that adjacent doc comments are coalesced
1720+ let input = "/// First doc\n /// Second doc\n fn test() {}" ;
1721+ let mut lexer = Lexer :: default_for_input ( input) ;
1722+
1723+ let first_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1724+ if let TokenType :: DocComment ( content) = first_token. r#type ( ) {
1725+ assert ! ( content. contains( " First doc" ) ) ;
1726+ assert ! ( content. contains( " Second doc" ) ) ;
1727+ // Should be joined with newline
1728+ assert ! ( content. contains( '\n' ) ) ;
1729+ } else {
1730+ panic ! (
1731+ "Expected coalesced doc comment token, got: {:?}" ,
1732+ first_token. r#type( )
1733+ ) ;
1734+ }
1735+ }
1736+
1737+ #[ test]
1738+ fn comment_coalescing_mixed_comment_types_separate ( ) {
1739+ // Test that regular comments and doc comments don't coalesce together
1740+ let input = "// Regular comment\n /// Doc comment\n identifier" ;
1741+ let mut lexer = Lexer :: default_for_input ( input) ;
1742+
1743+ let first_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1744+ if let TokenType :: Comment ( content) = first_token. r#type ( ) {
1745+ assert ! ( content. contains( " Regular comment" ) ) ;
1746+ assert ! ( !content. contains( " Doc comment" ) ) ;
1747+ } else {
1748+ panic ! (
1749+ "Expected regular comment token, got: {:?}" ,
1750+ first_token. r#type( )
1751+ ) ;
1752+ }
1753+
1754+ let second_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1755+ if let TokenType :: DocComment ( content) = second_token. r#type ( ) {
1756+ assert ! ( content. contains( " Doc comment" ) ) ;
1757+ assert ! ( !content. contains( " Regular comment" ) ) ;
1758+ } else {
1759+ panic ! (
1760+ "Expected doc comment token, got: {:?}" ,
1761+ second_token. r#type( )
1762+ ) ;
1763+ }
1764+
1765+ let third_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1766+ assert_eq ! (
1767+ * third_token. r#type( ) ,
1768+ TokenType :: Identifier ( "identifier" . to_string( ) )
1769+ ) ;
1770+ }
1771+
1772+ #[ test]
1773+ fn comment_coalescing_with_intervening_code ( ) {
1774+ // Test that comments separated by code don't coalesce
1775+ let input = "// First comment\n let x = 1;\n // Second comment" ;
1776+ let mut lexer = Lexer :: default_for_input ( input) ;
1777+
1778+ let first_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1779+ if let TokenType :: Comment ( content) = first_token. r#type ( ) {
1780+ assert ! ( content. contains( " First comment" ) ) ;
1781+ assert ! ( !content. contains( " Second comment" ) ) ;
1782+ } else {
1783+ panic ! (
1784+ "Expected first comment token, got: {:?}" ,
1785+ first_token. r#type( )
1786+ ) ;
1787+ }
1788+
1789+ // Should get the 'let' identifier next
1790+ let second_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1791+ assert_eq ! (
1792+ * second_token. r#type( ) ,
1793+ TokenType :: Identifier ( "let" . to_string( ) )
1794+ ) ;
1795+ }
1796+
1797+ #[ test]
1798+ fn comment_coalescing_multiple_adjacent ( ) {
1799+ // Test coalescing of more than two comments
1800+ let input = "// First\n // Second\n // Third\n code" ;
1801+ let mut lexer = Lexer :: default_for_input ( input) ;
1802+
1803+ let comment_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1804+ if let TokenType :: Comment ( content) = comment_token. r#type ( ) {
1805+ assert ! ( content. contains( " First" ) ) ;
1806+ assert ! ( content. contains( " Second" ) ) ;
1807+ assert ! ( content. contains( " Third" ) ) ;
1808+ // Should have two newlines joining three comments
1809+ let newline_count = content. matches ( '\n' ) . count ( ) ;
1810+ assert_eq ! ( newline_count, 2 ) ;
1811+ } else {
1812+ panic ! (
1813+ "Expected coalesced comment token, got: {:?}" ,
1814+ comment_token. r#type( )
1815+ ) ;
1816+ }
1817+ }
1818+
1819+ #[ test]
1820+ fn comment_coalescing_spans_updated ( ) {
1821+ // Test that coalesced comment spans cover the entire range
1822+ let input = "// Start comment\n // End comment\n " ;
1823+ let mut lexer = Lexer :: default_for_input ( input) ;
1824+
1825+ let comment_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1826+ let span = comment_token. span ( ) ;
1827+
1828+ // Should start at line 1, column 1 and end after the second comment
1829+ assert_eq ! ( span. start. line, 1 ) ;
1830+ assert_eq ! ( span. start. column, 1 ) ;
1831+ assert_eq ! ( span. end. line, 2 ) ;
1832+ // End column should be after "// End comment"
1833+ assert ! ( span. end. column > 10 ) ;
1834+ }
1835+
1836+ #[ test]
1837+ fn comment_coalescing_empty_comments ( ) {
1838+ // Test edge case with empty comments
1839+ let input = "//\n // Non-empty\n code" ;
1840+ let mut lexer = Lexer :: default_for_input ( input) ;
1841+
1842+ let comment_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1843+ if let TokenType :: Comment ( content) = comment_token. r#type ( ) {
1844+ // Should coalesce empty comment with non-empty one
1845+ assert ! ( content. contains( "\n Non-empty" ) ) ;
1846+ } else {
1847+ panic ! ( "Expected comment token, got: {:?}" , comment_token. r#type( ) ) ;
1848+ }
1849+ }
1850+
1851+ #[ test]
1852+ fn comment_coalescing_flush_on_eof ( ) {
1853+ // Test that buffered comments are flushed at end of input
1854+ let input = "// Only comment" ;
1855+ let mut lexer = Lexer :: default_for_input ( input) ;
1856+
1857+ let comment_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1858+ assert_eq ! (
1859+ * comment_token. r#type( ) ,
1860+ TokenType :: Comment ( " Only comment" . to_string( ) )
1861+ ) ;
1862+
1863+ let eof_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1864+ assert_eq ! ( * eof_token. r#type( ) , TokenType :: EOF ) ;
1865+ }
1866+
1867+ #[ test]
1868+ fn comment_coalescing_doc_comments_multiple ( ) {
1869+ // Test multiple doc comment coalescing
1870+ let input = "/// Doc 1\n /// Doc 2\n /// Doc 3\n struct Test;" ;
1871+ let mut lexer = Lexer :: default_for_input ( input) ;
1872+
1873+ let doc_comment_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1874+ if let TokenType :: DocComment ( content) = doc_comment_token. r#type ( ) {
1875+ assert ! ( content. contains( " Doc 1" ) ) ;
1876+ assert ! ( content. contains( " Doc 2" ) ) ;
1877+ assert ! ( content. contains( " Doc 3" ) ) ;
1878+ // Should have two newlines for three doc comments
1879+ let newline_count = content. matches ( '\n' ) . count ( ) ;
1880+ assert_eq ! ( newline_count, 2 ) ;
1881+ } else {
1882+ panic ! (
1883+ "Expected coalesced doc comment token, got: {:?}" ,
1884+ doc_comment_token. r#type( )
1885+ ) ;
1886+ }
1887+ }
}