@@ -1687,4 +1687,202 @@ mod tests {
16871687 assert_eq ! ( * token. r#type( ) , TokenType :: RightBrace ) ;
16881688 }
16891689 }
1690+
1691+ #[ test]
1692+ fn comment_coalescing_adjacent_regular_comments ( ) {
1693+ // Test that adjacent regular comments are coalesced
1694+ let input = "// First comment\n // Second comment\n identifier" ;
1695+ let mut lexer = Lexer :: default_for_input ( input) ;
1696+
1697+ let first_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1698+ if let TokenType :: Comment ( content) = first_token. r#type ( ) {
1699+ assert ! ( content. contains( " First comment" ) ) ;
1700+ assert ! ( content. contains( " Second comment" ) ) ;
1701+ // Should be joined with newline
1702+ assert ! ( content. contains( '\n' ) ) ;
1703+ } else {
1704+ panic ! (
1705+ "Expected coalesced comment token, got: {:?}" ,
1706+ first_token. r#type( )
1707+ ) ;
1708+ }
1709+
1710+ let second_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1711+ assert_eq ! (
1712+ * second_token. r#type( ) ,
1713+ TokenType :: Identifier ( "identifier" . to_string( ) )
1714+ ) ;
1715+ }
1716+
1717+ #[ test]
1718+ fn comment_coalescing_adjacent_doc_comments ( ) {
1719+ // Test that adjacent doc comments are coalesced
1720+ let input = "/// First doc\n /// Second doc\n fn test() {}" ;
1721+ let mut lexer = Lexer :: default_for_input ( input) ;
1722+
1723+ let first_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1724+ if let TokenType :: DocComment ( content) = first_token. r#type ( ) {
1725+ assert ! ( content. contains( " First doc" ) ) ;
1726+ assert ! ( content. contains( " Second doc" ) ) ;
1727+ // Should be joined with newline
1728+ assert ! ( content. contains( '\n' ) ) ;
1729+ } else {
1730+ panic ! (
1731+ "Expected coalesced doc comment token, got: {:?}" ,
1732+ first_token. r#type( )
1733+ ) ;
1734+ }
1735+ }
1736+
1737+ #[ test]
1738+ fn comment_coalescing_mixed_comment_types_separate ( ) {
1739+ // Test that regular comments and doc comments don't coalesce together
1740+ let input = "// Regular comment\n /// Doc comment\n identifier" ;
1741+ let mut lexer = Lexer :: default_for_input ( input) ;
1742+
1743+ let first_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1744+ if let TokenType :: Comment ( content) = first_token. r#type ( ) {
1745+ assert ! ( content. contains( " Regular comment" ) ) ;
1746+ assert ! ( !content. contains( " Doc comment" ) ) ;
1747+ } else {
1748+ panic ! (
1749+ "Expected regular comment token, got: {:?}" ,
1750+ first_token. r#type( )
1751+ ) ;
1752+ }
1753+
1754+ let second_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1755+ if let TokenType :: DocComment ( content) = second_token. r#type ( ) {
1756+ assert ! ( content. contains( " Doc comment" ) ) ;
1757+ assert ! ( !content. contains( " Regular comment" ) ) ;
1758+ } else {
1759+ panic ! (
1760+ "Expected doc comment token, got: {:?}" ,
1761+ second_token. r#type( )
1762+ ) ;
1763+ }
1764+
1765+ let third_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1766+ assert_eq ! (
1767+ * third_token. r#type( ) ,
1768+ TokenType :: Identifier ( "identifier" . to_string( ) )
1769+ ) ;
1770+ }
1771+
1772+ #[ test]
1773+ fn comment_coalescing_with_intervening_code ( ) {
1774+ // Test that comments separated by code don't coalesce
1775+ let input = "// First comment\n let x = 1;\n // Second comment" ;
1776+ let mut lexer = Lexer :: default_for_input ( input) ;
1777+
1778+ let first_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1779+ if let TokenType :: Comment ( content) = first_token. r#type ( ) {
1780+ assert ! ( content. contains( " First comment" ) ) ;
1781+ assert ! ( !content. contains( " Second comment" ) ) ;
1782+ } else {
1783+ panic ! (
1784+ "Expected first comment token, got: {:?}" ,
1785+ first_token. r#type( )
1786+ ) ;
1787+ }
1788+
1789+ // Should get the 'let' identifier next
1790+ let second_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1791+ assert_eq ! (
1792+ * second_token. r#type( ) ,
1793+ TokenType :: Identifier ( "let" . to_string( ) )
1794+ ) ;
1795+ }
1796+
1797+ #[ test]
1798+ fn comment_coalescing_multiple_adjacent ( ) {
1799+ // Test coalescing of more than two comments
1800+ let input = "// First\n // Second\n // Third\n code" ;
1801+ let mut lexer = Lexer :: default_for_input ( input) ;
1802+
1803+ let comment_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1804+ if let TokenType :: Comment ( content) = comment_token. r#type ( ) {
1805+ assert ! ( content. contains( " First" ) ) ;
1806+ assert ! ( content. contains( " Second" ) ) ;
1807+ assert ! ( content. contains( " Third" ) ) ;
1808+ // Should have two newlines joining three comments
1809+ let newline_count = content. matches ( '\n' ) . count ( ) ;
1810+ assert_eq ! ( newline_count, 2 ) ;
1811+ } else {
1812+ panic ! (
1813+ "Expected coalesced comment token, got: {:?}" ,
1814+ comment_token. r#type( )
1815+ ) ;
1816+ }
1817+ }
1818+
1819+ #[ test]
1820+ fn comment_coalescing_spans_updated ( ) {
1821+ // Test that coalesced comment spans cover the entire range
1822+ let input = "// Start comment\n // End comment\n " ;
1823+ let mut lexer = Lexer :: default_for_input ( input) ;
1824+
1825+ let comment_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1826+ let span = comment_token. span ( ) ;
1827+
1828+ // Should start at line 1, column 1 and end after the second comment
1829+ assert_eq ! ( span. start. line, 1 ) ;
1830+ assert_eq ! ( span. start. column, 1 ) ;
1831+ assert_eq ! ( span. end. line, 2 ) ;
1832+ // End column should be after "// End comment"
1833+ assert ! ( span. end. column > 10 ) ;
1834+ }
1835+
1836+ #[ test]
1837+ fn comment_coalescing_empty_comments ( ) {
1838+ // Test edge case with empty comments
1839+ let input = "//\n // Non-empty\n code" ;
1840+ let mut lexer = Lexer :: default_for_input ( input) ;
1841+
1842+ let comment_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1843+ if let TokenType :: Comment ( content) = comment_token. r#type ( ) {
1844+ // Should coalesce empty comment with non-empty one
1845+ assert ! ( content. contains( "\n Non-empty" ) ) ;
1846+ } else {
1847+ panic ! ( "Expected comment token, got: {:?}" , comment_token. r#type( ) ) ;
1848+ }
1849+ }
1850+
1851+ #[ test]
1852+ fn comment_coalescing_flush_on_eof ( ) {
1853+ // Test that buffered comments are flushed at end of input
1854+ let input = "// Only comment" ;
1855+ let mut lexer = Lexer :: default_for_input ( input) ;
1856+
1857+ let comment_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1858+ assert_eq ! (
1859+ * comment_token. r#type( ) ,
1860+ TokenType :: Comment ( " Only comment" . to_string( ) )
1861+ ) ;
1862+
1863+ let eof_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1864+ assert_eq ! ( * eof_token. r#type( ) , TokenType :: EOF ) ;
1865+ }
1866+
1867+ #[ test]
1868+ fn comment_coalescing_doc_comments_multiple ( ) {
1869+ // Test multiple doc comment coalescing
1870+ let input = "/// Doc 1\n /// Doc 2\n /// Doc 3\n struct Test;" ;
1871+ let mut lexer = Lexer :: default_for_input ( input) ;
1872+
1873+ let doc_comment_token = lexer. next_token ( ) . unwrap ( ) . unwrap ( ) ;
1874+ if let TokenType :: DocComment ( content) = doc_comment_token. r#type ( ) {
1875+ assert ! ( content. contains( " Doc 1" ) ) ;
1876+ assert ! ( content. contains( " Doc 2" ) ) ;
1877+ assert ! ( content. contains( " Doc 3" ) ) ;
1878+ // Should have two newlines for three doc comments
1879+ let newline_count = content. matches ( '\n' ) . count ( ) ;
1880+ assert_eq ! ( newline_count, 2 ) ;
1881+ } else {
1882+ panic ! (
1883+ "Expected coalesced doc comment token, got: {:?}" ,
1884+ doc_comment_token. r#type( )
1885+ ) ;
1886+ }
1887+ }
}