@@ -433,17 +433,35 @@ impl Lexer {
433433 } ;
434434 } ;
435435
// Coalesce a run of same-kind comment tokens into the single buffered
// token held in `self.$comment_type`. Consecutive comments have their
// text joined with a newline, and the buffered token's span is stretched
// from the first comment's start to the newest comment's end. When no
// comment is buffered yet, the current `token` becomes the buffer.
//
// `$comment_type`     — field on `self` holding `Option<Token>`
//                       (`last_comment` / `last_doc_comment`).
// `$comment_variants` — the matching `TokenType` variant
//                       (`Comment` / `DocComment`).
//
// NOTE: relies on `self` and `token` being in scope at the macro's
// definition site inside the tokenizing method.
macro_rules! coalesce_comments {
    ($comment_type:ident, $comment_variants:ident) => {
        if let Some(ref mut last) = self.$comment_type {
            if let (
                TokenType::$comment_variants(prev),
                TokenType::$comment_variants(curr),
            ) = (last.r#type(), token.r#type())
            {
                // Join with a bare newline. A separator of "{}\n {}"
                // would smuggle an extra space into the merged text, so
                // coalescing an empty comment with "// Non-empty" would
                // yield "\n  Non-empty" and break the documented
                // "\n Non-empty" shape the tests rely on.
                let combined = format!("{}\n{}", prev, curr);
                *last = Token::new(
                    TokenType::$comment_variants(combined),
                    (&last.span().start).into(),
                    (&token.span().end).into(),
                );
            }
        } else {
            self.$comment_type = Some(token);
        }
    };
}
457+
436458 // Handle comment coalescing
437459 match token. r#type ( ) {
438460 TokenType :: Comment ( _) => {
439- // Replace any existing buffered comment (coalescing consecutive comments)
440- self . last_comment = Some ( token) ;
441- // Continue to next token
461+ coalesce_comments ! ( last_comment, Comment ) ;
442462 }
443463 TokenType :: DocComment ( _) => {
444- // Replace any existing buffered doc comment (coalescing consecutive doc comments)
445- self . last_doc_comment = Some ( token) ;
446- // Continue to next token
464+ coalesce_comments ! ( last_doc_comment, DocComment ) ;
447465 }
448466 _ => {
449467 // Non-comment token found - flush buffered comments and queue this token
@@ -1669,4 +1687,202 @@ mod tests {
16691687 assert_eq ! ( * token. r#type( ) , TokenType :: RightBrace ) ;
16701688 }
16711689 }
1690+
#[test]
fn comment_coalescing_adjacent_regular_comments() {
    // Two back-to-back `//` comments should surface as one merged token.
    let input = "// First comment\n // Second comment\n identifier";
    let mut lexer = Lexer::default_for_input(input);

    let merged = lexer.next_token().unwrap().unwrap();
    match merged.r#type() {
        TokenType::Comment(content) => {
            assert!(content.contains(" First comment"));
            assert!(content.contains(" Second comment"));
            // The two bodies are glued together with a newline.
            assert!(content.contains('\n'));
        }
        other => panic!("Expected coalesced comment token, got: {:?}", other),
    }

    // The token after the comment run is the identifier itself.
    let following = lexer.next_token().unwrap().unwrap();
    assert_eq!(
        *following.r#type(),
        TokenType::Identifier("identifier".to_string())
    );
}
1716+
#[test]
fn comment_coalescing_adjacent_doc_comments() {
    // Back-to-back `///` comments should merge into one DocComment token.
    let input = "/// First doc\n /// Second doc\n fn test() {}";
    let mut lexer = Lexer::default_for_input(input);

    let merged = lexer.next_token().unwrap().unwrap();
    match merged.r#type() {
        TokenType::DocComment(content) => {
            assert!(content.contains(" First doc"));
            assert!(content.contains(" Second doc"));
            // Joined on a newline, same as regular comments.
            assert!(content.contains('\n'));
        }
        other => panic!("Expected coalesced doc comment token, got: {:?}", other),
    }
}
1736+
#[test]
fn comment_coalescing_mixed_comment_types_separate() {
    // A `//` comment followed by a `///` comment must remain two tokens:
    // regular and doc comments never coalesce with each other.
    let input = "// Regular comment\n /// Doc comment\n identifier";
    let mut lexer = Lexer::default_for_input(input);

    let plain = lexer.next_token().unwrap().unwrap();
    match plain.r#type() {
        TokenType::Comment(content) => {
            assert!(content.contains(" Regular comment"));
            assert!(!content.contains(" Doc comment"));
        }
        other => panic!("Expected regular comment token, got: {:?}", other),
    }

    let doc = lexer.next_token().unwrap().unwrap();
    match doc.r#type() {
        TokenType::DocComment(content) => {
            assert!(content.contains(" Doc comment"));
            assert!(!content.contains(" Regular comment"));
        }
        other => panic!("Expected doc comment token, got: {:?}", other),
    }

    // After both comment buffers flush, the identifier comes through.
    let following = lexer.next_token().unwrap().unwrap();
    assert_eq!(
        *following.r#type(),
        TokenType::Identifier("identifier".to_string())
    );
}
1771+
#[test]
fn comment_coalescing_with_intervening_code() {
    // Comments separated by real code must stay separate tokens.
    let input = "// First comment\n let x = 1;\n // Second comment";
    let mut lexer = Lexer::default_for_input(input);

    let leading = lexer.next_token().unwrap().unwrap();
    match leading.r#type() {
        TokenType::Comment(content) => {
            assert!(content.contains(" First comment"));
            assert!(!content.contains(" Second comment"));
        }
        other => panic!("Expected first comment token, got: {:?}", other),
    }

    // The `let` keyword surfaces as a plain identifier in this lexer.
    let keyword = lexer.next_token().unwrap().unwrap();
    assert_eq!(
        *keyword.r#type(),
        TokenType::Identifier("let".to_string())
    );
}
1796+
#[test]
fn comment_coalescing_multiple_adjacent() {
    // Three consecutive comments collapse into a single token.
    let input = "// First\n // Second\n // Third\n code";
    let mut lexer = Lexer::default_for_input(input);

    let merged = lexer.next_token().unwrap().unwrap();
    match merged.r#type() {
        TokenType::Comment(content) => {
            assert!(content.contains(" First"));
            assert!(content.contains(" Second"));
            assert!(content.contains(" Third"));
            // Three comments need exactly two newline join points.
            assert_eq!(content.matches('\n').count(), 2);
        }
        other => panic!("Expected coalesced comment token, got: {:?}", other),
    }
}
1818+
#[test]
fn comment_coalescing_spans_updated() {
    // The merged token's span must stretch across both source comments.
    let input = "// Start comment\n // End comment\n ";
    let mut lexer = Lexer::default_for_input(input);

    let merged = lexer.next_token().unwrap().unwrap();
    let span = merged.span();

    // Begins at the very first character of the first comment...
    assert_eq!(span.start.line, 1);
    assert_eq!(span.start.column, 1);
    // ...and ends on the second comment's line, past its text
    // ("// End comment" is well over 10 columns wide).
    assert_eq!(span.end.line, 2);
    assert!(span.end.column > 10);
}
1835+
#[test]
fn comment_coalescing_empty_comments() {
    // An empty `//` comment still coalesces with the one that follows.
    let input = "//\n // Non-empty\n code";
    let mut lexer = Lexer::default_for_input(input);

    let merged = lexer.next_token().unwrap().unwrap();
    match merged.r#type() {
        // Empty body + newline + " Non-empty" body.
        TokenType::Comment(content) => assert!(content.contains("\n Non-empty")),
        other => panic!("Expected comment token, got: {:?}", other),
    }
}
1850+
#[test]
fn comment_coalescing_flush_on_eof() {
    // A lone buffered comment must still be emitted at end of input.
    let input = "// Only comment";
    let mut lexer = Lexer::default_for_input(input);

    let flushed = lexer.next_token().unwrap().unwrap();
    assert_eq!(
        *flushed.r#type(),
        TokenType::Comment(" Only comment".to_string())
    );

    // Nothing remains but the end-of-file marker.
    let trailing = lexer.next_token().unwrap().unwrap();
    assert_eq!(*trailing.r#type(), TokenType::EOF);
}
1866+
#[test]
fn comment_coalescing_doc_comments_multiple() {
    // Three consecutive doc comments fold into one DocComment token.
    let input = "/// Doc 1\n /// Doc 2\n /// Doc 3\n struct Test;";
    let mut lexer = Lexer::default_for_input(input);

    let merged = lexer.next_token().unwrap().unwrap();
    match merged.r#type() {
        TokenType::DocComment(content) => {
            assert!(content.contains(" Doc 1"));
            assert!(content.contains(" Doc 2"));
            assert!(content.contains(" Doc 3"));
            // Two newline join points for three doc comments.
            assert_eq!(content.matches('\n').count(), 2);
        }
        other => panic!("Expected coalesced doc comment token, got: {:?}", other),
    }
}
16721888}
0 commit comments