Skip to content

Commit 43f3f5f

Browse files
committed
Add tests for comment coalescing and span checks
Expanded the test suite to cover comment coalescing and SymbolLocation span checks.
1 parent ebb53c4 commit 43f3f5f

File tree

2 files changed

+303
-0
lines changed

2 files changed

+303
-0
lines changed

src/core/scanner/lexer.rs

Lines changed: 198 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1687,4 +1687,202 @@ mod tests {
16871687
assert_eq!(*token.r#type(), TokenType::RightBrace);
16881688
}
16891689
}
1690+
1691+
#[test]
fn comment_coalescing_adjacent_regular_comments() {
    // Two `//` comments with nothing between them should merge into one token.
    let source = "// First comment\n// Second comment\nidentifier";
    let mut lex = Lexer::default_for_input(source);

    let token = lex.next_token().unwrap().unwrap();
    match token.r#type() {
        TokenType::Comment(text) => {
            // Both halves must survive the merge, joined by a newline.
            assert!(text.contains(" First comment"));
            assert!(text.contains(" Second comment"));
            assert!(text.contains('\n'));
        }
        other => panic!("Expected coalesced comment token, got: {:?}", other),
    }

    // The identifier following the comment run is the next token produced.
    let next = lex.next_token().unwrap().unwrap();
    assert_eq!(
        *next.r#type(),
        TokenType::Identifier("identifier".to_string())
    );
}
1716+
1717+
#[test]
fn comment_coalescing_adjacent_doc_comments() {
    // Consecutive `///` doc comments merge into a single DocComment token.
    let source = "/// First doc\n/// Second doc\nfn test() {}";
    let mut lex = Lexer::default_for_input(source);

    let token = lex.next_token().unwrap().unwrap();
    match token.r#type() {
        TokenType::DocComment(text) => {
            assert!(text.contains(" First doc"));
            assert!(text.contains(" Second doc"));
            // The two pieces are joined with a newline.
            assert!(text.contains('\n'));
        }
        other => panic!("Expected coalesced doc comment token, got: {:?}", other),
    }
}
1736+
1737+
#[test]
fn comment_coalescing_mixed_comment_types_separate() {
    // A `//` comment followed by a `///` comment must remain two tokens:
    // coalescing only applies within the same comment kind.
    let source = "// Regular comment\n/// Doc comment\nidentifier";
    let mut lex = Lexer::default_for_input(source);

    // First token: the regular comment, on its own.
    let regular = lex.next_token().unwrap().unwrap();
    match regular.r#type() {
        TokenType::Comment(text) => {
            assert!(text.contains(" Regular comment"));
            assert!(!text.contains(" Doc comment"));
        }
        other => panic!("Expected regular comment token, got: {:?}", other),
    }

    // Second token: the doc comment, on its own.
    let doc = lex.next_token().unwrap().unwrap();
    match doc.r#type() {
        TokenType::DocComment(text) => {
            assert!(text.contains(" Doc comment"));
            assert!(!text.contains(" Regular comment"));
        }
        other => panic!("Expected doc comment token, got: {:?}", other),
    }

    // Finally, the identifier that follows both comments.
    let ident = lex.next_token().unwrap().unwrap();
    assert_eq!(
        *ident.r#type(),
        TokenType::Identifier("identifier".to_string())
    );
}
1771+
1772+
#[test]
fn comment_coalescing_with_intervening_code() {
    // Comments separated by code must not be merged together.
    let source = "// First comment\nlet x = 1;\n// Second comment";
    let mut lex = Lexer::default_for_input(source);

    let comment = lex.next_token().unwrap().unwrap();
    match comment.r#type() {
        TokenType::Comment(text) => {
            assert!(text.contains(" First comment"));
            assert!(!text.contains(" Second comment"));
        }
        other => panic!("Expected first comment token, got: {:?}", other),
    }

    // The intervening code starts with `let`, which this lexer emits as an
    // identifier token.
    let next = lex.next_token().unwrap().unwrap();
    assert_eq!(*next.r#type(), TokenType::Identifier("let".to_string()));
}
1796+
1797+
#[test]
fn comment_coalescing_multiple_adjacent() {
    // Three adjacent comments collapse into one token.
    let source = "// First\n// Second\n// Third\ncode";
    let mut lex = Lexer::default_for_input(source);

    let token = lex.next_token().unwrap().unwrap();
    match token.r#type() {
        TokenType::Comment(text) => {
            assert!(text.contains(" First"));
            assert!(text.contains(" Second"));
            assert!(text.contains(" Third"));
            // Three pieces joined pairwise -> exactly two newlines.
            assert_eq!(text.matches('\n').count(), 2);
        }
        other => panic!("Expected coalesced comment token, got: {:?}", other),
    }
}
1818+
1819+
#[test]
fn comment_coalescing_spans_updated() {
    // The merged token's span must stretch from the first comment's start to
    // the last comment's end.
    let source = "// Start comment\n// End comment\n";
    let mut lex = Lexer::default_for_input(source);

    let token = lex.next_token().unwrap().unwrap();
    let span = token.span();

    // Begins at the very start of the input...
    assert_eq!(span.start.line, 1);
    assert_eq!(span.start.column, 1);
    // ...and ends on line 2, past the text of "// End comment".
    assert_eq!(span.end.line, 2);
    assert!(span.end.column > 10);
}
1835+
1836+
#[test]
fn comment_coalescing_empty_comments() {
    // An empty `//` comment still participates in coalescing.
    let source = "//\n// Non-empty\ncode";
    let mut lex = Lexer::default_for_input(source);

    let token = lex.next_token().unwrap().unwrap();
    match token.r#type() {
        TokenType::Comment(text) => {
            // Empty piece, then the join newline, then the non-empty piece.
            assert!(text.contains("\n Non-empty"));
        }
        other => panic!("Expected comment token, got: {:?}", other),
    }
}
1850+
1851+
#[test]
fn comment_coalescing_flush_on_eof() {
    // A comment still buffered when the input ends must be flushed as a token.
    let source = "// Only comment";
    let mut lex = Lexer::default_for_input(source);

    let comment = lex.next_token().unwrap().unwrap();
    assert_eq!(
        *comment.r#type(),
        TokenType::Comment(" Only comment".to_string())
    );

    // Nothing remains after the flushed comment except the EOF marker.
    let eof = lex.next_token().unwrap().unwrap();
    assert_eq!(*eof.r#type(), TokenType::EOF);
}
1866+
1867+
#[test]
fn comment_coalescing_doc_comments_multiple() {
    // Three adjacent doc comments merge, joined by exactly two newlines.
    let source = "/// Doc 1\n/// Doc 2\n/// Doc 3\nstruct Test;";
    let mut lex = Lexer::default_for_input(source);

    let token = lex.next_token().unwrap().unwrap();
    match token.r#type() {
        TokenType::DocComment(text) => {
            assert!(text.contains(" Doc 1"));
            assert!(text.contains(" Doc 2"));
            assert!(text.contains(" Doc 3"));
            // Three pieces joined pairwise -> exactly two newlines.
            assert_eq!(text.matches('\n').count(), 2);
        }
        other => panic!("Expected coalesced doc comment token, got: {:?}", other),
    }
}
16901888
}

src/core/scanner/tokens.rs

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -392,4 +392,109 @@ mod tests {
392392
// end before start should panic
393393
let _ = Token::new(TokenType::At, (2, 1), (1, 1));
394394
}
395+
396+
#[test]
fn symbol_location_from_reference_conversion() {
    // `From<&SymbolLocation> for (u32, u32)` should mirror the fields as
    // (line, column) without consuming the location.
    let typical = SymbolLocation {
        line: 5,
        column: 10,
    };
    assert_eq!(<(u32, u32)>::from(&typical), (5, 10));

    // Zero is a valid (if unusual) coordinate pair.
    let origin = SymbolLocation { line: 0, column: 0 };
    assert_eq!(<(u32, u32)>::from(&origin), (0, 0));

    // The conversion must not clamp or wrap at the top of the range.
    let extreme = SymbolLocation {
        line: u32::MAX,
        column: u32::MAX,
    };
    assert_eq!(<(u32, u32)>::from(&extreme), (u32::MAX, u32::MAX));
}
419+
420+
#[test]
fn token_new_lexicographic_span_ordering() {
    // On a single line, a start column at or before the end column forms a
    // valid (monotonically increasing) span.
    let begin = SymbolLocation { line: 1, column: 5 };
    let finish = SymbolLocation {
        line: 1,
        column: 10,
    };

    let token = Token::new(
        TokenType::Identifier("test".to_string()),
        (begin.line, begin.column),
        (finish.line, finish.column),
    );

    // Both endpoints round-trip through the constructor unchanged.
    assert_eq!(token.span().start, begin);
    assert_eq!(token.span().end, finish);
}
438+
439+
#[test]
fn token_new_lexicographic_span_same_position() {
    // A zero-width span (start == end) is accepted by the constructor.
    let point = SymbolLocation { line: 1, column: 5 };

    let token = Token::new(
        TokenType::Identifier("x".to_string()),
        (point.line, point.column),
        (point.line, point.column),
    );

    assert_eq!(token.span().start, point);
    assert_eq!(token.span().end, point);
}
451+
452+
#[test]
fn token_new_lexicographic_span_different_lines() {
    // When the end line is strictly greater, the end column may be smaller
    // than the start column — ordering is lexicographic on (line, column).
    let begin = SymbolLocation {
        line: 1,
        column: 20,
    };
    let finish = SymbolLocation { line: 2, column: 5 };

    let token = Token::new(
        TokenType::Literal("multiline".to_string()),
        (begin.line, begin.column),
        (finish.line, finish.column),
    );

    assert_eq!(token.span().start, begin);
    assert_eq!(token.span().end, finish);
}
468+
469+
#[test]
#[should_panic(expected = "span should be monotonically increasing")]
fn token_new_lexicographic_span_invalid_same_line() {
    // Same line with the end column before the start column violates the
    // span invariant and must panic.
    let begin = SymbolLocation {
        line: 1,
        column: 10,
    };
    let finish = SymbolLocation { line: 1, column: 5 };

    let _ = Token::new(
        TokenType::Identifier("invalid".to_string()),
        (begin.line, begin.column),
        (finish.line, finish.column),
    );
}
484+
485+
#[test]
#[should_panic(expected = "span should be monotonically increasing")]
fn token_new_lexicographic_span_invalid_reverse_lines() {
    // An end line earlier than the start line must panic regardless of the
    // columns involved.
    let begin = SymbolLocation { line: 2, column: 5 };
    let finish = SymbolLocation {
        line: 1,
        column: 10,
    };

    let _ = Token::new(
        TokenType::Identifier("invalid".to_string()),
        (begin.line, begin.column),
        (finish.line, finish.column),
    );
}
395500
}

0 commit comments

Comments
 (0)