Skip to content

Commit 9c36b9b

Browse files
committed
Refactor doc comment parsing to preserve whitespace
The doc comment parsing logic has been updated to be less destructive. Previously, it would trim all leading and trailing whitespace from comment lines. The new implementation removes only a single optional leading space, preserving intentional indentation and formatting. This change aligns the parser with updated lexer behavior, in which the `///` marker is stripped earlier in the pipeline.
1 parent 300d790 commit 9c36b9b

File tree

3 files changed

+34
-22
lines changed

3 files changed

+34
-22
lines changed

src/core/parser/components/doc_integration_tests.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
//! Integration tests for doc comment association across all parsers.
22
3-
#![expect(clippy::unwrap_used)]
4-
53
use crate::core::parser::components::attributes::{
64
BlockAttributeParser, FieldAttributeParser,
75
};
@@ -19,7 +17,7 @@ use crate::core::scanner::tokens::{Token, TokenType};
1917
/// Helper to create a `DocComment` token.
2018
fn doc_token(text: &str, line: u32) -> Token {
2119
Token::new(
22-
TokenType::DocComment(format!("///{text}")),
20+
TokenType::DocComment(text.to_string()),
2321
(line, 1),
2422
(line, 4 + u32::try_from(text.len()).unwrap_or(0)),
2523
)

src/core/parser/components/expressions.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,8 +1023,6 @@ impl Parser<Expr> for ExpressionParser {
10231023

10241024
#[cfg(test)]
10251025
mod tests {
1026-
#![expect(clippy::unwrap_used)]
1027-
10281026
use crate::core::parser::components::expressions::ExpressionParser;
10291027
use crate::core::parser::components::helpers::{
10301028
extract_doc_text, parse_leading_docs,
@@ -1442,7 +1440,7 @@ mod tests {
14421440
end_col: u32,
14431441
) -> Token {
14441442
Token::new(
1445-
TokenType::DocComment(format!("///{text}")),
1443+
TokenType::DocComment(text.to_string()),
14461444
(line, start_col),
14471445
(line, end_col),
14481446
)
@@ -1639,9 +1637,10 @@ mod tests {
16391637
assert!(result.is_some());
16401638
let docs = result.unwrap();
16411639
assert_eq!(docs.lines.len(), 3);
1642-
assert_eq!(docs.lines[0], ""); // Trimmed to empty
1640+
// Only a single leading space is removed; remaining spaces preserved
1641+
assert_eq!(docs.lines[0], " ");
16431642
assert_eq!(docs.lines[1], "Real content");
1644-
assert_eq!(docs.lines[2], ""); // Trimmed to empty
1643+
assert_eq!(docs.lines[2], " ");
16451644
}
16461645

16471646
#[test]
@@ -1701,7 +1700,7 @@ mod tests {
17011700
#[test]
17021701
fn extract_doc_text_with_prefix() {
17031702
let token = Token::new(
1704-
TokenType::DocComment("/// This is documentation".to_string()),
1703+
TokenType::DocComment(" This is documentation".to_string()),
17051704
(1, 1),
17061705
(1, 25),
17071706
);
@@ -1725,13 +1724,14 @@ mod tests {
17251724
#[test]
17261725
fn extract_doc_text_with_extra_whitespace() {
17271726
let token = Token::new(
1728-
TokenType::DocComment("/// This has extra spaces ".to_string()),
1727+
TokenType::DocComment(" This has extra spaces ".to_string()),
17291728
(1, 1),
17301729
(1, 31),
17311730
);
17321731

17331732
let result = extract_doc_text(&token);
1734-
assert_eq!(result, Some("This has extra spaces".to_string()));
1733+
// Only a single leading space is removed; preserve the rest
1734+
assert_eq!(result, Some(" This has extra spaces ".to_string()));
17351735
}
17361736

17371737
#[test]
@@ -1750,17 +1750,17 @@ mod tests {
17501750
fn docs_span_calculation() {
17511751
let tokens = vec![
17521752
Token::new(
1753-
TokenType::DocComment("/// First".to_string()),
1753+
TokenType::DocComment(" First".to_string()),
17541754
(1, 1),
17551755
(1, 10),
17561756
),
17571757
Token::new(
1758-
TokenType::DocComment("/// Second".to_string()),
1758+
TokenType::DocComment(" Second".to_string()),
17591759
(2, 1),
17601760
(2, 11),
17611761
),
17621762
Token::new(
1763-
TokenType::DocComment("/// Third".to_string()),
1763+
TokenType::DocComment(" Third".to_string()),
17641764
(3, 1),
17651765
(3, 10),
17661766
),

src/core/parser/components/helpers.rs

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,16 @@ pub(crate) fn span_from_to(a: &SymbolSpan, b: &SymbolSpan) -> SymbolSpan {
2626

2727
/// Extract documentation text from a `DocComment` token.
2828
///
29-
/// Normalizes the raw doc comment text by stripping an optional leading
30-
/// `///` prefix and trimming surrounding whitespace. Works for inputs with
31-
/// or without the `///` prefix.
29+
/// Given a `DocComment` token whose text is the content after the `///`
30+
/// marker, remove at most one leading space. Preserve all other whitespace.
3231
#[must_use]
3332
pub fn extract_doc_text(token: &Token) -> Option<String> {
3433
if let TokenType::DocComment(text) = token.r#type() {
35-
let s = text.strip_prefix("///").unwrap_or(text).trim();
36-
Some(s.to_string())
34+
if let Some(rest) = text.strip_prefix(' ') {
35+
Some(rest.to_string())
36+
} else {
37+
Some(text.to_string())
38+
}
3739
} else {
3840
None
3941
}
@@ -114,8 +116,6 @@ pub fn parse_leading_docs(stream: &mut dyn TokenStream) -> Option<Docs> {
114116

115117
#[cfg(test)]
116118
mod tests {
117-
#![expect(clippy::unwrap_used)]
118-
119119
use super::*;
120120
use crate::core::parser::stream::VectorTokenStream;
121121

@@ -125,14 +125,28 @@ mod tests {
125125

126126
#[test]
127127
fn extract_doc_text_variants() {
128-
let t = tok(TokenType::DocComment("/// hello".into()));
128+
let t = tok(TokenType::DocComment(" hello".into()));
129129
assert_eq!(extract_doc_text(&t).unwrap(), "hello");
130130
let t = tok(TokenType::DocComment("plain".into()));
131131
assert_eq!(extract_doc_text(&t).unwrap(), "plain");
132132
let t = tok(TokenType::Comment(" not-doc".into()));
133133
assert!(extract_doc_text(&t).is_none());
134134
}
135135

136+
#[test]
137+
fn extract_doc_text_removes_only_one_space() {
138+
let t = tok(TokenType::DocComment(" many spaces".into()));
139+
// Only the first leading space is removed; remaining preserved
140+
assert_eq!(extract_doc_text(&t).unwrap(), " many spaces");
141+
}
142+
143+
#[test]
144+
fn extract_doc_text_preserves_tabs_and_other_whitespace() {
145+
let t = tok(TokenType::DocComment("\tTabbed doc".into()));
146+
// Not a space prefix, so unchanged
147+
assert_eq!(extract_doc_text(&t).unwrap(), "\tTabbed doc");
148+
}
149+
136150
#[test]
137151
fn parse_leading_docs_none_and_some() {
138152
// None path (no docs)

0 commit comments

Comments (0)