Skip to content

Commit 9c36b9b

Browse files
committed
Refactor doc comment parsing to preserve whitespace
The doc comment parsing logic has been updated to be less destructive. Previously, it would trim all leading and trailing whitespace from comment lines. The new implementation removes only a single optional leading space, preserving intentional indentation and formatting. This change aligns the parser with updated lexer behavior, in which the `///` marker is stripped earlier in the pipeline.
1 parent 300d790 commit 9c36b9b

File tree

3 files changed

+34
-22
lines changed

3 files changed

+34
-22
lines changed

src/core/parser/components/doc_integration_tests.rs

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
//! Integration tests for doc comment association across all parsers.
22
3-
#![expect(clippy::unwrap_used)]
4-
53
use crate::core::parser::components::attributes::{
64
BlockAttributeParser, FieldAttributeParser,
75
};
@@ -19,7 +17,7 @@ use crate::core::scanner::tokens::{Token, TokenType};
1917
/// Helper to create a `DocComment` token.
2018
fn doc_token(text: &str, line: u32) -> Token {
2119
Token::new(
22-
TokenType::DocComment(format!("///{text}")),
20+
TokenType::DocComment(text.to_string()),
2321
(line, 1),
2422
(line, 4 + u32::try_from(text.len()).unwrap_or(0)),
2523
)

src/core/parser/components/expressions.rs

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1023,8 +1023,6 @@ impl Parser<Expr> for ExpressionParser {
10231023

10241024
#[cfg(test)]
10251025
mod tests {
1026-
#![expect(clippy::unwrap_used)]
1027-
10281026
use crate::core::parser::components::expressions::ExpressionParser;
10291027
use crate::core::parser::components::helpers::{
10301028
extract_doc_text, parse_leading_docs,
@@ -1442,7 +1440,7 @@ mod tests {
14421440
end_col: u32,
14431441
) -> Token {
14441442
Token::new(
1445-
TokenType::DocComment(format!("///{text}")),
1443+
TokenType::DocComment(text.to_string()),
14461444
(line, start_col),
14471445
(line, end_col),
14481446
)
@@ -1639,9 +1637,10 @@ mod tests {
16391637
assert!(result.is_some());
16401638
let docs = result.unwrap();
16411639
assert_eq!(docs.lines.len(), 3);
1642-
assert_eq!(docs.lines[0], ""); // Trimmed to empty
1640+
// Only a single leading space is removed; remaining spaces preserved
1641+
assert_eq!(docs.lines[0], " ");
16431642
assert_eq!(docs.lines[1], "Real content");
1644-
assert_eq!(docs.lines[2], ""); // Trimmed to empty
1643+
assert_eq!(docs.lines[2], " ");
16451644
}
16461645

16471646
#[test]
@@ -1701,7 +1700,7 @@ mod tests {
17011700
#[test]
17021701
fn extract_doc_text_with_prefix() {
17031702
let token = Token::new(
1704-
TokenType::DocComment("/// This is documentation".to_string()),
1703+
TokenType::DocComment(" This is documentation".to_string()),
17051704
(1, 1),
17061705
(1, 25),
17071706
);
@@ -1725,13 +1724,14 @@ mod tests {
17251724
#[test]
17261725
fn extract_doc_text_with_extra_whitespace() {
17271726
let token = Token::new(
1728-
TokenType::DocComment("/// This has extra spaces ".to_string()),
1727+
TokenType::DocComment(" This has extra spaces ".to_string()),
17291728
(1, 1),
17301729
(1, 31),
17311730
);
17321731

17331732
let result = extract_doc_text(&token);
1734-
assert_eq!(result, Some("This has extra spaces".to_string()));
1733+
// Only a single leading space is removed; preserve the rest
1734+
assert_eq!(result, Some(" This has extra spaces ".to_string()));
17351735
}
17361736

17371737
#[test]
@@ -1750,17 +1750,17 @@ mod tests {
17501750
fn docs_span_calculation() {
17511751
let tokens = vec![
17521752
Token::new(
1753-
TokenType::DocComment("/// First".to_string()),
1753+
TokenType::DocComment(" First".to_string()),
17541754
(1, 1),
17551755
(1, 10),
17561756
),
17571757
Token::new(
1758-
TokenType::DocComment("/// Second".to_string()),
1758+
TokenType::DocComment(" Second".to_string()),
17591759
(2, 1),
17601760
(2, 11),
17611761
),
17621762
Token::new(
1763-
TokenType::DocComment("/// Third".to_string()),
1763+
TokenType::DocComment(" Third".to_string()),
17641764
(3, 1),
17651765
(3, 10),
17661766
),

src/core/parser/components/helpers.rs

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -26,14 +26,16 @@ pub(crate) fn span_from_to(a: &SymbolSpan, b: &SymbolSpan) -> SymbolSpan {
2626

2727
/// Extract documentation text from a `DocComment` token.
2828
///
29-
/// Normalizes the raw doc comment text by stripping an optional leading
30-
/// `///` prefix and trimming surrounding whitespace. Works for inputs with
31-
/// or without the `///` prefix.
29+
/// Given a `DocComment` token whose text is the content after the `///`
30+
/// marker, remove at most one leading space. Preserve all other whitespace.
3231
#[must_use]
3332
pub fn extract_doc_text(token: &Token) -> Option<String> {
3433
if let TokenType::DocComment(text) = token.r#type() {
35-
let s = text.strip_prefix("///").unwrap_or(text).trim();
36-
Some(s.to_string())
34+
if let Some(rest) = text.strip_prefix(' ') {
35+
Some(rest.to_string())
36+
} else {
37+
Some(text.to_string())
38+
}
3739
} else {
3840
None
3941
}
@@ -114,8 +116,6 @@ pub fn parse_leading_docs(stream: &mut dyn TokenStream) -> Option<Docs> {
114116

115117
#[cfg(test)]
116118
mod tests {
117-
#![expect(clippy::unwrap_used)]
118-
119119
use super::*;
120120
use crate::core::parser::stream::VectorTokenStream;
121121

@@ -125,14 +125,28 @@ mod tests {
125125

126126
#[test]
127127
fn extract_doc_text_variants() {
128-
let t = tok(TokenType::DocComment("/// hello".into()));
128+
let t = tok(TokenType::DocComment(" hello".into()));
129129
assert_eq!(extract_doc_text(&t).unwrap(), "hello");
130130
let t = tok(TokenType::DocComment("plain".into()));
131131
assert_eq!(extract_doc_text(&t).unwrap(), "plain");
132132
let t = tok(TokenType::Comment(" not-doc".into()));
133133
assert!(extract_doc_text(&t).is_none());
134134
}
135135

136+
#[test]
137+
fn extract_doc_text_removes_only_one_space() {
138+
let t = tok(TokenType::DocComment(" many spaces".into()));
139+
// Only the first leading space is removed; remaining preserved
140+
assert_eq!(extract_doc_text(&t).unwrap(), " many spaces");
141+
}
142+
143+
#[test]
144+
fn extract_doc_text_preserves_tabs_and_other_whitespace() {
145+
let t = tok(TokenType::DocComment("\tTabbed doc".into()));
146+
// Not a space prefix, so unchanged
147+
assert_eq!(extract_doc_text(&t).unwrap(), "\tTabbed doc");
148+
}
149+
136150
#[test]
137151
fn parse_leading_docs_none_and_some() {
138152
// None path (no docs)

0 commit comments

Comments (0)