Skip to content

Conversation

tcottin
Copy link
Contributor

@tcottin tcottin commented Oct 5, 2025

Fix clangd/clangd#2513

This regression was introduced with #140498.

The issue is that with #140498 the extraction of the documentation comment changed from line based to paragraph based.
This also removed some required line breaks inside paragraphs, which used to be added before the change.

This PR adds the missing line breaks again.

@llvmbot
Copy link
Member

llvmbot commented Oct 5, 2025

@llvm/pr-subscribers-clangd

Author: None (tcottin)

Changes

Fix clangd/clangd#2513

This regression was introduced with #140498.

The issue is that with #140498 the extraction of the documentation comment changed from line based to paragraph based.
This also removed some required line breaks inside paragraphs, which used to be added before the change.

This PR adds the missing line breaks again.


Patch is 24.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162029.diff

5 Files Affected:

  • (modified) clang-tools-extra/clangd/support/Markup.cpp (+119-75)
  • (modified) clang-tools-extra/clangd/support/Markup.h (+78-3)
  • (modified) clang-tools-extra/clangd/unittests/HoverTests.cpp (+28-10)
  • (modified) clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp (+2-2)
  • (modified) clang-tools-extra/clangd/unittests/support/MarkupTests.cpp (+101-5)
diff --git a/clang-tools-extra/clangd/support/Markup.cpp b/clang-tools-extra/clangd/support/Markup.cpp
index 89bdc656d440f..e4247974ff9eb 100644
--- a/clang-tools-extra/clangd/support/Markup.cpp
+++ b/clang-tools-extra/clangd/support/Markup.cpp
@@ -450,31 +450,62 @@ std::string Block::asPlainText() const {
   return llvm::StringRef(OS.str()).trim().str();
 }
 
+void Paragraph::renderNewlinesMarkdown(llvm::raw_ostream &OS,
+                                       std::string &ParagraphText) const {
+  llvm::StringRef Line, Rest;
+
+  for (std::tie(Line, Rest) =
+           llvm::StringRef(ParagraphText).ltrim("\n").rtrim().split('\n');
+       !(Line.empty() && Rest.empty());
+       std::tie(Line, Rest) = Rest.split('\n')) {
+
+    if (Line.empty()) {
+      // Blank lines are preserved in markdown.
+      OS << '\n';
+      continue;
+    }
+
+    OS << Line;
+
+    if (!Rest.empty() && isHardLineBreakAfter(Line, Rest, /*IsMarkdown=*/true))
+      // In markdown, 2 spaces before a line break forces a line break.
+      OS << "  ";
+    OS << '\n';
+  }
+}
+
 void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
   bool NeedsSpace = false;
   bool HasChunks = false;
+  std::string ParagraphText;
+  ParagraphText.reserve(EstimatedStringSize);
+  llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
   for (auto &C : Chunks) {
     if (C.SpaceBefore || NeedsSpace)
-      OS << " ";
+      ParagraphTextOS << " ";
     switch (C.Kind) {
     case ChunkKind::PlainText:
-      OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/true);
+      ParagraphTextOS << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/true);
       break;
     case ChunkKind::InlineCode:
-      OS << renderInlineBlock(C.Contents);
+      ParagraphTextOS << renderInlineBlock(C.Contents);
       break;
     case ChunkKind::Bold:
-      OS << renderText("**" + C.Contents + "**", !HasChunks,
-                       /*EscapeMarkdown=*/true);
+      ParagraphTextOS << renderText("**" + C.Contents + "**", !HasChunks,
+                                    /*EscapeMarkdown=*/true);
       break;
     case ChunkKind::Emphasized:
-      OS << renderText("*" + C.Contents + "*", !HasChunks,
-                       /*EscapeMarkdown=*/true);
+      ParagraphTextOS << renderText("*" + C.Contents + "*", !HasChunks,
+                                    /*EscapeMarkdown=*/true);
       break;
     }
     HasChunks = true;
     NeedsSpace = C.SpaceAfter;
   }
+
+  renderNewlinesMarkdown(OS, ParagraphText);
+
   // A paragraph in markdown is separated by a blank line.
   OS << "\n\n";
 }
@@ -482,28 +513,39 @@ void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
 void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
   bool NeedsSpace = false;
   bool HasChunks = false;
+  std::string ParagraphText;
+  ParagraphText.reserve(EstimatedStringSize);
+  llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
   for (auto &C : Chunks) {
     if (C.SpaceBefore || NeedsSpace)
-      OS << " ";
+      ParagraphTextOS << " ";
     switch (C.Kind) {
     case ChunkKind::PlainText:
-      OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false);
+      ParagraphTextOS << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/false);
       break;
     case ChunkKind::InlineCode:
-      OS << renderInlineBlock(C.Contents);
+      ParagraphTextOS << renderInlineBlock(C.Contents);
       break;
     case ChunkKind::Bold:
-      OS << "**" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
-         << "**";
+      ParagraphTextOS << "**"
+                      << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/false)
+                      << "**";
       break;
     case ChunkKind::Emphasized:
-      OS << "*" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
-         << "*";
+      ParagraphTextOS << "*"
+                      << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/false)
+                      << "*";
       break;
     }
     HasChunks = true;
     NeedsSpace = C.SpaceAfter;
   }
+
+  renderNewlinesMarkdown(OS, ParagraphText);
+
   // A paragraph in markdown is separated by a blank line.
   OS << "\n\n";
 }
@@ -512,8 +554,6 @@ std::unique_ptr<Block> Paragraph::clone() const {
   return std::make_unique<Paragraph>(*this);
 }
 
-/// Choose a marker to delimit `Text` from a prioritized list of options.
-/// This is more readable than escaping for plain-text.
 llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
                                         llvm::StringRef Text) const {
   // Prefer a delimiter whose characters don't appear in the text.
@@ -523,23 +563,39 @@ llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
   return Options.front();
 }
 
-bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line) const {
+bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line,
+                                              bool IsMarkdown) const {
   constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt";
 
+  if (!IsMarkdown && Line.ends_with("  "))
+    return true;
+
   Line = Line.rtrim();
   return !Line.empty() && Punctuation.contains(Line.back());
 }
 
-bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
+bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest,
+                                         bool IsMarkdown) const {
+  // Plaintext indicators:
   // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
-  // '#' headings, '`' code blocks, two spaces (markdown force newline)
-  constexpr llvm::StringLiteral LinebreakIndicators = R"txt(-*@\>#`)txt";
+  // '#' headings, '`' code blocks
+  constexpr llvm::StringLiteral LinebreakIndicatorsPlainText =
+      R"txt(-*@\>#`)txt";
+  // Markdown indicators:
+  // Only '@' and '\' documentation commands/escaped markdown syntax.
+  constexpr llvm::StringLiteral LinebreakIndicatorsMarkdown = R"txt(@\)txt";
 
   Rest = Rest.ltrim(" \t");
   if (Rest.empty())
     return false;
 
-  if (LinebreakIndicators.contains(Rest.front()))
+  if (IsMarkdown) {
+    if (LinebreakIndicatorsMarkdown.contains(Rest.front()))
+      return true;
+    return false;
+  }
+
+  if (LinebreakIndicatorsPlainText.contains(Rest.front()))
     return true;
 
   if (llvm::isDigit(Rest.front())) {
@@ -550,64 +606,18 @@ bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
   return false;
 }
 
-bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line,
-                                     llvm::StringRef Rest) const {
-  // In Markdown, 2 spaces before a line break forces a line break.
-  // Add a line break for plaintext in this case too.
+bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest,
+                                     bool IsMarkdown) const {
   // Should we also consider whether Line is short?
-  return Line.ends_with("  ") || punctuationIndicatesLineBreak(Line) ||
-         isHardLineBreakIndicator(Rest);
+  return (punctuationIndicatesLineBreak(Line, IsMarkdown) ||
+          isHardLineBreakIndicator(Rest, IsMarkdown));
 }
 
-void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
-  bool NeedsSpace = false;
-  std::string ConcatenatedText;
-  ConcatenatedText.reserve(EstimatedStringSize);
-
-  llvm::raw_string_ostream ConcatenatedOS(ConcatenatedText);
-
-  for (auto &C : Chunks) {
-
-    if (C.Kind == ChunkKind::PlainText) {
-      if (C.SpaceBefore || NeedsSpace)
-        ConcatenatedOS << ' ';
-
-      ConcatenatedOS << C.Contents;
-      NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
-      continue;
-    }
-
-    if (C.SpaceBefore || NeedsSpace)
-      ConcatenatedOS << ' ';
-    llvm::StringRef Marker = "";
-    if (C.Preserve && C.Kind == ChunkKind::InlineCode)
-      Marker = chooseMarker({"`", "'", "\""}, C.Contents);
-    else if (C.Kind == ChunkKind::Bold)
-      Marker = "**";
-    else if (C.Kind == ChunkKind::Emphasized)
-      Marker = "*";
-    ConcatenatedOS << Marker << C.Contents << Marker;
-    NeedsSpace = C.SpaceAfter;
-  }
-
-  // We go through the contents line by line to handle the newlines
-  // and required spacing correctly.
-  //
-  // Newlines are added if:
-  // - the line ends with 2 spaces and a newline follows
-  // - the line ends with punctuation that indicates a line break (.:,;!?)
-  // - the next line starts with a hard line break indicator (-@>#`, or a digit
-  //   followed by '.' or ')'), ignoring leading whitespace.
-  //
-  // Otherwise, newlines in the input are replaced with a single space.
-  //
-  // Multiple spaces are collapsed into a single space.
-  //
-  // Lines containing only whitespace are ignored.
+void Paragraph::renderNewlinesPlaintext(llvm::raw_ostream &OS,
+                                        std::string &ParagraphText) const {
   llvm::StringRef Line, Rest;
 
-  for (std::tie(Line, Rest) =
-           llvm::StringRef(ConcatenatedText).trim().split('\n');
+  for (std::tie(Line, Rest) = llvm::StringRef(ParagraphText).trim().split('\n');
        !(Line.empty() && Rest.empty());
        std::tie(Line, Rest) = Rest.split('\n')) {
 
@@ -628,7 +638,7 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
 
     OS << canonicalizeSpaces(Line);
 
-    if (isHardLineBreakAfter(Line, Rest))
+    if (isHardLineBreakAfter(Line, Rest, /*IsMarkdown=*/false))
       OS << '\n';
     else if (!Rest.empty())
       // Since we removed any trailing whitespace from the input using trim(),
@@ -636,6 +646,40 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
       // Therefore, we can add a space without worrying about trailing spaces.
       OS << ' ';
   }
+}
+
+void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
+  bool NeedsSpace = false;
+  std::string ParagraphText;
+  ParagraphText.reserve(EstimatedStringSize);
+
+  llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
+
+  for (auto &C : Chunks) {
+
+    if (C.Kind == ChunkKind::PlainText) {
+      if (C.SpaceBefore || NeedsSpace)
+        ParagraphTextOS << ' ';
+
+      ParagraphTextOS << C.Contents;
+      NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
+      continue;
+    }
+
+    if (C.SpaceBefore || NeedsSpace)
+      ParagraphTextOS << ' ';
+    llvm::StringRef Marker = "";
+    if (C.Preserve && C.Kind == ChunkKind::InlineCode)
+      Marker = chooseMarker({"`", "'", "\""}, C.Contents);
+    else if (C.Kind == ChunkKind::Bold)
+      Marker = "**";
+    else if (C.Kind == ChunkKind::Emphasized)
+      Marker = "*";
+    ParagraphTextOS << Marker << C.Contents << Marker;
+    NeedsSpace = C.SpaceAfter;
+  }
+
+  renderNewlinesPlaintext(OS, ParagraphText);
 
   // Paragraphs are separated by a blank line.
   OS << "\n\n";
diff --git a/clang-tools-extra/clangd/support/Markup.h b/clang-tools-extra/clangd/support/Markup.h
index eea6328f69a12..2400f723c0dc5 100644
--- a/clang-tools-extra/clangd/support/Markup.h
+++ b/clang-tools-extra/clangd/support/Markup.h
@@ -92,9 +92,84 @@ class Paragraph : public Block {
 
   llvm::StringRef chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
                                llvm::StringRef Text) const;
-  bool punctuationIndicatesLineBreak(llvm::StringRef Line) const;
-  bool isHardLineBreakIndicator(llvm::StringRef Rest) const;
-  bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest) const;
+
+  /// Checks if the given line ends with punctuation that indicates a line break
+  /// (.:,;!?).
+  ///
+  /// If \p IsMarkdown is false, lines ending with 2 spaces are also considered
+  /// as indicating a line break. This is not needed for markdown because the
+  /// client renderer will handle this case.
+  bool punctuationIndicatesLineBreak(llvm::StringRef Line,
+                                     bool IsMarkdown) const;
+
+  /// Checks if the given line starts with a hard line break indicator.
+  ///
+  /// If \p IsMarkdown is true, only '@' and '\' are considered as indicators.
+  /// Otherwise, '-', '*', '@', '\', '>', '#', '`' and a digit followed by '.'
+  /// or ')' are also considered as indicators.
+  bool isHardLineBreakIndicator(llvm::StringRef Rest, bool IsMarkdown) const;
+
+  /// Checks if a hard line break should be added after the given line.
+  bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest,
+                            bool IsMarkdown) const;
+
+  /// \brief Go through the contents line by line to handle the newlines
+  /// and required spacing correctly for markdown rendering.
+  ///
+  /// Newlines are added if:
+  /// - the line ends with punctuation that indicates a line break (.:,;!?)
+  /// - the next line starts with a hard line break indicator \\ (escaped
+  /// markdown/doxygen command) or @ (doxygen command)
+  ///
+  /// This newline handling is only used when the client requests markdown
+  /// for hover/signature help content.
+  /// Markdown does not add any newlines inside paragraphs unless the user
+  /// explicitly adds them. For hover/signature help content, we still want to
+  /// add newlines in some cases to improve readability, especially when doxygen
+  /// parsing is disabled or not implemented (like for signature help).
+  /// Therefore we add newlines in the above mentioned cases.
+  ///
+  /// In addition to that, we need to consider that the user can configure
+  /// clangd to treat documentation comments as plain text, while the client
+  /// requests markdown.
+  /// In this case, all markdown syntax is escaped and will
+  /// not be rendered as expected by markdown.
+  /// Examples are lists starting with '-' or headings starting with '#'.
+  /// With the above next line heuristics, these cases are also covered by the
+  /// '\\' new line indicator.
+  ///
+  /// FIXME: The heuristic fails e.g. for lists starting with '*' because it is
+  /// also used for emphasis in markdown and should not be treated as a newline.
+  ///
+  /// \param OS The stream to render to.
+  /// \param ParagraphText The text of the paragraph to render.
+  void renderNewlinesMarkdown(llvm::raw_ostream &OS,
+                              std::string &ParagraphText) const;
+
+  /// \brief Go through the contents line by line to handle the newlines
+  /// and required spacing correctly for plain text rendering.
+  ///
+  /// Newlines are added if:
+  /// - the line ends with 2 spaces and a newline follows
+  /// - the line ends with punctuation that indicates a line break (.:,;!?)
+  /// - the next line starts with a hard line break indicator (-@>#`\\ or a
+  ///   digit followed by '.' or ')'), ignoring leading whitespace.
+  ///
+  /// Otherwise, newlines in the input are replaced with a single space.
+  ///
+  /// Multiple spaces are collapsed into a single space.
+  ///
+  /// Lines containing only whitespace are ignored.
+  ///
+  /// This newline handling is only used when the client requests plain
+  /// text for hover/signature help content.
+  /// Therefore with this approach we mimic the behavior of markdown rendering
+  /// for these clients.
+  ///
+  /// \param OS The stream to render to.
+  /// \param ParagraphText The text of the paragraph to render.
+  void renderNewlinesPlaintext(llvm::raw_ostream &OS,
+                               std::string &ParagraphText) const;
 };
 
 /// Represents a sequence of one or more documents. Knows how to print them in a
diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp
index e9abf71e6d1b6..f24a3ea015fc2 100644
--- a/clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -4067,8 +4067,8 @@ longer doc)"},
 
 @brief brief doc
 
-longer doc
-@param a this is a param
+longer doc  
+@param a this is a param  
 @return it returns something
 
 ---
@@ -4121,9 +4121,9 @@ longer doc)"},
 
 @brief brief doc
 
-longer doc
-@param a this is a param
-@param b does not exist
+longer doc  
+@param a this is a param  
+@param b does not exist  
 @return it returns something
 
 ---
@@ -4266,19 +4266,19 @@ TEST(Hover, ParseDocumentation) {
                },
                {
                    "foo.\nbar",
-                   "foo.\nbar",
-                   "foo.\nbar",
+                   "foo.  \nbar",
+                   "foo.  \nbar",
                    "foo.\nbar",
                },
                {
                    "foo. \nbar",
-                   "foo. \nbar",
-                   "foo. \nbar",
+                   "foo.   \nbar",
+                   "foo.   \nbar",
                    "foo.\nbar",
                },
                {
                    "foo\n*bar",
-                   "foo\n\\*bar",
+                   "foo  \n\\*bar",
                    "foo\n*bar",
                    "foo\n*bar",
                },
@@ -4305,6 +4305,24 @@ TEST(Hover, ParseDocumentation) {
                    "\\`not\nparsed\\`",
                    "`not\nparsed`",
                    "`not parsed`",
+               },
+               {
+                   R"(@brief this is a typical use case
+@param x this is x
+\param y this is y
+@return something)",
+                   R"(@brief this is a typical use case  
+@param x this is x  
+\\param y this is y  
+@return something)",
+                   R"(@brief this is a typical use case  
+@param x this is x  
+\param y this is y  
+@return something)",
+                   R"(@brief this is a typical use case
+@param x this is x
+\param y this is y
+@return something)",
                }};
 
   for (const auto &C : Cases) {
diff --git a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
index 31a6a149e33c4..7085e01737552 100644
--- a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
@@ -190,10 +190,10 @@ More description documentation)",
 normal text<i>this is an italic text</i>
 <code>this is a code block</code>)",
           R"(\<b>this is a bold text\</b>
-normal text\<i>this is an italic text\</i>
+normal text\<i>this is an italic text\</i>  
 \<code>this is a code block\</code>)",
           R"(\<b>this is a bold text\</b>
-normal text\<i>this is an italic text\</i>
+normal text\<i>this is an italic text\</i>  
 \<code>this is a code block\</code>)",
           "<b>this is a bold text</b> normal text<i>this is an italic text</i> "
           "<code>this is a code block</code>",
diff --git a/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp b/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp
index 5f91f31557176..af4782c07ae52 100644
--- a/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp
+++ b/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp
@@ -304,9 +304,9 @@ TEST(Paragraph, SeparationOfChunks) {
 
   P.appendSpace().appendCode("code").appendText(".\n  newline");
   EXPECT_EQ(P.asEscapedMarkdown(),
-            "after `foobar` bat`no` `space` text `code`.\n  newline");
+            "after `foobar` bat`no` `space` text `code`.  \n  newline");
   EXPECT_EQ(P.asMarkdown(),
-            "after `foobar` bat`no` `space` text `code`.\n  newline");
+            "after `foobar` bat`no` `space` text `code`.  \n  newline");
   EXPECT_EQ(P.asPlainText(), "after foobar batno space text code.\nnewline");
 }
 
@@ -371,21 +371,117 @@ TEST(Paragraph, SeparationOfChunks3) {
   EXPECT_EQ(P.asPlainText(), "after\nfoobar");
 
   P.appendText("- bat\n");
-  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar\n\\- bat");
+  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar  \n\\- bat");
   EXPECT_EQ(P.asMarkdown(), "after  \n  foobar\n- bat");
   EXPECT_EQ(P.asPlainText(), "after\nfoobar\n- bat");
 
   P.appendText("- baz");
-  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar\n\\- bat\n\\- baz");
+  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar  \n\\- bat  \n\\- baz");
   EXPECT_EQ(P.asMarkdown(), "after  \n  foobar\n- bat\n- baz");
   EXPECT_EQ(P.asPlainText(), "after\nfoobar\n- bat\n- baz");
 
   P.appendText(" faz ");
-  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar\n\\- bat\n\\- baz faz");
+  EXPECT_EQ(P.asEscaped...
[truncated]

@llvmbot
Copy link
Member

llvmbot commented Oct 5, 2025

@llvm/pr-subscribers-clang-tools-extra

Author: None (tcottin)

Changes

Fix clangd/clangd#2513

This regression was introduced with #140498.

The issue is that with #140498 the extraction of the documentation comment changed from line based to paragraph based.
This also removed some required line breaks inside paragraphs, which used to be added before the change.

This PR adds the missing line breaks again.


Patch is 24.60 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/162029.diff

5 Files Affected:

  • (modified) clang-tools-extra/clangd/support/Markup.cpp (+119-75)
  • (modified) clang-tools-extra/clangd/support/Markup.h (+78-3)
  • (modified) clang-tools-extra/clangd/unittests/HoverTests.cpp (+28-10)
  • (modified) clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp (+2-2)
  • (modified) clang-tools-extra/clangd/unittests/support/MarkupTests.cpp (+101-5)
diff --git a/clang-tools-extra/clangd/support/Markup.cpp b/clang-tools-extra/clangd/support/Markup.cpp
index 89bdc656d440f..e4247974ff9eb 100644
--- a/clang-tools-extra/clangd/support/Markup.cpp
+++ b/clang-tools-extra/clangd/support/Markup.cpp
@@ -450,31 +450,62 @@ std::string Block::asPlainText() const {
   return llvm::StringRef(OS.str()).trim().str();
 }
 
+void Paragraph::renderNewlinesMarkdown(llvm::raw_ostream &OS,
+                                       std::string &ParagraphText) const {
+  llvm::StringRef Line, Rest;
+
+  for (std::tie(Line, Rest) =
+           llvm::StringRef(ParagraphText).ltrim("\n").rtrim().split('\n');
+       !(Line.empty() && Rest.empty());
+       std::tie(Line, Rest) = Rest.split('\n')) {
+
+    if (Line.empty()) {
+      // Blank lines are preserved in markdown.
+      OS << '\n';
+      continue;
+    }
+
+    OS << Line;
+
+    if (!Rest.empty() && isHardLineBreakAfter(Line, Rest, /*IsMarkdown=*/true))
+      // In markdown, 2 spaces before a line break forces a line break.
+      OS << "  ";
+    OS << '\n';
+  }
+}
+
 void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
   bool NeedsSpace = false;
   bool HasChunks = false;
+  std::string ParagraphText;
+  ParagraphText.reserve(EstimatedStringSize);
+  llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
   for (auto &C : Chunks) {
     if (C.SpaceBefore || NeedsSpace)
-      OS << " ";
+      ParagraphTextOS << " ";
     switch (C.Kind) {
     case ChunkKind::PlainText:
-      OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/true);
+      ParagraphTextOS << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/true);
       break;
     case ChunkKind::InlineCode:
-      OS << renderInlineBlock(C.Contents);
+      ParagraphTextOS << renderInlineBlock(C.Contents);
       break;
     case ChunkKind::Bold:
-      OS << renderText("**" + C.Contents + "**", !HasChunks,
-                       /*EscapeMarkdown=*/true);
+      ParagraphTextOS << renderText("**" + C.Contents + "**", !HasChunks,
+                                    /*EscapeMarkdown=*/true);
       break;
     case ChunkKind::Emphasized:
-      OS << renderText("*" + C.Contents + "*", !HasChunks,
-                       /*EscapeMarkdown=*/true);
+      ParagraphTextOS << renderText("*" + C.Contents + "*", !HasChunks,
+                                    /*EscapeMarkdown=*/true);
       break;
     }
     HasChunks = true;
     NeedsSpace = C.SpaceAfter;
   }
+
+  renderNewlinesMarkdown(OS, ParagraphText);
+
   // A paragraph in markdown is separated by a blank line.
   OS << "\n\n";
 }
@@ -482,28 +513,39 @@ void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
 void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
   bool NeedsSpace = false;
   bool HasChunks = false;
+  std::string ParagraphText;
+  ParagraphText.reserve(EstimatedStringSize);
+  llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
   for (auto &C : Chunks) {
     if (C.SpaceBefore || NeedsSpace)
-      OS << " ";
+      ParagraphTextOS << " ";
     switch (C.Kind) {
     case ChunkKind::PlainText:
-      OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false);
+      ParagraphTextOS << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/false);
       break;
     case ChunkKind::InlineCode:
-      OS << renderInlineBlock(C.Contents);
+      ParagraphTextOS << renderInlineBlock(C.Contents);
       break;
     case ChunkKind::Bold:
-      OS << "**" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
-         << "**";
+      ParagraphTextOS << "**"
+                      << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/false)
+                      << "**";
       break;
     case ChunkKind::Emphasized:
-      OS << "*" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
-         << "*";
+      ParagraphTextOS << "*"
+                      << renderText(C.Contents, !HasChunks,
+                                    /*EscapeMarkdown=*/false)
+                      << "*";
       break;
     }
     HasChunks = true;
     NeedsSpace = C.SpaceAfter;
   }
+
+  renderNewlinesMarkdown(OS, ParagraphText);
+
   // A paragraph in markdown is separated by a blank line.
   OS << "\n\n";
 }
@@ -512,8 +554,6 @@ std::unique_ptr<Block> Paragraph::clone() const {
   return std::make_unique<Paragraph>(*this);
 }
 
-/// Choose a marker to delimit `Text` from a prioritized list of options.
-/// This is more readable than escaping for plain-text.
 llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
                                         llvm::StringRef Text) const {
   // Prefer a delimiter whose characters don't appear in the text.
@@ -523,23 +563,39 @@ llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
   return Options.front();
 }
 
-bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line) const {
+bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line,
+                                              bool IsMarkdown) const {
   constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt";
 
+  if (!IsMarkdown && Line.ends_with("  "))
+    return true;
+
   Line = Line.rtrim();
   return !Line.empty() && Punctuation.contains(Line.back());
 }
 
-bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
+bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest,
+                                         bool IsMarkdown) const {
+  // Plaintext indicators:
   // '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
-  // '#' headings, '`' code blocks, two spaces (markdown force newline)
-  constexpr llvm::StringLiteral LinebreakIndicators = R"txt(-*@\>#`)txt";
+  // '#' headings, '`' code blocks
+  constexpr llvm::StringLiteral LinebreakIndicatorsPlainText =
+      R"txt(-*@\>#`)txt";
+  // Markdown indicators:
+  // Only '@' and '\' documentation commands/escaped markdown syntax.
+  constexpr llvm::StringLiteral LinebreakIndicatorsMarkdown = R"txt(@\)txt";
 
   Rest = Rest.ltrim(" \t");
   if (Rest.empty())
     return false;
 
-  if (LinebreakIndicators.contains(Rest.front()))
+  if (IsMarkdown) {
+    if (LinebreakIndicatorsMarkdown.contains(Rest.front()))
+      return true;
+    return false;
+  }
+
+  if (LinebreakIndicatorsPlainText.contains(Rest.front()))
     return true;
 
   if (llvm::isDigit(Rest.front())) {
@@ -550,64 +606,18 @@ bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
   return false;
 }
 
-bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line,
-                                     llvm::StringRef Rest) const {
-  // In Markdown, 2 spaces before a line break forces a line break.
-  // Add a line break for plaintext in this case too.
+bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest,
+                                     bool IsMarkdown) const {
   // Should we also consider whether Line is short?
-  return Line.ends_with("  ") || punctuationIndicatesLineBreak(Line) ||
-         isHardLineBreakIndicator(Rest);
+  return (punctuationIndicatesLineBreak(Line, IsMarkdown) ||
+          isHardLineBreakIndicator(Rest, IsMarkdown));
 }
 
-void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
-  bool NeedsSpace = false;
-  std::string ConcatenatedText;
-  ConcatenatedText.reserve(EstimatedStringSize);
-
-  llvm::raw_string_ostream ConcatenatedOS(ConcatenatedText);
-
-  for (auto &C : Chunks) {
-
-    if (C.Kind == ChunkKind::PlainText) {
-      if (C.SpaceBefore || NeedsSpace)
-        ConcatenatedOS << ' ';
-
-      ConcatenatedOS << C.Contents;
-      NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
-      continue;
-    }
-
-    if (C.SpaceBefore || NeedsSpace)
-      ConcatenatedOS << ' ';
-    llvm::StringRef Marker = "";
-    if (C.Preserve && C.Kind == ChunkKind::InlineCode)
-      Marker = chooseMarker({"`", "'", "\""}, C.Contents);
-    else if (C.Kind == ChunkKind::Bold)
-      Marker = "**";
-    else if (C.Kind == ChunkKind::Emphasized)
-      Marker = "*";
-    ConcatenatedOS << Marker << C.Contents << Marker;
-    NeedsSpace = C.SpaceAfter;
-  }
-
-  // We go through the contents line by line to handle the newlines
-  // and required spacing correctly.
-  //
-  // Newlines are added if:
-  // - the line ends with 2 spaces and a newline follows
-  // - the line ends with punctuation that indicates a line break (.:,;!?)
-  // - the next line starts with a hard line break indicator (-@>#`, or a digit
-  //   followed by '.' or ')'), ignoring leading whitespace.
-  //
-  // Otherwise, newlines in the input are replaced with a single space.
-  //
-  // Multiple spaces are collapsed into a single space.
-  //
-  // Lines containing only whitespace are ignored.
+void Paragraph::renderNewlinesPlaintext(llvm::raw_ostream &OS,
+                                        std::string &ParagraphText) const {
   llvm::StringRef Line, Rest;
 
-  for (std::tie(Line, Rest) =
-           llvm::StringRef(ConcatenatedText).trim().split('\n');
+  for (std::tie(Line, Rest) = llvm::StringRef(ParagraphText).trim().split('\n');
        !(Line.empty() && Rest.empty());
        std::tie(Line, Rest) = Rest.split('\n')) {
 
@@ -628,7 +638,7 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
 
     OS << canonicalizeSpaces(Line);
 
-    if (isHardLineBreakAfter(Line, Rest))
+    if (isHardLineBreakAfter(Line, Rest, /*IsMarkdown=*/false))
       OS << '\n';
     else if (!Rest.empty())
       // Since we removed any trailing whitespace from the input using trim(),
@@ -636,6 +646,40 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
       // Therefore, we can add a space without worrying about trailing spaces.
       OS << ' ';
   }
+}
+
+void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
+  bool NeedsSpace = false;
+  std::string ParagraphText;
+  ParagraphText.reserve(EstimatedStringSize);
+
+  llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
+
+  for (auto &C : Chunks) {
+
+    if (C.Kind == ChunkKind::PlainText) {
+      if (C.SpaceBefore || NeedsSpace)
+        ParagraphTextOS << ' ';
+
+      ParagraphTextOS << C.Contents;
+      NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
+      continue;
+    }
+
+    if (C.SpaceBefore || NeedsSpace)
+      ParagraphTextOS << ' ';
+    llvm::StringRef Marker = "";
+    if (C.Preserve && C.Kind == ChunkKind::InlineCode)
+      Marker = chooseMarker({"`", "'", "\""}, C.Contents);
+    else if (C.Kind == ChunkKind::Bold)
+      Marker = "**";
+    else if (C.Kind == ChunkKind::Emphasized)
+      Marker = "*";
+    ParagraphTextOS << Marker << C.Contents << Marker;
+    NeedsSpace = C.SpaceAfter;
+  }
+
+  renderNewlinesPlaintext(OS, ParagraphText);
 
   // Paragraphs are separated by a blank line.
   OS << "\n\n";
diff --git a/clang-tools-extra/clangd/support/Markup.h b/clang-tools-extra/clangd/support/Markup.h
index eea6328f69a12..2400f723c0dc5 100644
--- a/clang-tools-extra/clangd/support/Markup.h
+++ b/clang-tools-extra/clangd/support/Markup.h
@@ -92,9 +92,84 @@ class Paragraph : public Block {
 
   llvm::StringRef chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
                                llvm::StringRef Text) const;
-  bool punctuationIndicatesLineBreak(llvm::StringRef Line) const;
-  bool isHardLineBreakIndicator(llvm::StringRef Rest) const;
-  bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest) const;
+
+  /// Checks if the given line ends with punctuation that indicates a line break
+  /// (.:,;!?).
+  ///
+  /// If \p IsMarkdown is false, lines ending with 2 spaces are also considered
+  /// as indicating a line break. This is not needed for markdown because the
+  /// client renderer will handle this case.
+  bool punctuationIndicatesLineBreak(llvm::StringRef Line,
+                                     bool IsMarkdown) const;
+
+  /// Checks if the given line starts with a hard line break indicator.
+  ///
+  /// If \p IsMarkdown is true, only '@' and '\' are considered as indicators.
+  /// Otherwise, '-', '*', '@', '\', '>', '#', '`' and a digit followed by '.'
+  /// or ')' are also considered as indicators.
+  bool isHardLineBreakIndicator(llvm::StringRef Rest, bool IsMarkdown) const;
+
+  /// Checks if a hard line break should be added after the given line.
+  bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest,
+                            bool IsMarkdown) const;
+
+  /// \brief Go through the contents line by line to handle the newlines
+  /// and required spacing correctly for markdown rendering.
+  ///
+  /// Newlines are added if:
+  /// - the line ends with punctuation that indicates a line break (.:,;!?)
+  /// - the next line starts with a hard line break indicator \\ (escaped
+  /// markdown/doxygen command) or @ (doxygen command)
+  ///
+  /// This newline handling is only used when the client requests markdown
+  /// for hover/signature help content.
+  /// Markdown does not add any newlines inside paragraphs unless the user
+  /// explicitly adds them. For hover/signature help content, we still want to
+  /// add newlines in some cases to improve readability, especially when doxygen
+  /// parsing is disabled or not implemented (like for signature help).
+  /// Therefore we add newlines in the above mentioned cases.
+  ///
+  /// In addition to that, we need to consider that the user can configure
+  /// clangd to treat documentation comments as plain text, while the client
+  /// requests markdown.
+  /// In this case, all markdown syntax is escaped and will
+  /// not be rendered as expected by markdown.
+  /// Examples are lists starting with '-' or headings starting with '#'.
+  /// With the above next line heuristics, these cases are also covered by the
+  /// '\\' new line indicator.
+  ///
+  /// FIXME: The heuristic fails e.g. for lists starting with '*' because it is
+  /// also used for emphasis in markdown and should not be treated as a newline.
+  ///
+  /// \param OS The stream to render to.
+  /// \param ParagraphText The text of the paragraph to render.
+  void renderNewlinesMarkdown(llvm::raw_ostream &OS,
+                              std::string &ParagraphText) const;
+
+  /// \brief Go through the contents line by line to handle the newlines
+  /// and required spacing correctly for plain text rendering.
+  ///
+  /// Newlines are added if:
+  /// - the line ends with 2 spaces and a newline follows
+  /// - the line ends with punctuation that indicates a line break (.:,;!?)
+  /// - the next line starts with a hard line break indicator (-@>#`\\ or a
+  ///   digit followed by '.' or ')'), ignoring leading whitespace.
+  ///
+  /// Otherwise, newlines in the input are replaced with a single space.
+  ///
+  /// Multiple spaces are collapsed into a single space.
+  ///
+  /// Lines containing only whitespace are ignored.
+  ///
+  /// This newline handling is only used when the client requests plain
+  /// text for hover/signature help content.
+  /// Therefore with this approach we mimic the behavior of markdown rendering
+  /// for these clients.
+  ///
+  /// \param OS The stream to render to.
+  /// \param ParagraphText The text of the paragraph to render.
+  void renderNewlinesPlaintext(llvm::raw_ostream &OS,
+                               std::string &ParagraphText) const;
 };
 
 /// Represents a sequence of one or more documents. Knows how to print them in a
diff --git a/clang-tools-extra/clangd/unittests/HoverTests.cpp b/clang-tools-extra/clangd/unittests/HoverTests.cpp
index e9abf71e6d1b6..f24a3ea015fc2 100644
--- a/clang-tools-extra/clangd/unittests/HoverTests.cpp
+++ b/clang-tools-extra/clangd/unittests/HoverTests.cpp
@@ -4067,8 +4067,8 @@ longer doc)"},
 
 @brief brief doc
 
-longer doc
-@param a this is a param
+longer doc  
+@param a this is a param  
 @return it returns something
 
 ---
@@ -4121,9 +4121,9 @@ longer doc)"},
 
 @brief brief doc
 
-longer doc
-@param a this is a param
-@param b does not exist
+longer doc  
+@param a this is a param  
+@param b does not exist  
 @return it returns something
 
 ---
@@ -4266,19 +4266,19 @@ TEST(Hover, ParseDocumentation) {
                },
                {
                    "foo.\nbar",
-                   "foo.\nbar",
-                   "foo.\nbar",
+                   "foo.  \nbar",
+                   "foo.  \nbar",
                    "foo.\nbar",
                },
                {
                    "foo. \nbar",
-                   "foo. \nbar",
-                   "foo. \nbar",
+                   "foo.   \nbar",
+                   "foo.   \nbar",
                    "foo.\nbar",
                },
                {
                    "foo\n*bar",
-                   "foo\n\\*bar",
+                   "foo  \n\\*bar",
                    "foo\n*bar",
                    "foo\n*bar",
                },
@@ -4305,6 +4305,24 @@ TEST(Hover, ParseDocumentation) {
                    "\\`not\nparsed\\`",
                    "`not\nparsed`",
                    "`not parsed`",
+               },
+               {
+                   R"(@brief this is a typical use case
+@param x this is x
+\param y this is y
+@return something)",
+                   R"(@brief this is a typical use case  
+@param x this is x  
+\\param y this is y  
+@return something)",
+                   R"(@brief this is a typical use case  
+@param x this is x  
+\param y this is y  
+@return something)",
+                   R"(@brief this is a typical use case
+@param x this is x
+\param y this is y
+@return something)",
                }};
 
   for (const auto &C : Cases) {
diff --git a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
index 31a6a149e33c4..7085e01737552 100644
--- a/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
+++ b/clang-tools-extra/clangd/unittests/SymbolDocumentationTests.cpp
@@ -190,10 +190,10 @@ More description documentation)",
 normal text<i>this is an italic text</i>
 <code>this is a code block</code>)",
           R"(\<b>this is a bold text\</b>
-normal text\<i>this is an italic text\</i>
+normal text\<i>this is an italic text\</i>  
 \<code>this is a code block\</code>)",
           R"(\<b>this is a bold text\</b>
-normal text\<i>this is an italic text\</i>
+normal text\<i>this is an italic text\</i>  
 \<code>this is a code block\</code>)",
           "<b>this is a bold text</b> normal text<i>this is an italic text</i> "
           "<code>this is a code block</code>",
diff --git a/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp b/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp
index 5f91f31557176..af4782c07ae52 100644
--- a/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp
+++ b/clang-tools-extra/clangd/unittests/support/MarkupTests.cpp
@@ -304,9 +304,9 @@ TEST(Paragraph, SeparationOfChunks) {
 
   P.appendSpace().appendCode("code").appendText(".\n  newline");
   EXPECT_EQ(P.asEscapedMarkdown(),
-            "after `foobar` bat`no` `space` text `code`.\n  newline");
+            "after `foobar` bat`no` `space` text `code`.  \n  newline");
   EXPECT_EQ(P.asMarkdown(),
-            "after `foobar` bat`no` `space` text `code`.\n  newline");
+            "after `foobar` bat`no` `space` text `code`.  \n  newline");
   EXPECT_EQ(P.asPlainText(), "after foobar batno space text code.\nnewline");
 }
 
@@ -371,21 +371,117 @@ TEST(Paragraph, SeparationOfChunks3) {
   EXPECT_EQ(P.asPlainText(), "after\nfoobar");
 
   P.appendText("- bat\n");
-  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar\n\\- bat");
+  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar  \n\\- bat");
   EXPECT_EQ(P.asMarkdown(), "after  \n  foobar\n- bat");
   EXPECT_EQ(P.asPlainText(), "after\nfoobar\n- bat");
 
   P.appendText("- baz");
-  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar\n\\- bat\n\\- baz");
+  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar  \n\\- bat  \n\\- baz");
   EXPECT_EQ(P.asMarkdown(), "after  \n  foobar\n- bat\n- baz");
   EXPECT_EQ(P.asPlainText(), "after\nfoobar\n- bat\n- baz");
 
   P.appendText(" faz ");
-  EXPECT_EQ(P.asEscapedMarkdown(), "after  \n  foobar\n\\- bat\n\\- baz faz");
+  EXPECT_EQ(P.asEscaped...
[truncated]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

Signature information missing line breaks in VSCode
2 participants