Skip to content

Commit 4678f16

Browse files
authored
[clangd] Fix regression regarding new line handling for hover/signature help content (#162029)
Fix clangd/clangd#2513 This regression was introduced with #140498. The issue is that with #140498 the extraction of the documentation comment changed from line based to paragraph based. This also removed some required line breaks inside paragraphs, which used to be added before the change. This PR adds the missing line breaks again.
1 parent 1d7d26c commit 4678f16

File tree

5 files changed

+330
-101
lines changed

5 files changed

+330
-101
lines changed

clang-tools-extra/clangd/support/Markup.cpp

Lines changed: 115 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -475,60 +475,101 @@ std::string Block::asPlainText() const {
475475
return llvm::StringRef(OS.str()).trim().str();
476476
}
477477

478+
void Paragraph::renderNewlinesMarkdown(llvm::raw_ostream &OS,
479+
llvm::StringRef ParagraphText) const {
480+
llvm::StringRef Line, Rest;
481+
482+
for (std::tie(Line, Rest) = ParagraphText.ltrim("\n").rtrim().split('\n');
483+
!(Line.empty() && Rest.empty());
484+
std::tie(Line, Rest) = Rest.split('\n')) {
485+
486+
if (Line.empty()) {
487+
// Blank lines are preserved in markdown.
488+
OS << '\n';
489+
continue;
490+
}
491+
492+
OS << Line;
493+
494+
if (!Rest.empty() && isHardLineBreakAfter(Line, Rest, /*IsMarkdown=*/true))
495+
// In markdown, 2 spaces before a line break forces a line break.
496+
OS << " ";
497+
OS << '\n';
498+
}
499+
}
500+
478501
void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
479502
bool NeedsSpace = false;
480503
bool HasChunks = false;
504+
std::string ParagraphText;
505+
ParagraphText.reserve(EstimatedStringSize);
506+
llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
481507
for (auto &C : Chunks) {
482508
if (C.SpaceBefore || NeedsSpace)
483-
OS << " ";
509+
ParagraphTextOS << " ";
484510
switch (C.Kind) {
485511
case ChunkKind::PlainText:
486-
OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/true);
512+
ParagraphTextOS << renderText(C.Contents, !HasChunks,
513+
/*EscapeMarkdown=*/true);
487514
break;
488515
case ChunkKind::InlineCode:
489-
OS << renderInlineBlock(C.Contents);
516+
ParagraphTextOS << renderInlineBlock(C.Contents);
490517
break;
491518
case ChunkKind::Bold:
492-
OS << renderText("**" + C.Contents + "**", !HasChunks,
493-
/*EscapeMarkdown=*/true);
519+
ParagraphTextOS << renderText("**" + C.Contents + "**", !HasChunks,
520+
/*EscapeMarkdown=*/true);
494521
break;
495522
case ChunkKind::Emphasized:
496-
OS << renderText("*" + C.Contents + "*", !HasChunks,
497-
/*EscapeMarkdown=*/true);
523+
ParagraphTextOS << renderText("*" + C.Contents + "*", !HasChunks,
524+
/*EscapeMarkdown=*/true);
498525
break;
499526
}
500527
HasChunks = true;
501528
NeedsSpace = C.SpaceAfter;
502529
}
530+
531+
renderNewlinesMarkdown(OS, ParagraphText);
532+
503533
// A paragraph in markdown is separated by a blank line.
504534
OS << "\n\n";
505535
}
506536

507537
void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
508538
bool NeedsSpace = false;
509539
bool HasChunks = false;
540+
std::string ParagraphText;
541+
ParagraphText.reserve(EstimatedStringSize);
542+
llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
510543
for (auto &C : Chunks) {
511544
if (C.SpaceBefore || NeedsSpace)
512-
OS << " ";
545+
ParagraphTextOS << " ";
513546
switch (C.Kind) {
514547
case ChunkKind::PlainText:
515-
OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false);
548+
ParagraphTextOS << renderText(C.Contents, !HasChunks,
549+
/*EscapeMarkdown=*/false);
516550
break;
517551
case ChunkKind::InlineCode:
518-
OS << renderInlineBlock(C.Contents);
552+
ParagraphTextOS << renderInlineBlock(C.Contents);
519553
break;
520554
case ChunkKind::Bold:
521-
OS << "**" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
522-
<< "**";
555+
ParagraphTextOS << "**"
556+
<< renderText(C.Contents, !HasChunks,
557+
/*EscapeMarkdown=*/false)
558+
<< "**";
523559
break;
524560
case ChunkKind::Emphasized:
525-
OS << "*" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
526-
<< "*";
561+
ParagraphTextOS << "*"
562+
<< renderText(C.Contents, !HasChunks,
563+
/*EscapeMarkdown=*/false)
564+
<< "*";
527565
break;
528566
}
529567
HasChunks = true;
530568
NeedsSpace = C.SpaceAfter;
531569
}
570+
571+
renderNewlinesMarkdown(OS, ParagraphText);
572+
532573
// A paragraph in markdown is separated by a blank line.
533574
OS << "\n\n";
534575
}
@@ -537,8 +578,6 @@ std::unique_ptr<Block> Paragraph::clone() const {
537578
return std::make_unique<Paragraph>(*this);
538579
}
539580

540-
/// Choose a marker to delimit `Text` from a prioritized list of options.
541-
/// This is more readable than escaping for plain-text.
542581
llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
543582
llvm::StringRef Text) const {
544583
// Prefer a delimiter whose characters don't appear in the text.
@@ -548,23 +587,36 @@ llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
548587
return Options.front();
549588
}
550589

551-
bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line) const {
590+
bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line,
591+
bool IsMarkdown) const {
552592
constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt";
553593

594+
if (!IsMarkdown && Line.ends_with(" "))
595+
return true;
596+
554597
Line = Line.rtrim();
555598
return !Line.empty() && Punctuation.contains(Line.back());
556599
}
557600

558-
bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
601+
bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest,
602+
bool IsMarkdown) const {
603+
// Plaintext indicators:
559604
// '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
560-
// '#' headings, '`' code blocks, two spaces (markdown force newline)
561-
constexpr llvm::StringLiteral LinebreakIndicators = R"txt(-*@\>#`)txt";
605+
// '#' headings, '`' code blocks
606+
constexpr llvm::StringLiteral LinebreakIndicatorsPlainText =
607+
R"txt(-*@\>#`)txt";
608+
// Markdown indicators:
609+
// Only '@' and '\' documentation commands/escaped markdown syntax.
610+
constexpr llvm::StringLiteral LinebreakIndicatorsMarkdown = R"txt(@\)txt";
562611

563612
Rest = Rest.ltrim(" \t");
564613
if (Rest.empty())
565614
return false;
566615

567-
if (LinebreakIndicators.contains(Rest.front()))
616+
if (IsMarkdown)
617+
return LinebreakIndicatorsMarkdown.contains(Rest.front());
618+
619+
if (LinebreakIndicatorsPlainText.contains(Rest.front()))
568620
return true;
569621

570622
if (llvm::isDigit(Rest.front())) {
@@ -575,64 +627,18 @@ bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
575627
return false;
576628
}
577629

578-
bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line,
579-
llvm::StringRef Rest) const {
580-
// In Markdown, 2 spaces before a line break forces a line break.
581-
// Add a line break for plaintext in this case too.
630+
bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest,
631+
bool IsMarkdown) const {
582632
// Should we also consider whether Line is short?
583-
return Line.ends_with(" ") || punctuationIndicatesLineBreak(Line) ||
584-
isHardLineBreakIndicator(Rest);
633+
return punctuationIndicatesLineBreak(Line, IsMarkdown) ||
634+
isHardLineBreakIndicator(Rest, IsMarkdown);
585635
}
586636

587-
void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
588-
bool NeedsSpace = false;
589-
std::string ConcatenatedText;
590-
ConcatenatedText.reserve(EstimatedStringSize);
591-
592-
llvm::raw_string_ostream ConcatenatedOS(ConcatenatedText);
593-
594-
for (auto &C : Chunks) {
595-
596-
if (C.Kind == ChunkKind::PlainText) {
597-
if (C.SpaceBefore || NeedsSpace)
598-
ConcatenatedOS << ' ';
599-
600-
ConcatenatedOS << C.Contents;
601-
NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
602-
continue;
603-
}
604-
605-
if (C.SpaceBefore || NeedsSpace)
606-
ConcatenatedOS << ' ';
607-
llvm::StringRef Marker = "";
608-
if (C.Preserve && C.Kind == ChunkKind::InlineCode)
609-
Marker = chooseMarker({"`", "'", "\""}, C.Contents);
610-
else if (C.Kind == ChunkKind::Bold)
611-
Marker = "**";
612-
else if (C.Kind == ChunkKind::Emphasized)
613-
Marker = "*";
614-
ConcatenatedOS << Marker << C.Contents << Marker;
615-
NeedsSpace = C.SpaceAfter;
616-
}
617-
618-
// We go through the contents line by line to handle the newlines
619-
// and required spacing correctly.
620-
//
621-
// Newlines are added if:
622-
// - the line ends with 2 spaces and a newline follows
623-
// - the line ends with punctuation that indicates a line break (.:,;!?)
624-
// - the next line starts with a hard line break indicator (-@>#`, or a digit
625-
// followed by '.' or ')'), ignoring leading whitespace.
626-
//
627-
// Otherwise, newlines in the input are replaced with a single space.
628-
//
629-
// Multiple spaces are collapsed into a single space.
630-
//
631-
// Lines containing only whitespace are ignored.
637+
void Paragraph::renderNewlinesPlaintext(llvm::raw_ostream &OS,
638+
llvm::StringRef ParagraphText) const {
632639
llvm::StringRef Line, Rest;
633640

634-
for (std::tie(Line, Rest) =
635-
llvm::StringRef(ConcatenatedText).trim().split('\n');
641+
for (std::tie(Line, Rest) = ParagraphText.trim().split('\n');
636642
!(Line.empty() && Rest.empty());
637643
std::tie(Line, Rest) = Rest.split('\n')) {
638644

@@ -653,14 +659,48 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
653659

654660
OS << canonicalizeSpaces(Line);
655661

656-
if (isHardLineBreakAfter(Line, Rest))
662+
if (isHardLineBreakAfter(Line, Rest, /*IsMarkdown=*/false))
657663
OS << '\n';
658664
else if (!Rest.empty())
659665
// Since we removed any trailing whitespace from the input using trim(),
660666
// we know that the next line contains non-whitespace characters.
661667
// Therefore, we can add a space without worrying about trailing spaces.
662668
OS << ' ';
663669
}
670+
}
671+
672+
void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
673+
bool NeedsSpace = false;
674+
std::string ParagraphText;
675+
ParagraphText.reserve(EstimatedStringSize);
676+
677+
llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
678+
679+
for (auto &C : Chunks) {
680+
681+
if (C.Kind == ChunkKind::PlainText) {
682+
if (C.SpaceBefore || NeedsSpace)
683+
ParagraphTextOS << ' ';
684+
685+
ParagraphTextOS << C.Contents;
686+
NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
687+
continue;
688+
}
689+
690+
if (C.SpaceBefore || NeedsSpace)
691+
ParagraphTextOS << ' ';
692+
llvm::StringRef Marker = "";
693+
if (C.Preserve && C.Kind == ChunkKind::InlineCode)
694+
Marker = chooseMarker({"`", "'", "\""}, C.Contents);
695+
else if (C.Kind == ChunkKind::Bold)
696+
Marker = "**";
697+
else if (C.Kind == ChunkKind::Emphasized)
698+
Marker = "*";
699+
ParagraphTextOS << Marker << C.Contents << Marker;
700+
NeedsSpace = C.SpaceAfter;
701+
}
702+
703+
renderNewlinesPlaintext(OS, ParagraphText);
664704

665705
// Paragraphs are separated by a blank line.
666706
OS << "\n\n";

clang-tools-extra/clangd/support/Markup.h

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,84 @@ class Paragraph : public Block {
9292

9393
llvm::StringRef chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
9494
llvm::StringRef Text) const;
95-
bool punctuationIndicatesLineBreak(llvm::StringRef Line) const;
96-
bool isHardLineBreakIndicator(llvm::StringRef Rest) const;
97-
bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest) const;
95+
96+
/// Checks if the given line ends with punctuation that indicates a line break
97+
/// (.:,;!?).
98+
///
99+
/// If \p IsMarkdown is false, lines ending with 2 spaces are also considered
100+
/// as indicating a line break. This is not needed for markdown because the
101+
/// client renderer will handle this case.
102+
bool punctuationIndicatesLineBreak(llvm::StringRef Line,
103+
bool IsMarkdown) const;
104+
105+
/// Checks if the given line starts with a hard line break indicator.
106+
///
107+
/// If \p IsMarkdown is true, only '@' and '\' are considered as indicators.
108+
/// Otherwise, '-', '*', '@', '\', '>', '#', '`' and a digit followed by '.'
109+
/// or ')' are also considered as indicators.
110+
bool isHardLineBreakIndicator(llvm::StringRef Rest, bool IsMarkdown) const;
111+
112+
/// Checks if a hard line break should be added after the given line.
113+
bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest,
114+
bool IsMarkdown) const;
115+
116+
/// \brief Go through the contents line by line to handle the newlines
117+
/// and required spacing correctly for markdown rendering.
118+
///
119+
/// Newlines are added if:
120+
/// - the line ends with punctuation that indicates a line break (.:,;!?)
121+
/// - the next line starts with a hard line break indicator \\ (escaped
122+
/// markdown/doxygen command) or @ (doxygen command)
123+
///
124+
/// This newline handling is only used when the client requests markdown
125+
/// for hover/signature help content.
126+
/// Markdown does not add any newlines inside paragraphs unless the user
127+
/// explicitly adds them. For hover/signature help content, we still want to
128+
/// add newlines in some cases to improve readability, especially when doxygen
129+
/// parsing is disabled or not implemented (like for signature help).
130+
/// Therefore we add newlines in the above mentioned cases.
131+
///
132+
/// In addition to that, we need to consider that the user can configure
133+
/// clangd to treat documentation comments as plain text, while the client
134+
/// requests markdown.
135+
/// In this case, all markdown syntax is escaped and will
136+
/// not be rendered as expected by markdown.
137+
/// Examples are lists starting with '-' or headings starting with '#'.
138+
/// With the above next line heuristics, these cases are also covered by the
139+
/// '\\' new line indicator.
140+
///
141+
/// FIXME: The heuristic fails e.g. for lists starting with '*' because it is
142+
/// also used for emphasis in markdown and should not be treated as a newline.
143+
///
144+
/// \param OS The stream to render to.
145+
/// \param ParagraphText The text of the paragraph to render.
146+
void renderNewlinesMarkdown(llvm::raw_ostream &OS,
147+
llvm::StringRef ParagraphText) const;
148+
149+
/// \brief Go through the contents line by line to handle the newlines
150+
/// and required spacing correctly for plain text rendering.
151+
///
152+
/// Newlines are added if:
153+
/// - the line ends with 2 spaces and a newline follows
154+
/// - the line ends with punctuation that indicates a line break (.:,;!?)
155+
/// - the next line starts with a hard line break indicator (-@>#`\\ or a
156+
/// digit followed by '.' or ')'), ignoring leading whitespace.
157+
///
158+
/// Otherwise, newlines in the input are replaced with a single space.
159+
///
160+
/// Multiple spaces are collapsed into a single space.
161+
///
162+
/// Lines containing only whitespace are ignored.
163+
///
164+
/// This newline handling is only used when the client requests plain
165+
/// text for hover/signature help content.
166+
/// Therefore with this approach we mimic the behavior of markdown rendering
167+
/// for these clients.
168+
///
169+
/// \param OS The stream to render to.
170+
/// \param ParagraphText The text of the paragraph to render.
171+
void renderNewlinesPlaintext(llvm::raw_ostream &OS,
172+
llvm::StringRef ParagraphText) const;
98173
};
99174

100175
/// Represents a sequence of one or more documents. Knows how to print them in a

0 commit comments

Comments
 (0)