Skip to content

Commit b1b2251

Browse files
committed
handle new lines in paragraphs correctly for markdown and escaped markdown
1 parent c54d0d7 commit b1b2251

File tree

5 files changed

+328
-95
lines changed

5 files changed

+328
-95
lines changed

clang-tools-extra/clangd/support/Markup.cpp

Lines changed: 119 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -450,60 +450,102 @@ std::string Block::asPlainText() const {
450450
return llvm::StringRef(OS.str()).trim().str();
451451
}
452452

453+
/// Writes \p ParagraphText to \p OS one line at a time for markdown output.
///
/// Leading newlines and trailing whitespace of the paragraph are dropped
/// before processing. Empty lines are emitted as a bare newline. Every other
/// line is written unchanged; when more text follows and
/// isHardLineBreakAfter() reports a hard break, two spaces are appended
/// before the newline.
///
/// \param OS The stream to render to.
/// \param ParagraphText The already concatenated text of the paragraph.
void Paragraph::renderNewlinesMarkdown(llvm::raw_ostream &OS,
                                       std::string &ParagraphText) const {
  llvm::StringRef Remaining =
      llvm::StringRef(ParagraphText).ltrim("\n").rtrim();

  for (;;) {
    const std::pair<llvm::StringRef, llvm::StringRef> Parts =
        Remaining.split('\n');
    const llvm::StringRef Current = Parts.first;
    Remaining = Parts.second;

    // Nothing left on this line and nothing after it: the paragraph is done.
    if (Current.empty() && Remaining.empty())
      return;

    // An empty line falls through and only produces the newline below, which
    // keeps blank lines intact in the markdown output.
    if (!Current.empty()) {
      OS << Current;

      // Two spaces in front of a newline force a line break in markdown.
      const bool MoreTextFollows = !Remaining.empty();
      if (MoreTextFollows &&
          isHardLineBreakAfter(Current, Remaining, /*IsMarkdown=*/true))
        OS << "  ";
    }

    OS << '\n';
  }
}
476+
453477
void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
454478
bool NeedsSpace = false;
455479
bool HasChunks = false;
480+
std::string ParagraphText;
481+
ParagraphText.reserve(EstimatedStringSize);
482+
llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
456483
for (auto &C : Chunks) {
457484
if (C.SpaceBefore || NeedsSpace)
458-
OS << " ";
485+
ParagraphTextOS << " ";
459486
switch (C.Kind) {
460487
case ChunkKind::PlainText:
461-
OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/true);
488+
ParagraphTextOS << renderText(C.Contents, !HasChunks,
489+
/*EscapeMarkdown=*/true);
462490
break;
463491
case ChunkKind::InlineCode:
464-
OS << renderInlineBlock(C.Contents);
492+
ParagraphTextOS << renderInlineBlock(C.Contents);
465493
break;
466494
case ChunkKind::Bold:
467-
OS << renderText("**" + C.Contents + "**", !HasChunks,
468-
/*EscapeMarkdown=*/true);
495+
ParagraphTextOS << renderText("**" + C.Contents + "**", !HasChunks,
496+
/*EscapeMarkdown=*/true);
469497
break;
470498
case ChunkKind::Emphasized:
471-
OS << renderText("*" + C.Contents + "*", !HasChunks,
472-
/*EscapeMarkdown=*/true);
499+
ParagraphTextOS << renderText("*" + C.Contents + "*", !HasChunks,
500+
/*EscapeMarkdown=*/true);
473501
break;
474502
}
475503
HasChunks = true;
476504
NeedsSpace = C.SpaceAfter;
477505
}
506+
507+
renderNewlinesMarkdown(OS, ParagraphText);
508+
478509
// A paragraph in markdown is separated by a blank line.
479510
OS << "\n\n";
480511
}
481512

482513
void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
483514
bool NeedsSpace = false;
484515
bool HasChunks = false;
516+
std::string ParagraphText;
517+
ParagraphText.reserve(EstimatedStringSize);
518+
llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
485519
for (auto &C : Chunks) {
486520
if (C.SpaceBefore || NeedsSpace)
487-
OS << " ";
521+
ParagraphTextOS << " ";
488522
switch (C.Kind) {
489523
case ChunkKind::PlainText:
490-
OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false);
524+
ParagraphTextOS << renderText(C.Contents, !HasChunks,
525+
/*EscapeMarkdown=*/false);
491526
break;
492527
case ChunkKind::InlineCode:
493-
OS << renderInlineBlock(C.Contents);
528+
ParagraphTextOS << renderInlineBlock(C.Contents);
494529
break;
495530
case ChunkKind::Bold:
496-
OS << "**" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
497-
<< "**";
531+
ParagraphTextOS << "**"
532+
<< renderText(C.Contents, !HasChunks,
533+
/*EscapeMarkdown=*/false)
534+
<< "**";
498535
break;
499536
case ChunkKind::Emphasized:
500-
OS << "*" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
501-
<< "*";
537+
ParagraphTextOS << "*"
538+
<< renderText(C.Contents, !HasChunks,
539+
/*EscapeMarkdown=*/false)
540+
<< "*";
502541
break;
503542
}
504543
HasChunks = true;
505544
NeedsSpace = C.SpaceAfter;
506545
}
546+
547+
renderNewlinesMarkdown(OS, ParagraphText);
548+
507549
// A paragraph in markdown is separated by a blank line.
508550
OS << "\n\n";
509551
}
@@ -512,8 +554,6 @@ std::unique_ptr<Block> Paragraph::clone() const {
512554
return std::make_unique<Paragraph>(*this);
513555
}
514556

515-
/// Choose a marker to delimit `Text` from a prioritized list of options.
516-
/// This is more readable than escaping for plain-text.
517557
llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
518558
llvm::StringRef Text) const {
519559
// Prefer a delimiter whose characters don't appear in the text.
@@ -523,23 +563,39 @@ llvm::StringRef Paragraph::chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
523563
return Options.front();
524564
}
525565

526-
bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line) const {
566+
bool Paragraph::punctuationIndicatesLineBreak(llvm::StringRef Line,
567+
bool IsMarkdown) const {
527568
constexpr llvm::StringLiteral Punctuation = R"txt(.:,;!?)txt";
528569

570+
if (!IsMarkdown && Line.ends_with(" "))
571+
return true;
572+
529573
Line = Line.rtrim();
530574
return !Line.empty() && Punctuation.contains(Line.back());
531575
}
532576

533-
bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
577+
bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest,
578+
bool IsMarkdown) const {
579+
// Plaintext indicators:
534580
// '-'/'*' md list, '@'/'\' documentation command, '>' md blockquote,
535-
// '#' headings, '`' code blocks, two spaces (markdown force newline)
536-
constexpr llvm::StringLiteral LinebreakIndicators = R"txt(-*@\>#`)txt";
581+
// '#' headings, '`' code blocks
582+
constexpr llvm::StringLiteral LinebreakIndicatorsPlainText =
583+
R"txt(-*@\>#`)txt";
584+
// Markdown indicators:
585+
// Only '@' and '\' documentation commands/escaped markdown syntax.
586+
constexpr llvm::StringLiteral LinebreakIndicatorsMarkdown = R"txt(@\)txt";
537587

538588
Rest = Rest.ltrim(" \t");
539589
if (Rest.empty())
540590
return false;
541591

542-
if (LinebreakIndicators.contains(Rest.front()))
592+
if (IsMarkdown) {
593+
if (LinebreakIndicatorsMarkdown.contains(Rest.front()))
594+
return true;
595+
return false;
596+
}
597+
598+
if (LinebreakIndicatorsPlainText.contains(Rest.front()))
543599
return true;
544600

545601
if (llvm::isDigit(Rest.front())) {
@@ -550,64 +606,18 @@ bool Paragraph::isHardLineBreakIndicator(llvm::StringRef Rest) const {
550606
return false;
551607
}
552608

553-
bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line,
554-
llvm::StringRef Rest) const {
555-
// In Markdown, 2 spaces before a line break forces a line break.
556-
// Add a line break for plaintext in this case too.
609+
bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest,
610+
bool IsMarkdown) const {
557611
// Should we also consider whether Line is short?
558-
return Line.ends_with(" ") || punctuationIndicatesLineBreak(Line) ||
559-
isHardLineBreakIndicator(Rest);
612+
return (punctuationIndicatesLineBreak(Line, IsMarkdown) ||
613+
isHardLineBreakIndicator(Rest, IsMarkdown));
560614
}
561615

562-
void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
563-
bool NeedsSpace = false;
564-
std::string ConcatenatedText;
565-
ConcatenatedText.reserve(EstimatedStringSize);
566-
567-
llvm::raw_string_ostream ConcatenatedOS(ConcatenatedText);
568-
569-
for (auto &C : Chunks) {
570-
571-
if (C.Kind == ChunkKind::PlainText) {
572-
if (C.SpaceBefore || NeedsSpace)
573-
ConcatenatedOS << ' ';
574-
575-
ConcatenatedOS << C.Contents;
576-
NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
577-
continue;
578-
}
579-
580-
if (C.SpaceBefore || NeedsSpace)
581-
ConcatenatedOS << ' ';
582-
llvm::StringRef Marker = "";
583-
if (C.Preserve && C.Kind == ChunkKind::InlineCode)
584-
Marker = chooseMarker({"`", "'", "\""}, C.Contents);
585-
else if (C.Kind == ChunkKind::Bold)
586-
Marker = "**";
587-
else if (C.Kind == ChunkKind::Emphasized)
588-
Marker = "*";
589-
ConcatenatedOS << Marker << C.Contents << Marker;
590-
NeedsSpace = C.SpaceAfter;
591-
}
592-
593-
// We go through the contents line by line to handle the newlines
594-
// and required spacing correctly.
595-
//
596-
// Newlines are added if:
597-
// - the line ends with 2 spaces and a newline follows
598-
// - the line ends with punctuation that indicates a line break (.:,;!?)
599-
// - the next line starts with a hard line break indicator (-@>#`, or a digit
600-
// followed by '.' or ')'), ignoring leading whitespace.
601-
//
602-
// Otherwise, newlines in the input are replaced with a single space.
603-
//
604-
// Multiple spaces are collapsed into a single space.
605-
//
606-
// Lines containing only whitespace are ignored.
616+
void Paragraph::renderNewlinesPlaintext(llvm::raw_ostream &OS,
617+
std::string &ParagraphText) const {
607618
llvm::StringRef Line, Rest;
608619

609-
for (std::tie(Line, Rest) =
610-
llvm::StringRef(ConcatenatedText).trim().split('\n');
620+
for (std::tie(Line, Rest) = llvm::StringRef(ParagraphText).trim().split('\n');
611621
!(Line.empty() && Rest.empty());
612622
std::tie(Line, Rest) = Rest.split('\n')) {
613623

@@ -628,14 +638,48 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
628638

629639
OS << canonicalizeSpaces(Line);
630640

631-
if (isHardLineBreakAfter(Line, Rest))
641+
if (isHardLineBreakAfter(Line, Rest, /*IsMarkdown=*/false))
632642
OS << '\n';
633643
else if (!Rest.empty())
634644
// Since we removed any trailing whitespace from the input using trim(),
635645
// we know that the next line contains non-whitespace characters.
636646
// Therefore, we can add a space without worrying about trailing spaces.
637647
OS << ' ';
638648
}
649+
}
650+
651+
void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
652+
bool NeedsSpace = false;
653+
std::string ParagraphText;
654+
ParagraphText.reserve(EstimatedStringSize);
655+
656+
llvm::raw_string_ostream ParagraphTextOS(ParagraphText);
657+
658+
for (auto &C : Chunks) {
659+
660+
if (C.Kind == ChunkKind::PlainText) {
661+
if (C.SpaceBefore || NeedsSpace)
662+
ParagraphTextOS << ' ';
663+
664+
ParagraphTextOS << C.Contents;
665+
NeedsSpace = llvm::isSpace(C.Contents.back()) || C.SpaceAfter;
666+
continue;
667+
}
668+
669+
if (C.SpaceBefore || NeedsSpace)
670+
ParagraphTextOS << ' ';
671+
llvm::StringRef Marker = "";
672+
if (C.Preserve && C.Kind == ChunkKind::InlineCode)
673+
Marker = chooseMarker({"`", "'", "\""}, C.Contents);
674+
else if (C.Kind == ChunkKind::Bold)
675+
Marker = "**";
676+
else if (C.Kind == ChunkKind::Emphasized)
677+
Marker = "*";
678+
ParagraphTextOS << Marker << C.Contents << Marker;
679+
NeedsSpace = C.SpaceAfter;
680+
}
681+
682+
renderNewlinesPlaintext(OS, ParagraphText);
639683

640684
// Paragraphs are separated by a blank line.
641685
OS << "\n\n";

clang-tools-extra/clangd/support/Markup.h

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -92,9 +92,84 @@ class Paragraph : public Block {
9292

9393
llvm::StringRef chooseMarker(llvm::ArrayRef<llvm::StringRef> Options,
9494
llvm::StringRef Text) const;
95-
bool punctuationIndicatesLineBreak(llvm::StringRef Line) const;
96-
bool isHardLineBreakIndicator(llvm::StringRef Rest) const;
97-
bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest) const;
95+
96+
/// Checks if the given line ends with punctuation that indicates a line break
97+
/// (.:,;!?).
98+
///
99+
/// If \p IsMarkdown is false, lines ending with 2 spaces are also considered
100+
/// as indicating a line break. This is not needed for markdown because the
101+
/// client renderer will handle this case.
102+
bool punctuationIndicatesLineBreak(llvm::StringRef Line,
103+
bool IsMarkdown) const;
104+
105+
/// Checks if the given line starts with a hard line break indicator.
106+
///
107+
/// If \p IsMarkdown is true, only '@' and '\' are considered as indicators.
108+
/// Otherwise, '-', '*', '@', '\', '>', '#', '`' and a digit followed by '.'
109+
/// or ')' are considered as indicators.
110+
bool isHardLineBreakIndicator(llvm::StringRef Rest, bool IsMarkdown) const;
111+
112+
/// Checks if a hard line break should be added after the given line.
113+
bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest,
114+
bool IsMarkdown) const;
115+
116+
/// \brief Go through the contents line by line to handle the newlines
117+
/// and required spacing correctly for markdown rendering.
118+
///
119+
/// Hard line breaks (two spaces before the newline) are added if:
120+
/// - the line ends with punctuation that indicates a line break (.:,;!?)
121+
/// - the next line starts with a hard line break indicator \\ (escaped
122+
/// markdown/doxygen command) or @ (doxygen command)
123+
///
124+
/// This newline handling is only used when the client requests markdown
125+
/// for hover/signature help content.
126+
/// Markdown does not add any newlines inside paragraphs unless the user
127+
/// explicitly adds them. For hover/signature help content, we still want to
128+
/// add newlines in some cases to improve readability, especially when doxygen
129+
/// parsing is disabled or not implemented (like for signature help).
130+
/// Therefore we add newlines in the above mentioned cases.
131+
///
132+
/// In addition to that, we need to consider that the user can configure
133+
/// clangd to treat documentation comments as plain text, while the client
134+
/// requests markdown.
135+
/// In this case, all markdown syntax is escaped and will
136+
/// not be rendered as expected by markdown.
137+
/// Examples are lists starting with '-' or headings starting with '#'.
138+
/// With the above next line heuristics, these cases are also covered by the
139+
/// '\\' new line indicator.
140+
///
141+
/// FIXME: The heuristic fails e.g. for lists starting with '*' because it is
142+
/// also used for emphasis in markdown and should not be treated as a newline.
143+
///
144+
/// \param OS The stream to render to.
145+
/// \param ParagraphText The text of the paragraph to render.
146+
void renderNewlinesMarkdown(llvm::raw_ostream &OS,
147+
std::string &ParagraphText) const;
148+
149+
/// \brief Go through the contents line by line to handle the newlines
150+
/// and required spacing correctly for plain text rendering.
151+
///
152+
/// Newlines are added if:
153+
/// - the line ends with 2 spaces and a newline follows
154+
/// - the line ends with punctuation that indicates a line break (.:,;!?)
155+
/// - the next line starts with a hard line break indicator (-*@>#`\\ or a
156+
/// digit followed by '.' or ')'), ignoring leading whitespace.
157+
///
158+
/// Otherwise, newlines in the input are replaced with a single space.
159+
///
160+
/// Multiple spaces are collapsed into a single space.
161+
///
162+
/// Lines containing only whitespace are ignored.
163+
///
164+
/// This newline handling is only used when the client requests plain
165+
/// text for hover/signature help content.
166+
/// Therefore with this approach we mimic the behavior of markdown rendering
167+
/// for these clients.
168+
///
169+
/// \param OS The stream to render to.
170+
/// \param ParagraphText The text of the paragraph to render.
171+
void renderNewlinesPlaintext(llvm::raw_ostream &OS,
172+
std::string &ParagraphText) const;
98173
};
99174

100175
/// Represents a sequence of one or more documents. Knows how to print them in a

0 commit comments

Comments
 (0)