@@ -168,7 +168,8 @@ bool needsLeadingEscapePlaintext(char C, llvm::StringRef Before,
168168// /
169169// / \param C The character to check.
170170// / \param After The string that follows \p C .
171- // This is used to determine if \p C is part of a tag or an entity reference.
171+ // / This is used to determine if \p C is part of a tag or an entity reference.
172+ // /
172173// / \returns true if \p C should be escaped, false otherwise.
173174bool needsLeadingEscapeMarkdown (char C, llvm::StringRef After) {
174175 switch (C) {
@@ -198,23 +199,25 @@ bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
198199 return needsLeadingEscapeMarkdown (C, After);
199200}
200201
201- // / Escape a markdown text block. Ensures the punctuation will not introduce
202+ // / Escape a markdown text block.
203+ // / If \p EscapeMarkdown is true it ensures the punctuation will not introduce
202204// / any of the markdown constructs.
205+ // / Else, markdown syntax is not escaped, only HTML tags and entities.
203206std::string renderText (llvm::StringRef Input, bool StartsLine,
204- bool EscapeMarkdown = false ) {
207+ bool EscapeMarkdown) {
205208 std::string R;
209+ R.reserve (Input.size ());
206210
207211 // split the input into lines, and escape each line separately.
208212 llvm::StringRef Line, Rest;
209213
210- bool StartsLineIntern = StartsLine;
211214 bool IsFirstLine = true ;
212215
213216 for (std::tie (Line, Rest) = Input.split (' \n ' );
214217 !(Line.empty () && Rest.empty ());
215218 std::tie (Line, Rest) = Rest.split (' \n ' )) {
216219
217- StartsLineIntern = IsFirstLine ? StartsLine : true ;
220+ bool StartsLineIntern = IsFirstLine ? StartsLine : true ;
218221
219222 // Ignore leading spaces for the escape logic, but preserve them in the
220223 // output.
@@ -243,6 +246,7 @@ std::string renderText(llvm::StringRef Input, bool StartsLine,
243246// / is surrounded by backticks and the inner contents are properly escaped.
244247std::string renderInlineBlock (llvm::StringRef Input) {
245248 std::string R;
249+ R.reserve (Input.size ());
246250 // Double all backticks to make sure we don't close the inline block early.
247251 for (size_t From = 0 ; From < Input.size ();) {
248252 size_t Next = Input.find (" `" , From);
@@ -401,17 +405,21 @@ class Heading : public Paragraph {
401405 Heading (size_t Level) : Level(Level) {}
402406
403407 void renderEscapedMarkdown (llvm::raw_ostream &OS) const override {
404- OS << std::string (Level, ' # ' ) << ' ' ;
408+ insertHeadingMarkers (OS) ;
405409 Paragraph::renderEscapedMarkdown (OS);
406410 }
407411
408412 void renderMarkdown (llvm::raw_ostream &OS) const override {
409- OS << std::string (Level, ' # ' ) << ' ' ;
413+ insertHeadingMarkers (OS) ;
410414 Paragraph::renderMarkdown (OS);
411415 }
412416
413417private:
414418 size_t Level;
419+
420+ void insertHeadingMarkers (llvm::raw_ostream &OS) const {
421+ OS << std::string (Level, ' #' ) << ' ' ;
422+ }
415423};
416424
417425} // namespace
@@ -445,16 +453,18 @@ void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
445453 OS << " " ;
446454 switch (C.Kind ) {
447455 case ChunkKind::PlainText:
448- OS << renderText (C.Contents , !HasChunks, true );
456+ OS << renderText (C.Contents , !HasChunks, /* EscapeMarkdown= */ true );
449457 break ;
450458 case ChunkKind::InlineCode:
451459 OS << renderInlineBlock (C.Contents );
452460 break ;
453461 case ChunkKind::Bold:
454- OS << renderText (" **" + C.Contents + " **" , !HasChunks, true );
462+ OS << renderText (" **" + C.Contents + " **" , !HasChunks,
463+ /* EscapeMarkdown=*/ true );
455464 break ;
456465 case ChunkKind::Emphasized:
457- OS << renderText (" *" + C.Contents + " *" , !HasChunks, true );
466+ OS << renderText (" *" + C.Contents + " *" , !HasChunks,
467+ /* EscapeMarkdown=*/ true );
458468 break ;
459469 }
460470 HasChunks = true ;
@@ -472,16 +482,18 @@ void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
472482 OS << " " ;
473483 switch (C.Kind ) {
474484 case ChunkKind::PlainText:
475- OS << renderText (C.Contents , !HasChunks);
485+ OS << renderText (C.Contents , !HasChunks, /* EscapeMarkdown= */ false );
476486 break ;
477487 case ChunkKind::InlineCode:
478488 OS << renderInlineBlock (C.Contents );
479489 break ;
480490 case ChunkKind::Bold:
481- OS << " **" << renderText (C.Contents , !HasChunks) << " **" ;
491+ OS << " **" << renderText (C.Contents , !HasChunks, /* EscapeMarkdown=*/ false )
492+ << " **" ;
482493 break ;
483494 case ChunkKind::Emphasized:
484- OS << " *" << renderText (C.Contents , !HasChunks) << " *" ;
495+ OS << " *" << renderText (C.Contents , !HasChunks, /* EscapeMarkdown=*/ false )
496+ << " *" ;
485497 break ;
486498 }
487499 HasChunks = true ;
@@ -545,6 +557,8 @@ bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line,
545557void Paragraph::renderPlainText (llvm::raw_ostream &OS) const {
546558 bool NeedsSpace = false ;
547559 std::string ConcatenatedText;
560+ ConcatenatedText.reserve (EstimatedStringSize);
561+
548562 llvm::raw_string_ostream ConcatenatedOS (ConcatenatedText);
549563
550564 for (auto &C : Chunks) {
@@ -573,13 +587,36 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
573587
574588 // We go through the contents line by line to handle the newlines
575589 // and required spacing correctly.
590+ //
591+ // Newlines are added if:
592+ // - the line ends with 2 spaces and a newline follows
593+ // - the line ends with punctuation that indicates a line break (.:,;!?)
594+ // - the next line starts with a hard line break indicator (-@>#`, or a digit
595+ // followed by '.' or ')'), ignoring leading whitespace.
596+ //
597+ // Otherwise, newlines in the input are replaced with a single space.
598+ //
599+ // Multiple spaces are collapsed into a single space.
600+ //
601+ // Lines containing only whitespace are ignored.
576602 llvm::StringRef Line, Rest;
577603
578604 for (std::tie (Line, Rest) =
579605 llvm::StringRef (ConcatenatedText).trim ().split (' \n ' );
580606 !(Line.empty () && Rest.empty ());
581607 std::tie (Line, Rest) = Rest.split (' \n ' )) {
582608
609+ // Remove lines which only contain whitespace.
610+ //
611+ // Note: this also handles the case when there are multiple newlines
612+ // in a row, since all leading newlines are removed.
613+ //
614+ // The documentation parsing treats multiple newlines as paragraph
615+ // separators, hence it will create a new Paragraph instead of adding
616+ // multiple newlines to the same Paragraph.
617+ // Therfore multiple newlines are never added to a paragraph
618+ // except if the user explicitly adds them using
619+ // e.g. appendText("user text\n\nnext text").
583620 Line = Line.ltrim ();
584621 if (Line.empty ())
585622 continue ;
@@ -589,6 +626,9 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
589626 if (isHardLineBreakAfter (Line, Rest))
590627 OS << ' \n ' ;
591628 else if (!Rest.empty ())
629+ // Since we removed any trailing whitespace from the input using trim(),
630+ // we know that the next line contains non-whitespace characters.
631+ // Therefore, we can add a space without worrying about trailing spaces.
592632 OS << ' ' ;
593633 }
594634
@@ -606,7 +646,7 @@ void BulletList::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
606646 // rid of the copies, if it turns out to be a bottleneck.
607647 OS << " - " << indentLines (M) << ' \n ' ;
608648 }
609- // We need a new line after list to terminate it in markdown.
649+ // We add 2 newlines after list to terminate it in markdown.
610650 OS << " \n\n " ;
611651}
612652
@@ -617,7 +657,7 @@ void BulletList::renderMarkdown(llvm::raw_ostream &OS) const {
617657 // rid of the copies, if it turns out to be a bottleneck.
618658 OS << " - " << indentLines (M) << ' \n ' ;
619659 }
620- // We need a new line after list to terminate it in markdown.
660+ // We add 2 newlines after list to terminate it in markdown.
621661 OS << " \n\n " ;
622662}
623663
@@ -641,8 +681,10 @@ Paragraph &Paragraph::appendChunk(llvm::StringRef Contents, ChunkKind K) {
641681 return *this ;
642682 Chunks.emplace_back ();
643683 Chunk &C = Chunks.back ();
644- C.Contents = std::move ( Contents) ;
684+ C.Contents = Contents;
645685 C.Kind = K;
686+
687+ EstimatedStringSize += Contents.size ();
646688 return *this ;
647689}
648690
@@ -652,7 +694,7 @@ Paragraph &Paragraph::appendText(llvm::StringRef Text) {
652694 return *this ;
653695 }
654696
655- return appendChunk (Text, ChunkKind::PlainText);
697+ return appendChunk (std::move ( Text) , ChunkKind::PlainText);
656698}
657699
658700Paragraph &Paragraph::appendEmphasizedText (llvm::StringRef Text) {
@@ -677,6 +719,8 @@ Paragraph &Paragraph::appendCode(llvm::StringRef Code, bool Preserve) {
677719 C.Preserve = Preserve;
678720 // Disallow adjacent code spans without spaces, markdown can't render them.
679721 C.SpaceBefore = AdjacentCode;
722+
723+ EstimatedStringSize += Norm.size ();
680724 return *this ;
681725}
682726
0 commit comments