Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions clang-tools-extra/clangd/Hover.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1608,11 +1608,19 @@ void parseDocumentationParagraph(llvm::StringRef Text, markup::Paragraph &Out) {
}

void parseDocumentation(llvm::StringRef Input, markup::Document &Output) {
// A documentation string is treated as a sequence of paragraphs,
// where the paragraphs are seperated by at least one empty line
// (meaning 2 consecutive newline characters).
// Possible leading empty lines (introduced by an odd number > 1 of
// empty lines between 2 paragraphs) will be removed later in the Markup
// renderer.
llvm::StringRef Paragraph, Rest;
for (std::tie(Paragraph, Rest) = Input.split("\n\n");
!(Paragraph.empty() && Rest.empty());
std::tie(Paragraph, Rest) = Rest.split("\n\n")) {

// The Paragraph will be empty if there is an even number of newline
// characters between two paragraphs, so we skip it.
if (!Paragraph.empty())
parseDocumentationParagraph(Paragraph, Output.addParagraph());
}
Expand Down
78 changes: 61 additions & 17 deletions clang-tools-extra/clangd/support/Markup.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,8 @@ bool needsLeadingEscapePlaintext(char C, llvm::StringRef Before,
///
/// \param C The character to check.
/// \param After The string that follows \p C .
// This is used to determine if \p C is part of a tag or an entity reference.
/// This is used to determine if \p C is part of a tag or an entity reference.
///
/// \returns true if \p C should be escaped, false otherwise.
bool needsLeadingEscapeMarkdown(char C, llvm::StringRef After) {
switch (C) {
Expand Down Expand Up @@ -198,23 +199,25 @@ bool needsLeadingEscape(char C, llvm::StringRef Before, llvm::StringRef After,
return needsLeadingEscapeMarkdown(C, After);
}

/// Escape a markdown text block. Ensures the punctuation will not introduce
/// Escape a markdown text block.
/// If \p EscapeMarkdown is true it ensures the punctuation will not introduce
/// any of the markdown constructs.
/// Else, markdown syntax is not escaped, only HTML tags and entities.
std::string renderText(llvm::StringRef Input, bool StartsLine,
bool EscapeMarkdown = false) {
bool EscapeMarkdown) {
std::string R;
R.reserve(Input.size());

// split the input into lines, and escape each line separately.
llvm::StringRef Line, Rest;

bool StartsLineIntern = StartsLine;
bool IsFirstLine = true;

for (std::tie(Line, Rest) = Input.split('\n');
!(Line.empty() && Rest.empty());
std::tie(Line, Rest) = Rest.split('\n')) {

StartsLineIntern = IsFirstLine ? StartsLine : true;
bool StartsLineIntern = IsFirstLine ? StartsLine : true;

// Ignore leading spaces for the escape logic, but preserve them in the
// output.
Expand Down Expand Up @@ -243,6 +246,7 @@ std::string renderText(llvm::StringRef Input, bool StartsLine,
/// is surrounded by backticks and the inner contents are properly escaped.
std::string renderInlineBlock(llvm::StringRef Input) {
std::string R;
R.reserve(Input.size());
// Double all backticks to make sure we don't close the inline block early.
for (size_t From = 0; From < Input.size();) {
size_t Next = Input.find("`", From);
Expand Down Expand Up @@ -401,17 +405,21 @@ class Heading : public Paragraph {
Heading(size_t Level) : Level(Level) {}

void renderEscapedMarkdown(llvm::raw_ostream &OS) const override {
OS << std::string(Level, '#') << ' ';
insertHeadingMarkers(OS);
Paragraph::renderEscapedMarkdown(OS);
}

void renderMarkdown(llvm::raw_ostream &OS) const override {
OS << std::string(Level, '#') << ' ';
insertHeadingMarkers(OS);
Paragraph::renderMarkdown(OS);
}

private:
size_t Level;

void insertHeadingMarkers(llvm::raw_ostream &OS) const {
OS << std::string(Level, '#') << ' ';
}
};

} // namespace
Expand Down Expand Up @@ -445,16 +453,18 @@ void Paragraph::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
OS << " ";
switch (C.Kind) {
case ChunkKind::PlainText:
OS << renderText(C.Contents, !HasChunks, true);
OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/true);
break;
case ChunkKind::InlineCode:
OS << renderInlineBlock(C.Contents);
break;
case ChunkKind::Bold:
OS << renderText("**" + C.Contents + "**", !HasChunks, true);
OS << renderText("**" + C.Contents + "**", !HasChunks,
/*EscapeMarkdown=*/true);
break;
case ChunkKind::Emphasized:
OS << renderText("*" + C.Contents + "*", !HasChunks, true);
OS << renderText("*" + C.Contents + "*", !HasChunks,
/*EscapeMarkdown=*/true);
break;
}
HasChunks = true;
Expand All @@ -472,16 +482,18 @@ void Paragraph::renderMarkdown(llvm::raw_ostream &OS) const {
OS << " ";
switch (C.Kind) {
case ChunkKind::PlainText:
OS << renderText(C.Contents, !HasChunks);
OS << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false);
break;
case ChunkKind::InlineCode:
OS << renderInlineBlock(C.Contents);
break;
case ChunkKind::Bold:
OS << "**" << renderText(C.Contents, !HasChunks) << "**";
OS << "**" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
<< "**";
break;
case ChunkKind::Emphasized:
OS << "*" << renderText(C.Contents, !HasChunks) << "*";
OS << "*" << renderText(C.Contents, !HasChunks, /*EscapeMarkdown=*/false)
<< "*";
break;
}
HasChunks = true;
Expand Down Expand Up @@ -545,6 +557,8 @@ bool Paragraph::isHardLineBreakAfter(llvm::StringRef Line,
void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
bool NeedsSpace = false;
std::string ConcatenatedText;
ConcatenatedText.reserve(EstimatedStringSize);

llvm::raw_string_ostream ConcatenatedOS(ConcatenatedText);

for (auto &C : Chunks) {
Expand Down Expand Up @@ -573,13 +587,36 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {

// We go through the contents line by line to handle the newlines
// and required spacing correctly.
//
// Newlines are added if:
// - the line ends with 2 spaces and a newline follows
// - the line ends with punctuation that indicates a line break (.:,;!?)
// - the next line starts with a hard line break indicator (-@>#`, or a digit
// followed by '.' or ')'), ignoring leading whitespace.
//
// Otherwise, newlines in the input are replaced with a single space.
//
// Multiple spaces are collapsed into a single space.
//
// Lines containing only whitespace are ignored.
llvm::StringRef Line, Rest;

for (std::tie(Line, Rest) =
llvm::StringRef(ConcatenatedText).trim().split('\n');
!(Line.empty() && Rest.empty());
std::tie(Line, Rest) = Rest.split('\n')) {

// Remove lines which only contain whitespace.
//
// Note: this also handles the case when there are multiple newlines
// in a row, since all leading newlines are removed.
//
// The documentation parsing treats multiple newlines as paragraph
// separators, hence it will create a new Paragraph instead of adding
// multiple newlines to the same Paragraph.
// Therfore multiple newlines are never added to a paragraph
// except if the user explicitly adds them using
// e.g. appendText("user text\n\nnext text").
Line = Line.ltrim();
if (Line.empty())
continue;
Expand All @@ -589,6 +626,9 @@ void Paragraph::renderPlainText(llvm::raw_ostream &OS) const {
if (isHardLineBreakAfter(Line, Rest))
OS << '\n';
else if (!Rest.empty())
// Since we removed any trailing whitespace from the input using trim(),
// we know that the next line contains non-whitespace characters.
// Therefore, we can add a space without worrying about trailing spaces.
OS << ' ';
}

Expand All @@ -606,7 +646,7 @@ void BulletList::renderEscapedMarkdown(llvm::raw_ostream &OS) const {
// rid of the copies, if it turns out to be a bottleneck.
OS << "- " << indentLines(M) << '\n';
}
// We need a new line after list to terminate it in markdown.
// We add 2 newlines after list to terminate it in markdown.
OS << "\n\n";
}

Expand All @@ -617,7 +657,7 @@ void BulletList::renderMarkdown(llvm::raw_ostream &OS) const {
// rid of the copies, if it turns out to be a bottleneck.
OS << "- " << indentLines(M) << '\n';
}
// We need a new line after list to terminate it in markdown.
// We add 2 newlines after list to terminate it in markdown.
OS << "\n\n";
}

Expand All @@ -641,8 +681,10 @@ Paragraph &Paragraph::appendChunk(llvm::StringRef Contents, ChunkKind K) {
return *this;
Chunks.emplace_back();
Chunk &C = Chunks.back();
C.Contents = std::move(Contents);
C.Contents = Contents;
C.Kind = K;

EstimatedStringSize += Contents.size();
return *this;
}

Expand All @@ -652,7 +694,7 @@ Paragraph &Paragraph::appendText(llvm::StringRef Text) {
return *this;
}

return appendChunk(Text, ChunkKind::PlainText);
return appendChunk(std::move(Text), ChunkKind::PlainText);
}

Paragraph &Paragraph::appendEmphasizedText(llvm::StringRef Text) {
Expand All @@ -677,6 +719,8 @@ Paragraph &Paragraph::appendCode(llvm::StringRef Code, bool Preserve) {
C.Preserve = Preserve;
// Disallow adjacent code spans without spaces, markdown can't render them.
C.SpaceBefore = AdjacentCode;

EstimatedStringSize += Norm.size();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Norm is likely empty here because of the std::move() above. Probably the simplest fix would be to move this size calculation above.

return *this;
}

Expand Down
19 changes: 12 additions & 7 deletions clang-tools-extra/clangd/support/Markup.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,14 @@ class Block {

virtual bool isRuler() const { return false; }
virtual ~Block() = default;

protected:
Copy link
Contributor

@emaxx-google emaxx-google Jul 23, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: If the field is Paragraph-specific, should we just move it into that class and make it private?

/// Estimated size of the string representation of this block.
/// Used to reserve space in the output string.
/// Each time block content is added, this value is updated.
/// This is an estimate, so it may not be accurate but can help
/// reducing dynamically reallocating string memory.
unsigned EstimatedStringSize = 0;
};

/// Represents parts of the markup that can contain strings, like inline code,
Expand Down Expand Up @@ -67,7 +75,7 @@ class Paragraph : public Block {
Paragraph &appendSpace();

private:
typedef enum { PlainText, InlineCode, Bold, Emphasized } ChunkKind;
enum ChunkKind { PlainText, InlineCode, Bold, Emphasized };
struct Chunk {
ChunkKind Kind = PlainText;
// Preserve chunk markers in plaintext.
Expand All @@ -90,12 +98,6 @@ class Paragraph : public Block {
bool isHardLineBreakAfter(llvm::StringRef Line, llvm::StringRef Rest) const;
};

class ListItemParagraph : public Paragraph {
public:
void renderEscapedMarkdown(llvm::raw_ostream &OS) const override;
void renderMarkdown(llvm::raw_ostream &OS) const override;
};

/// Represents a sequence of one or more documents. Knows how to print them in a
/// list like format, e.g. by prepending with "- " and indentation.
class BulletList : public Block {
Expand Down Expand Up @@ -142,6 +144,9 @@ class Document {

BulletList &addBulletList();

/// Doesn't contain any trailing newlines and escaped markdown syntax.
/// It is expected that the result of this function
/// is rendered as markdown.
std::string asEscapedMarkdown() const;
/// Doesn't contain any trailing newlines.
/// It is expected that the result of this function
Expand Down
30 changes: 30 additions & 0 deletions clang-tools-extra/clangd/unittests/HoverTests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3822,6 +3822,36 @@ TEST(Hover, ParseDocumentation) {
"foo\n\n bar",
"foo\n\nbar",
},
{
"foo\n\n\n\nbar",
"foo\n\nbar",
"foo\n\nbar",
"foo\n\nbar",
},
{
"foo\n\n\n\n\tbar",
"foo\n\n\tbar",
"foo\n\n\tbar",
"foo\n\nbar",
},
{
"foo\n\n\n\n bar",
"foo\n\n bar",
"foo\n\n bar",
"foo\n\nbar",
},
{
"foo\n\n\n\n bar",
"foo\n\n bar",
"foo\n\n bar",
"foo\n\nbar",
},
{
"foo\n\n\n\n bar",
"foo\n\n bar",
"foo\n\n bar",
"foo\n\nbar",
},
{
"foo.\nbar",
"foo.\nbar",
Expand Down
Loading