|
13 | 13 | #include "clang/AST/CommentCommandTraits.h"
|
14 | 14 | #include "clang/AST/CommentVisitor.h"
|
15 | 15 | #include "llvm/ADT/DenseMap.h"
|
| 16 | +#include "llvm/ADT/StringExtras.h" |
16 | 17 | #include "llvm/ADT/StringRef.h"
|
17 | 18 |
|
18 | 19 | namespace clang {
|
@@ -249,7 +250,53 @@ class BlockCommentToMarkupDocument
|
249 | 250 | }
|
250 | 251 | }
|
251 | 252 |
|
| 253 | + void visitCodeCommand(const comments::VerbatimBlockComment *VB) { |
| 254 | + std::string CodeLang = ""; |
| 255 | + auto *FirstLine = VB->child_begin(); |
| 256 | + // The \\code command has an optional language argument. |
| 257 | + // This argument is currently not parsed by the clang doxygen parser. |
| 258 | + // Therefore we try to extract it from the first line of the verbatim |
| 259 | + // block. |
| 260 | + if (VB->getNumLines() > 0) { |
| 261 | + if (const auto *Line = |
| 262 | + cast<comments::VerbatimBlockLineComment>(*FirstLine)) { |
| 263 | + llvm::StringRef Text = Line->getText(); |
| 264 | + // Language is a single word enclosed in {}. |
| 265 | + if (llvm::none_of(Text, llvm::isSpace) && Text.consume_front("{") && |
| 266 | + Text.consume_back("}")) { |
| 267 | + // drop a potential . since this is not supported in Markdown |
| 268 | + // fenced code blocks. |
| 269 | + Text.consume_front("."); |
| 270 | + // Language is alphanumeric or '+'. |
| 271 | + CodeLang = Text.take_while([](char C) { |
| 272 | + return llvm::isAlnum(C) || C == '+'; |
| 273 | + }) |
| 274 | + .str(); |
| 275 | + // Skip the first line for the verbatim text. |
| 276 | + ++FirstLine; |
| 277 | + } |
| 278 | + } |
| 279 | + } |
| 280 | + |
| 281 | + std::string CodeBlockText; |
| 282 | + |
| 283 | + for (const auto *LI = FirstLine; LI != VB->child_end(); ++LI) { |
| 284 | + if (const auto *Line = cast<comments::VerbatimBlockLineComment>(*LI)) { |
| 285 | + CodeBlockText += Line->getText().str() + "\n"; |
| 286 | + } |
| 287 | + } |
| 288 | + |
| 289 | + Out.addCodeBlock(CodeBlockText, CodeLang); |
| 290 | + } |
| 291 | + |
252 | 292 | void visitVerbatimBlockComment(const comments::VerbatimBlockComment *VB) {
|
| 293 | + // The \\code command is a special verbatim block command which we handle |
| 294 | + // separately. |
| 295 | + if (VB->getCommandID() == comments::CommandTraits::KCI_code) { |
| 296 | + visitCodeCommand(VB); |
| 297 | + return; |
| 298 | + } |
| 299 | + |
253 | 300 | commandToMarkup(Out.addParagraph(), VB->getCommandName(Traits),
|
254 | 301 | VB->getCommandMarker(), "");
|
255 | 302 |
|
@@ -292,6 +339,110 @@ class BlockCommentToMarkupDocument
|
292 | 339 | }
|
293 | 340 | };
|
294 | 341 |
|
| 342 | +void SymbolDocCommentVisitor::preprocessDocumentation(StringRef Doc) { |
| 343 | + enum State { |
| 344 | + Normal, |
| 345 | + FencedCodeblock, |
| 346 | + } State = Normal; |
| 347 | + std::string CodeFence; |
| 348 | + |
| 349 | + llvm::raw_string_ostream OS(CommentWithMarkers); |
| 350 | + |
| 351 | + // The documentation string is processed line by line. |
| 352 | + // The raw documentation string does not contain the comment markers |
| 353 | + // (e.g. /// or /** */). |
| 354 | + // But the comment lexer expects doxygen markers, so add them back. |
| 355 | + // We need to use the /// style doxygen markers because the comment could |
| 356 | + // contain the closing tag "*/" of a C Style "/** */" comment |
| 357 | + // which would break the parsing if we would just enclose the comment text |
| 358 | + // with "/** */". |
| 359 | + |
| 360 | + // Escape doxygen commands inside markdown inline code spans. |
| 361 | + // This is required to not let the doxygen parser interpret them as |
| 362 | + // commands. |
| 363 | + // Note: This is a heuristic which may fail in some cases. |
| 364 | + bool InCodeSpan = false; |
| 365 | + |
| 366 | + llvm::StringRef Line, Rest; |
| 367 | + for (std::tie(Line, Rest) = Doc.split('\n'); !(Line.empty() && Rest.empty()); |
| 368 | + std::tie(Line, Rest) = Rest.split('\n')) { |
| 369 | + |
| 370 | + // Detect code fence (``` or ~~~) |
| 371 | + if (State == Normal) { |
| 372 | + llvm::StringRef Trimmed = Line.ltrim(); |
| 373 | + if (Trimmed.starts_with("```") || Trimmed.starts_with("~~~")) { |
| 374 | + // https://www.doxygen.nl/manual/markdown.html#md_fenced |
| 375 | + CodeFence = |
| 376 | + Trimmed.take_while([](char C) { return C == '`' || C == '~'; }) |
| 377 | + .str(); |
| 378 | + // Try to detect language: first word after fence. Could also be |
| 379 | + // enclosed in {} |
| 380 | + llvm::StringRef AfterFence = |
| 381 | + Trimmed.drop_front(CodeFence.size()).ltrim(); |
| 382 | + // ignore '{' at the beginning of the language name to not duplicate it |
| 383 | + // for the doxygen command |
| 384 | + AfterFence.consume_front("{"); |
| 385 | + // The name is alphanumeric or '.' or '+' |
| 386 | + StringRef CodeLang = AfterFence.take_while( |
| 387 | + [](char C) { return llvm::isAlnum(C) || C == '.' || C == '+'; }); |
| 388 | + |
| 389 | + OS << "///@code"; |
| 390 | + |
| 391 | + if (!CodeLang.empty()) |
| 392 | + OS << "{" << CodeLang.str() << "}"; |
| 393 | + |
| 394 | + OS << "\n"; |
| 395 | + |
| 396 | + State = FencedCodeblock; |
| 397 | + continue; |
| 398 | + } |
| 399 | + |
| 400 | + // FIXME: handle indented code blocks too? |
| 401 | + // In doxygen, the indentation which triggers a code block depends on the |
| 402 | + // indentation of the previous paragraph. |
| 403 | + // https://www.doxygen.nl/manual/markdown.html#mddox_code_blocks |
| 404 | + } else if (State == FencedCodeblock) { |
| 405 | + // End of code fence |
| 406 | + if (Line.ltrim().starts_with(CodeFence)) { |
| 407 | + OS << "///@endcode\n"; |
| 408 | + State = Normal; |
| 409 | + continue; |
| 410 | + } |
| 411 | + OS << "///" << Line << "\n"; |
| 412 | + continue; |
| 413 | + } |
| 414 | + |
| 415 | + // Normal line preprocessing (add doxygen markers, handle escaping) |
| 416 | + OS << "///"; |
| 417 | + |
| 418 | + if (Line.empty() || Line.trim().empty()) { |
| 419 | + OS << "\n"; |
| 420 | + // Empty lines reset the InCodeSpan state. |
| 421 | + InCodeSpan = false; |
| 422 | + continue; |
| 423 | + } |
| 424 | + |
| 425 | + if (Line.starts_with("<")) |
| 426 | + // A comment line starting with '///<' is treated as a doxygen |
| 427 | + // command. To avoid this, we add a space before the '<'. |
| 428 | + OS << ' '; |
| 429 | + |
| 430 | + for (char C : Line) { |
| 431 | + if (C == '`') |
| 432 | + InCodeSpan = !InCodeSpan; |
| 433 | + else if (InCodeSpan && (C == '@' || C == '\\')) |
| 434 | + OS << '\\'; |
| 435 | + OS << C; |
| 436 | + } |
| 437 | + |
| 438 | + OS << "\n"; |
| 439 | + } |
| 440 | + |
| 441 | + // Close any unclosed code block |
| 442 | + if (State == FencedCodeblock) |
| 443 | + OS << "///@endcode\n"; |
| 444 | +} |
| 445 | + |
295 | 446 | void SymbolDocCommentVisitor::visitBlockCommandComment(
|
296 | 447 | const comments::BlockCommandComment *B) {
|
297 | 448 | switch (B->getCommandID()) {
|
|
0 commit comments