Skip to content

Commit 718aac9

Browse files
authored
[clang][Diagnostics] Highlight code snippets (#66514)
Add some primitive syntax highlighting to our code snippet output. This adds "checkpoints" to the Preprocessor, which we can use to start lexing from. When printing a code snippet, we lex from the nearest checkpoint and highlight the tokens based on their token type.
1 parent 863b2c8 commit 718aac9

File tree

7 files changed

+262
-14
lines changed

7 files changed

+262
-14
lines changed

clang/docs/ReleaseNotes.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,8 @@ Attribute Changes in Clang
103103

104104
Improvements to Clang's diagnostics
105105
-----------------------------------
106+
- Clang now applies syntax highlighting to the code snippets it
107+
prints.
106108

107109
Improvements to Clang's time-trace
108110
----------------------------------

clang/include/clang/Frontend/TextDiagnostic.h

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#define LLVM_CLANG_FRONTEND_TEXTDIAGNOSTIC_H
1717

1818
#include "clang/Frontend/DiagnosticRenderer.h"
19+
#include "llvm/Support/raw_ostream.h"
1920

2021
namespace clang {
2122

@@ -33,14 +34,22 @@ namespace clang {
3334
/// printing coming out of libclang.
3435
class TextDiagnostic : public DiagnosticRenderer {
3536
raw_ostream &OS;
37+
const Preprocessor *PP;
3638

3739
public:
38-
TextDiagnostic(raw_ostream &OS,
39-
const LangOptions &LangOpts,
40-
DiagnosticOptions *DiagOpts);
40+
TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
41+
DiagnosticOptions *DiagOpts, const Preprocessor *PP = nullptr);
4142

4243
~TextDiagnostic() override;
4344

45+
struct StyleRange {
46+
unsigned Start;
47+
unsigned End;
48+
enum llvm::raw_ostream::Colors Color;
49+
StyleRange(unsigned S, unsigned E, enum llvm::raw_ostream::Colors C)
50+
: Start(S), End(E), Color(C){};
51+
};
52+
4453
/// Print the diagnostic level to a raw_ostream.
4554
///
4655
/// This is a static helper that handles colorizing the level and formatting
@@ -104,7 +113,8 @@ class TextDiagnostic : public DiagnosticRenderer {
104113
ArrayRef<FixItHint> Hints);
105114

106115
void emitSnippet(StringRef SourceLine, unsigned MaxLineNoDisplayWidth,
107-
unsigned LineNo);
116+
unsigned LineNo, unsigned DisplayLineNo,
117+
ArrayRef<StyleRange> Styles);
108118

109119
void emitParseableFixits(ArrayRef<FixItHint> Hints, const SourceManager &SM);
110120
};

clang/include/clang/Lex/Preprocessor.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,13 @@ class Preprocessor {
284284
/// The kind of translation unit we are processing.
285285
const TranslationUnitKind TUKind;
286286

287+
/// Returns a pointer into the given file's buffer that's guaranteed
288+
/// to be between tokens. The returned pointer is always before \p Start.
289+
/// The maximum distance between the returned pointer and \p Start is
290+
/// limited by a constant value, but also an implementation detail.
291+
/// If no such check point exists, \c nullptr is returned.
292+
const char *getCheckPoint(FileID FID, const char *Start) const;
293+
287294
private:
288295
/// The code-completion handler.
289296
CodeCompletionHandler *CodeComplete = nullptr;
@@ -311,6 +318,9 @@ class Preprocessor {
311318
/// The import path for named module that we're currently processing.
312319
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> NamedModuleImportPath;
313320

321+
llvm::DenseMap<FileID, SmallVector<const char *>> CheckPoints;
322+
unsigned CheckPointCounter = 0;
323+
314324
/// Whether the import is an `@import` or a standard c++ modules import.
315325
bool IsAtImport = false;
316326

clang/lib/Frontend/TextDiagnostic.cpp

Lines changed: 202 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
#include "clang/Basic/FileManager.h"
1313
#include "clang/Basic/SourceManager.h"
1414
#include "clang/Lex/Lexer.h"
15+
#include "clang/Lex/Preprocessor.h"
1516
#include "llvm/ADT/SmallString.h"
1617
#include "llvm/ADT/StringExtras.h"
1718
#include "llvm/Support/ConvertUTF.h"
@@ -41,6 +42,14 @@ static const enum raw_ostream::Colors fatalColor = raw_ostream::RED;
4142
static const enum raw_ostream::Colors savedColor =
4243
raw_ostream::SAVEDCOLOR;
4344

45+
// Magenta is taken for 'warning'. Red is already 'error' and 'cyan'
46+
// is already taken for 'note'. Green is already used to underline
47+
// source ranges. White and black are bad because of the usual
48+
// terminal backgrounds. Which leaves us only with TWO options.
49+
static constexpr raw_ostream::Colors CommentColor = raw_ostream::YELLOW;
50+
static constexpr raw_ostream::Colors LiteralColor = raw_ostream::GREEN;
51+
static constexpr raw_ostream::Colors KeywordColor = raw_ostream::BLUE;
52+
4453
/// Add highlights to differences in template strings.
4554
static void applyTemplateHighlighting(raw_ostream &OS, StringRef Str,
4655
bool &Normal, bool Bold) {
@@ -644,10 +653,10 @@ static bool printWordWrapped(raw_ostream &OS, StringRef Str, unsigned Columns,
644653
return Wrapped;
645654
}
646655

647-
TextDiagnostic::TextDiagnostic(raw_ostream &OS,
648-
const LangOptions &LangOpts,
649-
DiagnosticOptions *DiagOpts)
650-
: DiagnosticRenderer(LangOpts, DiagOpts), OS(OS) {}
656+
/// Creates a text diagnostic renderer that writes to \p OS.
///
/// \p PP is stored as-is and may be null. It is later passed to
/// highlightLines(), which produces no syntax highlighting when it is null.
TextDiagnostic::TextDiagnostic(raw_ostream &OS, const LangOptions &LangOpts,
657+
DiagnosticOptions *DiagOpts,
658+
const Preprocessor *PP)
659+
: DiagnosticRenderer(LangOpts, DiagOpts), OS(OS), PP(PP) {}
651660

652661
TextDiagnostic::~TextDiagnostic() {}
653662

@@ -1112,6 +1121,162 @@ prepareAndFilterRanges(const SmallVectorImpl<CharSourceRange> &Ranges,
11121121
return LineRanges;
11131122
}
11141123

1124+
/// Creates syntax highlighting information in form of StyleRanges.
1125+
///
1126+
/// The returned unique ptr has always exactly size
1127+
/// (\p EndLineNumber - \p StartLineNumber + 1). Each SmallVector in there
1128+
/// corresponds to syntax highlighting information in one line. In each line,
1129+
/// the StyleRanges are non-overlapping and sorted from start to end of the
1130+
/// line.
1131+
static std::unique_ptr<llvm::SmallVector<TextDiagnostic::StyleRange>[]>
1132+
highlightLines(StringRef FileData, unsigned StartLineNumber,
1133+
unsigned EndLineNumber, const Preprocessor *PP,
1134+
const LangOptions &LangOpts, bool ShowColors, FileID FID,
1135+
const SourceManager &SM) {
1136+
assert(StartLineNumber <= EndLineNumber);
1137+
auto SnippetRanges =
1138+
std::make_unique<SmallVector<TextDiagnostic::StyleRange>[]>(
1139+
EndLineNumber - StartLineNumber + 1);
1140+
1141+
if (!PP || !ShowColors)
1142+
return SnippetRanges;
1143+
1144+
// Might cause emission of another diagnostic.
1145+
if (PP->getIdentifierTable().getExternalIdentifierLookup())
1146+
return SnippetRanges;
1147+
1148+
auto Buff = llvm::MemoryBuffer::getMemBuffer(FileData);
1149+
Lexer L{FID, *Buff, SM, LangOpts};
1150+
L.SetKeepWhitespaceMode(true);
1151+
1152+
const char *FirstLineStart =
1153+
FileData.data() +
1154+
SM.getDecomposedLoc(SM.translateLineCol(FID, StartLineNumber, 1)).second;
1155+
if (const char *CheckPoint = PP->getCheckPoint(FID, FirstLineStart)) {
1156+
assert(CheckPoint >= Buff->getBufferStart() &&
1157+
CheckPoint <= Buff->getBufferEnd());
1158+
assert(CheckPoint <= FirstLineStart);
1159+
size_t Offset = CheckPoint - Buff->getBufferStart();
1160+
L.seek(Offset, /*IsAtStartOfLine=*/false);
1161+
}
1162+
1163+
// Classify the given token and append it to the given vector.
1164+
auto appendStyle =
1165+
[PP, &LangOpts](SmallVector<TextDiagnostic::StyleRange> &Vec,
1166+
const Token &T, unsigned Start, unsigned Length) -> void {
1167+
if (T.is(tok::raw_identifier)) {
1168+
StringRef RawIdent = T.getRawIdentifier();
1169+
// Special case true/false/nullptr/... literals, since they will otherwise
1170+
// be treated as keywords.
1171+
// FIXME: It would be good to have a programmatic way of getting this
1172+
// list.
1173+
if (llvm::StringSwitch<bool>(RawIdent)
1174+
.Case("true", true)
1175+
.Case("false", true)
1176+
.Case("nullptr", true)
1177+
.Case("__func__", true)
1178+
.Case("__objc_yes__", true)
1179+
.Case("__objc_no__", true)
1180+
.Case("__null", true)
1181+
.Case("__FUNCDNAME__", true)
1182+
.Case("__FUNCSIG__", true)
1183+
.Case("__FUNCTION__", true)
1184+
.Case("__FUNCSIG__", true)
1185+
.Default(false)) {
1186+
Vec.emplace_back(Start, Start + Length, LiteralColor);
1187+
} else {
1188+
const IdentifierInfo *II = PP->getIdentifierInfo(RawIdent);
1189+
assert(II);
1190+
if (II->isKeyword(LangOpts))
1191+
Vec.emplace_back(Start, Start + Length, KeywordColor);
1192+
}
1193+
} else if (tok::isLiteral(T.getKind())) {
1194+
Vec.emplace_back(Start, Start + Length, LiteralColor);
1195+
} else {
1196+
assert(T.is(tok::comment));
1197+
Vec.emplace_back(Start, Start + Length, CommentColor);
1198+
}
1199+
};
1200+
1201+
bool Stop = false;
1202+
while (!Stop) {
1203+
Token T;
1204+
Stop = L.LexFromRawLexer(T);
1205+
if (T.is(tok::unknown))
1206+
continue;
1207+
1208+
// We are only interested in identifiers, literals and comments.
1209+
if (!T.is(tok::raw_identifier) && !T.is(tok::comment) &&
1210+
!tok::isLiteral(T.getKind()))
1211+
continue;
1212+
1213+
bool Invalid = false;
1214+
unsigned TokenEndLine = SM.getSpellingLineNumber(T.getEndLoc(), &Invalid);
1215+
if (Invalid || TokenEndLine < StartLineNumber)
1216+
continue;
1217+
1218+
assert(TokenEndLine >= StartLineNumber);
1219+
1220+
unsigned TokenStartLine =
1221+
SM.getSpellingLineNumber(T.getLocation(), &Invalid);
1222+
if (Invalid)
1223+
continue;
1224+
// If this happens, we're done.
1225+
if (TokenStartLine > EndLineNumber)
1226+
break;
1227+
1228+
unsigned StartCol =
1229+
SM.getSpellingColumnNumber(T.getLocation(), &Invalid) - 1;
1230+
if (Invalid)
1231+
continue;
1232+
1233+
// Simple tokens.
1234+
if (TokenStartLine == TokenEndLine) {
1235+
SmallVector<TextDiagnostic::StyleRange> &LineRanges =
1236+
SnippetRanges[TokenStartLine - StartLineNumber];
1237+
appendStyle(LineRanges, T, StartCol, T.getLength());
1238+
continue;
1239+
}
1240+
assert((TokenEndLine - TokenStartLine) >= 1);
1241+
1242+
// For tokens that span multiple lines (think multiline comments), we
1243+
// divide them into multiple StyleRanges.
1244+
unsigned EndCol = SM.getSpellingColumnNumber(T.getEndLoc(), &Invalid) - 1;
1245+
if (Invalid)
1246+
continue;
1247+
1248+
std::string Spelling = Lexer::getSpelling(T, SM, LangOpts);
1249+
1250+
unsigned L = TokenStartLine;
1251+
unsigned LineLength = 0;
1252+
for (unsigned I = 0; I <= Spelling.size(); ++I) {
1253+
// This line is done.
1254+
if (isVerticalWhitespace(Spelling[I]) || I == Spelling.size()) {
1255+
SmallVector<TextDiagnostic::StyleRange> &LineRanges =
1256+
SnippetRanges[L - StartLineNumber];
1257+
1258+
if (L >= StartLineNumber) {
1259+
if (L == TokenStartLine) // First line
1260+
appendStyle(LineRanges, T, StartCol, LineLength);
1261+
else if (L == TokenEndLine) // Last line
1262+
appendStyle(LineRanges, T, 0, EndCol);
1263+
else
1264+
appendStyle(LineRanges, T, 0, LineLength);
1265+
}
1266+
1267+
++L;
1268+
if (L > EndLineNumber)
1269+
break;
1270+
LineLength = 0;
1271+
continue;
1272+
}
1273+
++LineLength;
1274+
}
1275+
}
1276+
1277+
return SnippetRanges;
1278+
}
1279+
11151280
/// Emit a code snippet and caret line.
11161281
///
11171282
/// This routine emits a single line's code snippet and caret line.
@@ -1181,6 +1346,12 @@ void TextDiagnostic::emitSnippetAndCaret(
11811346
OS.indent(MaxLineNoDisplayWidth + 2) << "| ";
11821347
};
11831348

1349+
// Prepare source highlighting information for the lines we're about to
1350+
// emit, starting from the first line.
1351+
std::unique_ptr<SmallVector<StyleRange>[]> SourceStyles =
1352+
highlightLines(BufStart, Lines.first, Lines.second, PP, LangOpts,
1353+
DiagOpts->ShowColors, FID, SM);
1354+
11841355
SmallVector<LineRange> LineRanges =
11851356
prepareAndFilterRanges(Ranges, SM, Lines, FID, LangOpts);
11861357

@@ -1247,7 +1418,8 @@ void TextDiagnostic::emitSnippetAndCaret(
12471418
}
12481419

12491420
// Emit what we have computed.
1250-
emitSnippet(SourceLine, MaxLineNoDisplayWidth, DisplayLineNo);
1421+
emitSnippet(SourceLine, MaxLineNoDisplayWidth, LineNo, DisplayLineNo,
1422+
SourceStyles[LineNo - Lines.first]);
12511423

12521424
if (!CaretLine.empty()) {
12531425
indentForLineNumbers();
@@ -1277,16 +1449,18 @@ void TextDiagnostic::emitSnippetAndCaret(
12771449

12781450
void TextDiagnostic::emitSnippet(StringRef SourceLine,
12791451
unsigned MaxLineNoDisplayWidth,
1280-
unsigned LineNo) {
1452+
unsigned LineNo, unsigned DisplayLineNo,
1453+
ArrayRef<StyleRange> Styles) {
12811454
// Emit line number.
12821455
if (MaxLineNoDisplayWidth > 0) {
1283-
unsigned LineNoDisplayWidth = getNumDisplayWidth(LineNo);
1456+
unsigned LineNoDisplayWidth = getNumDisplayWidth(DisplayLineNo);
12841457
OS.indent(MaxLineNoDisplayWidth - LineNoDisplayWidth + 1)
1285-
<< LineNo << " | ";
1458+
<< DisplayLineNo << " | ";
12861459
}
12871460

12881461
// Print the source line one character at a time.
12891462
bool PrintReversed = false;
1463+
std::optional<llvm::raw_ostream::Colors> CurrentColor;
12901464
size_t I = 0;
12911465
while (I < SourceLine.size()) {
12921466
auto [Str, WasPrintable] =
@@ -1298,10 +1472,29 @@ void TextDiagnostic::emitSnippet(StringRef SourceLine,
12981472
PrintReversed = !PrintReversed;
12991473
if (PrintReversed)
13001474
OS.reverseColor();
1301-
else
1475+
else {
13021476
OS.resetColor();
1477+
CurrentColor = std::nullopt;
1478+
}
1479+
}
1480+
1481+
// Apply syntax highlighting information.
1482+
const auto *CharStyle = llvm::find_if(Styles, [I](const StyleRange &R) {
1483+
return (R.Start < I && R.End >= I);
1484+
});
1485+
1486+
if (CharStyle != Styles.end()) {
1487+
if (!CurrentColor ||
1488+
(CurrentColor && *CurrentColor != CharStyle->Color)) {
1489+
OS.changeColor(CharStyle->Color, false);
1490+
CurrentColor = CharStyle->Color;
1491+
}
1492+
} else if (CurrentColor) {
1493+
OS.resetColor();
1494+
CurrentColor = std::nullopt;
13031495
}
13041496
}
1497+
13051498
OS << Str;
13061499
}
13071500

clang/lib/Frontend/TextDiagnosticPrinter.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ TextDiagnosticPrinter::~TextDiagnosticPrinter() {
3636
void TextDiagnosticPrinter::BeginSourceFile(const LangOptions &LO,
3737
const Preprocessor *PP) {
3838
// Build the TextDiagnostic utility.
39-
TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts));
39+
TextDiag.reset(new TextDiagnostic(OS, LO, &*DiagOpts, PP));
4040
}
4141

4242
void TextDiagnosticPrinter::EndSourceFile() {

0 commit comments

Comments
 (0)