Skip to content

Commit 8249dc2

Browse files
committed
[include-cleaner] Record main-file macro occurrences and includes
The occurrences are roots for finding used headers, like walkAST. Includes are the targets we're matching used headers against. Differential Revision: https://reviews.llvm.org/D136723
1 parent 6aa050a commit 8249dc2

File tree

5 files changed

+366
-13
lines changed

5 files changed

+366
-13
lines changed

clang-tools-extra/include-cleaner/include/clang-include-cleaner/Record.h

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,10 @@
2020
#include "llvm/ADT/DenseMap.h"
2121
#include "llvm/ADT/DenseSet.h"
2222
#include "llvm/Support/FileSystem/UniqueID.h"
23+
#include "clang-include-cleaner/Types.h"
24+
#include "llvm/ADT/ArrayRef.h"
25+
#include "llvm/ADT/DenseMap.h"
26+
#include "llvm/ADT/StringMap.h"
2327
#include <memory>
2428
#include <vector>
2529

@@ -29,6 +33,8 @@ class ASTContext;
2933
class CompilerInstance;
3034
class Decl;
3135
class FileEntry;
36+
class Preprocessor;
37+
class PPCallbacks;
3238

3339
namespace include_cleaner {
3440

@@ -75,19 +81,55 @@ class PragmaIncludes {
7581
// FIXME: add selfcontained file.
7682
};
7783

78-
// Contains recorded parser events relevant to include-cleaner.
84+
/// Recorded main-file parser events relevant to include-cleaner.
7985
struct RecordedAST {
80-
// The consumer (when installed into clang) tracks declarations in this.
86+
/// The consumer (when installed into clang) tracks declarations in `*this`.
8187
std::unique_ptr<ASTConsumer> record();
8288

8389
ASTContext *Ctx = nullptr;
84-
// The set of declarations written at file scope inside the main file.
85-
//
86-
// These are the roots of the subtrees that should be traversed to find uses.
87-
// (Traversing the TranslationUnitDecl would find uses inside headers!)
90+
/// The set of declarations written at file scope inside the main file.
91+
///
92+
/// These are the roots of the subtrees that should be traversed to find uses.
93+
/// (Traversing the TranslationUnitDecl would find uses inside headers!)
8894
std::vector<Decl *> Roots;
8995
};
9096

97+
/// Recorded main-file preprocessor events relevant to include-cleaner.
98+
///
99+
/// This doesn't include facts that we record globally for the whole TU, even
100+
/// when they occur in the main file (e.g. IWYU pragmas).
101+
struct RecordedPP {
102+
/// The callback (when installed into clang) tracks macros/includes in this.
103+
std::unique_ptr<PPCallbacks> record(const Preprocessor &PP);
104+
105+
/// Describes where macros were used in the main file.
106+
std::vector<SymbolReference> MacroReferences;
107+
108+
/// A container for all includes present in the main file.
109+
/// Supports efficiently hit-testing Headers against Includes.
110+
/// FIXME: is there a more natural header for this class?
111+
class RecordedIncludes {
112+
public:
113+
void add(const Include &);
114+
115+
/// All #includes seen, in the order they appear.
116+
llvm::ArrayRef<Include> all() const { return All; }
117+
118+
/// Determine #includes that match a header (that provides a used symbol).
119+
///
120+
/// Matching is based on the type of Header specified:
121+
/// - for a physical file like /path/to/foo.h, we check Resolved
122+
/// - for a logical file like <vector>, we check Spelled
123+
llvm::SmallVector<const Include *> match(Header H) const;
124+
125+
private:
126+
std::vector<Include> All;
127+
// Lookup structures for match(), values are index into All.
128+
llvm::StringMap<llvm::SmallVector<unsigned>> BySpelling;
129+
llvm::DenseMap<const FileEntry *, llvm::SmallVector<unsigned>> ByFile;
130+
} Includes;
131+
};
132+
91133
} // namespace include_cleaner
92134
} // namespace clang
93135

clang-tools-extra/include-cleaner/include/clang-include-cleaner/Types.h

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#ifndef CLANG_INCLUDE_CLEANER_TYPES_H
2323
#define CLANG_INCLUDE_CLEANER_TYPES_H
2424

25+
#include "clang/Basic/SourceLocation.h"
2526
#include "clang/Tooling/Inclusions/StandardLibrary.h"
2627
#include <memory>
2728
#include <vector>
@@ -32,35 +33,60 @@ class raw_ostream;
3233
namespace clang {
3334
class Decl;
3435
class FileEntry;
36+
class IdentifierInfo;
3537
namespace include_cleaner {
3638

39+
/// We consider a macro to be a different symbol each time it is defined.
40+
struct Macro {
41+
IdentifierInfo *Name;
42+
/// The location of the Name where the macro is defined.
43+
SourceLocation Definition;
44+
45+
bool operator==(const Macro &S) const {
46+
return Definition == S.Definition;
47+
}
48+
};
49+
3750
/// An entity that can be referenced in the code.
3851
struct Symbol {
3952
enum Kind {
4053
/// A canonical clang declaration.
4154
Declaration,
55+
/// A preprocessor macro, as defined in a specific location.
56+
Macro,
4257
/// A recognized symbol from the standard library, like std::string.
4358
Standard,
4459
};
4560

4661
Symbol(const Decl &D) : Storage(&D) {}
62+
Symbol(struct Macro M) : Storage(M) {}
4763
Symbol(tooling::stdlib::Symbol S) : Storage(S) {}
4864

4965
Kind kind() const { return static_cast<Kind>(Storage.index()); }
5066
bool operator==(const Symbol &RHS) const { return Storage == RHS.Storage; }
5167

68+
const Decl &declaration() const { return *std::get<Declaration>(Storage); }
69+
struct Macro macro() const { return std::get<Macro>(Storage); }
5270
tooling::stdlib::Symbol standard() const {
5371
return std::get<Standard>(Storage);
5472
}
55-
const Decl &declaration() const { return *std::get<Declaration>(Storage); }
5673

5774
private:
5875
// FIXME: Add support for macros.
5976
// Order must match Kind enum!
60-
std::variant<const Decl *, tooling::stdlib::Symbol> Storage;
77+
std::variant<const Decl *, struct Macro, tooling::stdlib::Symbol> Storage;
6178
};
6279
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Symbol &);
6380

81+
/// Indicates that a piece of code refers to a symbol.
82+
struct SymbolReference {
83+
/// The symbol referred to.
84+
Symbol Symbol;
85+
/// The point in the code that refers to the symbol.
86+
SourceLocation RefLocation;
87+
};
88+
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const SymbolReference &);
89+
6490
/// Represents a file that provides some symbol. Might not be includeable, e.g.
6591
/// built-in or main-file itself.
6692
struct Header {
@@ -89,8 +115,17 @@ struct Header {
89115
};
90116
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Header &);
91117

118+
/// A single #include directive written in the main file.
119+
struct Include {
120+
llvm::StringRef Spelled; // e.g. vector
121+
const FileEntry *Resolved = nullptr; // e.g. /path/to/c++/v1/vector
122+
// nullptr if the header was not found
123+
SourceLocation HashLocation; // of hash in #include <vector>
124+
unsigned Line = 0; // 1-based line number for #include
125+
};
126+
llvm::raw_ostream &operator<<(llvm::raw_ostream &, const Include &);
127+
92128
} // namespace include_cleaner
93129
} // namespace clang
94130

95131
#endif
96-

clang-tools-extra/include-cleaner/lib/Record.cpp

Lines changed: 113 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,91 @@
1212
#include "clang/AST/DeclGroup.h"
1313
#include "clang/Basic/SourceManager.h"
1414
#include "clang/Frontend/CompilerInstance.h"
15+
#include "clang/Lex/MacroInfo.h"
1516
#include "clang/Lex/PPCallbacks.h"
1617
#include "clang/Lex/Preprocessor.h"
1718

1819
namespace clang::include_cleaner {
20+
namespace {
21+
22+
class PPRecorder : public PPCallbacks {
23+
public:
24+
PPRecorder(RecordedPP &Recorded, const Preprocessor &PP)
25+
: Recorded(Recorded), PP(PP), SM(PP.getSourceManager()) {}
26+
27+
void FileChanged(SourceLocation Loc, FileChangeReason Reason,
28+
SrcMgr::CharacteristicKind FileType,
29+
FileID PrevFID) override {
30+
Active = SM.isWrittenInMainFile(Loc);
31+
}
32+
33+
void InclusionDirective(SourceLocation Hash, const Token &IncludeTok,
34+
StringRef SpelledFilename, bool IsAngled,
35+
CharSourceRange FilenameRange,
36+
llvm::Optional<FileEntryRef> File,
37+
StringRef SearchPath, StringRef RelativePath,
38+
const Module *, SrcMgr::CharacteristicKind) override {
39+
if (!Active)
40+
return;
41+
42+
Include I;
43+
I.HashLocation = Hash;
44+
I.Resolved = File ? &File->getFileEntry() : nullptr;
45+
I.Line = SM.getSpellingLineNumber(Hash);
46+
I.Spelled = SpelledFilename;
47+
Recorded.Includes.add(I);
48+
}
49+
50+
void MacroExpands(const Token &MacroName, const MacroDefinition &MD,
51+
SourceRange Range, const MacroArgs *Args) override {
52+
if (!Active)
53+
return;
54+
recordMacroRef(MacroName, *MD.getMacroInfo());
55+
}
56+
57+
void MacroDefined(const Token &MacroName, const MacroDirective *MD) override {
58+
if (!Active)
59+
return;
60+
61+
const auto *MI = MD->getMacroInfo();
62+
// The tokens of a macro definition could refer to a macro.
63+
// Formally this reference isn't resolved until this macro is expanded,
64+
// but we want to treat it as a reference anyway.
65+
for (const auto &Tok : MI->tokens()) {
66+
auto *II = Tok.getIdentifierInfo();
67+
// Could this token be a reference to a macro? (Not param to this macro).
68+
if (!II || !II->hadMacroDefinition() ||
69+
llvm::is_contained(MI->params(), II))
70+
continue;
71+
if (const MacroInfo *MI = PP.getMacroInfo(II))
72+
recordMacroRef(Tok, *MI);
73+
}
74+
}
75+
76+
void MacroUndefined(const Token &MacroName, const MacroDefinition &MD,
77+
const MacroDirective *) override {
78+
if (!Active)
79+
return;
80+
if (const auto *MI = MD.getMacroInfo())
81+
recordMacroRef(MacroName, *MI);
82+
}
83+
84+
private:
85+
void recordMacroRef(const Token &Tok, const MacroInfo &MI) {
86+
if (MI.isBuiltinMacro())
87+
return; // __FILE__ is not a reference.
88+
Recorded.MacroReferences.push_back(
89+
SymbolReference{Macro{Tok.getIdentifierInfo(), MI.getDefinitionLoc()},
90+
Tok.getLocation()});
91+
}
92+
93+
bool Active = false;
94+
RecordedPP &Recorded;
95+
const Preprocessor &PP;
96+
const SourceManager &SM;
97+
};
98+
99+
} // namespace
19100

20101
// FIXME: this is a mirror of clang::clangd::parseIWYUPragma, move to libTooling
21102
// to share the code?
@@ -142,4 +223,36 @@ std::unique_ptr<ASTConsumer> RecordedAST::record() {
142223
return std::make_unique<Recorder>(this);
143224
}
144225

226+
void RecordedPP::RecordedIncludes::add(const Include &I) {
227+
unsigned Index = All.size();
228+
All.push_back(I);
229+
auto BySpellingIt = BySpelling.try_emplace(I.Spelled).first;
230+
All.back().Spelled = BySpellingIt->first(); // Now we own the backing string.
231+
232+
BySpellingIt->second.push_back(Index);
233+
if (I.Resolved)
234+
ByFile[I.Resolved].push_back(Index);
235+
}
236+
237+
/// Returns pointers to the recorded includes matching `H`: physical headers
/// are looked up by file, standard headers by name with the angle brackets
/// trimmed.
llvm::SmallVector<const Include *>
RecordedPP::RecordedIncludes::match(Header H) const {
  llvm::SmallVector<const Include *> Matches;
  // Turns positions in All back into pointers to the stored Includes.
  auto Collect = [&](const llvm::SmallVector<unsigned> &Positions) {
    for (unsigned Pos : Positions)
      Matches.push_back(&All[Pos]);
  };

  switch (H.kind()) {
  case Header::Physical:
    Collect(ByFile.lookup(H.physical()));
    break;
  case Header::Standard:
    Collect(BySpelling.lookup(H.standard().name().trim("<>")));
    break;
  }
  return Matches;
}
252+
253+
std::unique_ptr<PPCallbacks> RecordedPP::record(const Preprocessor &PP) {
254+
return std::make_unique<PPRecorder>(*this, PP);
255+
}
256+
257+
145258
} // namespace clang::include_cleaner

clang-tools-extra/include-cleaner/lib/Types.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
#include "clang-include-cleaner/Types.h"
1010
#include "clang/AST/Decl.h"
1111
#include "clang/Basic/FileEntry.h"
12+
#include "llvm/ADT/StringExtras.h"
1213
#include "llvm/Support/raw_ostream.h"
1314

1415
namespace clang::include_cleaner {
@@ -19,6 +20,8 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
1920
if (const auto *ND = llvm::dyn_cast<NamedDecl>(&S.declaration()))
2021
return OS << ND->getNameAsString();
2122
return OS << S.declaration().getDeclKindName();
23+
case Symbol::Macro:
24+
return OS << S.macro().Name;
2225
case Symbol::Standard:
2326
return OS << S.standard().scope() << S.standard().name();
2427
}
@@ -35,4 +38,18 @@ llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) {
3538
llvm_unreachable("Unhandled Header kind");
3639
}
3740

41+
/// Prints an Include as `<line>: <spelling> => <resolved file name>`, using
/// `<missing>` when the include did not resolve to a file.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Include &I) {
  llvm::StringRef Target = "<missing>";
  if (I.Resolved)
    Target = I.Resolved->getName();
  OS << I.Line << ": " << I.Spelled << " => " << Target;
  return OS;
}
45+
46+
/// Prints a SymbolReference as `<symbol>@0x<location>`, where the location is
/// the zero-padded hexadecimal raw encoding of RefLocation.
llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const SymbolReference &R) {
  // We can't decode the Location without SourceManager. Its raw representation
  // isn't completely useless (and distinguishes SymbolReference from Symbol).
  //
  // utohexstr's parameters are (Value, LowerCase, Width): LowerCase has to be
  // passed explicitly, otherwise the width is consumed as the bool and no
  // padding happens. Width counts hex digits, i.e. two per byte.
  return OS << R.Symbol << "@0x"
            << llvm::utohexstr(R.RefLocation.getRawEncoding(),
                               /*LowerCase=*/false,
                               /*Width=*/2 * sizeof(SourceLocation::UIntTy));
}
54+
3855
} // namespace clang::include_cleaner

0 commit comments

Comments
 (0)