Skip to content

Commit 478863e

Browse files
committed
[clangd] Basic IncludeCleaner support for c/c++ standard library
There are some limitations here, so this is behind a flag for now (in addition to the config setting for the overall feature). - symbols without exactly one associated header aren't handled right - no macro support - referencing std::size_t usually doesn't leave any trace in the AST that the alias in std was used, so we associate with stddef.h instead of cstddef. (An AST issue not specific to stdlib, but much worse there) Differential Revision: https://reviews.llvm.org/D114077
1 parent b9ed95a commit 478863e

File tree

7 files changed

+487
-40
lines changed

7 files changed

+487
-40
lines changed

clang-tools-extra/clangd/Headers.cpp

Lines changed: 161 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -61,10 +61,19 @@ class IncludeStructure::RecordHeaders : public PPCallbacks,
6161
SM.getLineNumber(SM.getFileID(HashLoc), Inc.HashOffset) - 1;
6262
Inc.FileKind = FileKind;
6363
Inc.Directive = IncludeTok.getIdentifierInfo()->getPPKeywordID();
64-
if (File)
65-
Inc.HeaderID = static_cast<unsigned>(Out->getOrCreateID(File));
6664
if (LastPragmaKeepInMainFileLine == Inc.HashLine)
6765
Inc.BehindPragmaKeep = true;
66+
if (File) {
67+
IncludeStructure::HeaderID HID = Out->getOrCreateID(File);
68+
Inc.HeaderID = static_cast<unsigned>(HID);
69+
if (IsAngled)
70+
if (auto StdlibHeader = stdlib::Header::named(Inc.Written)) {
71+
auto &IDs = Out->StdlibHeaders[*StdlibHeader];
72+
// Few physical files for one stdlib header name, linear scan is ok.
73+
if (!llvm::is_contained(IDs, HID))
74+
IDs.push_back(HID);
75+
}
76+
}
6877
}
6978

7079
// Record include graph (not just for main-file includes)
@@ -340,5 +349,155 @@ bool operator==(const Inclusion &LHS, const Inclusion &RHS) {
340349
std::tie(RHS.Directive, RHS.FileKind, RHS.HashOffset, RHS.HashLine,
341350
RHS.Resolved, RHS.Written);
342351
}
352+
353+
namespace stdlib {
354+
static llvm::StringRef *HeaderNames;
355+
static std::pair<llvm::StringRef, llvm::StringRef> *SymbolNames;
356+
static unsigned *SymbolHeaderIDs;
357+
static llvm::DenseMap<llvm::StringRef, unsigned> *HeaderIDs;
358+
// Maps symbol name -> Symbol::ID, within a namespace.
359+
using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
360+
static llvm::DenseMap<llvm::StringRef, NSSymbolMap *> *NamespaceSymbols;
361+
362+
// Builds the symbol database from the SYMBOL(...) lists in CSymbolMap.inc and
// StdSymbolMap.inc.
//
// The lists are expanded twice: first to count the entries (so the per-symbol
// arrays can be sized), then to fill in the tables. Always returns 0; the
// return value only exists so the call can be used as a static initializer.
static int initialize() {
  // Pass 1: count the entries.
  unsigned NumSymbols = 0;
#define SYMBOL(Name, NS, Header) ++NumSymbols;
#include "CSymbolMap.inc"
#include "StdSymbolMap.inc"
#undef SYMBOL

  SymbolNames = new std::pair<llvm::StringRef, llvm::StringRef>[NumSymbols];
  SymbolHeaderIDs = new unsigned[NumSymbols];
  NamespaceSymbols = new llvm::DenseMap<llvm::StringRef, NSSymbolMap *>;
  HeaderIDs = new llvm::DenseMap<llvm::StringRef, unsigned>;

  // Pass 2: record each entry. Symbol IDs follow the expansion order.
  unsigned NextSymbol = 0;
  auto Record = [&](llvm::StringRef Name, llvm::StringRef NS,
                    llvm::StringRef HeaderName) {
    // Entries whose namespace is spelled None are filed under the empty name.
    if (NS == "None")
      NS = "";

    SymbolNames[NextSymbol] = {NS, Name};

    // A header gets the next free ID the first time it is seen.
    auto HeaderSlot = HeaderIDs->try_emplace(HeaderName, HeaderIDs->size());
    SymbolHeaderIDs[NextSymbol] = HeaderSlot.first->second;

    // Create the per-namespace table on first use.
    auto NSSlot = NamespaceSymbols->try_emplace(NS, nullptr);
    if (NSSlot.second)
      NSSlot.first->second = new NSSymbolMap();
    // If a name is listed more than once, the first entry is kept.
    NSSlot.first->second->try_emplace(Name, NextSymbol);

    ++NextSymbol;
  };
#define SYMBOL(Name, NS, Header) Record(#Name, #NS, #Header);
#include "CSymbolMap.inc"
#include "StdSymbolMap.inc"
#undef SYMBOL

  // Invert HeaderIDs to obtain the ID -> name table.
  HeaderNames = new llvm::StringRef[HeaderIDs->size()];
  for (const auto &Entry : *HeaderIDs)
    HeaderNames[Entry.second] = Entry.first;

  return 0;
}
409+
410+
static void ensureInitialized() {
411+
static int Dummy = initialize();
412+
(void)Dummy;
413+
}
414+
415+
// Returns the Header registered under exactly Name, or None if there is none.
llvm::Optional<Header> Header::named(llvm::StringRef Name) {
  ensureInitialized();
  auto Found = HeaderIDs->find(Name);
  if (Found != HeaderIDs->end())
    return Header(Found->second);
  return llvm::None;
}
422+
// The name stored for this header in the header table.
llvm::StringRef Header::name() const {
  return HeaderNames[ID];
}
// The namespace part of this symbol's table entry.
llvm::StringRef Symbol::scope() const {
  const auto &Entry = SymbolNames[ID];
  return Entry.first;
}
// The name part of this symbol's table entry.
llvm::StringRef Symbol::name() const {
  const auto &Entry = SymbolNames[ID];
  return Entry.second;
}
425+
// Returns the symbol called Name in the namespace Scope, or None if either the
// namespace or the name is not in the database.
llvm::Optional<Symbol> Symbol::named(llvm::StringRef Scope,
                                     llvm::StringRef Name) {
  ensureInitialized();
  NSSymbolMap *InScope = NamespaceSymbols->lookup(Scope);
  if (!InScope)
    return llvm::None;
  auto Found = InScope->find(Name);
  if (Found == InScope->end())
    return llvm::None;
  return Symbol(Found->second);
}
435+
Header Symbol::header() const { return Header(SymbolHeaderIDs[ID]); }
436+
llvm::SmallVector<Header> Symbol::headers() const {
437+
return {header()}; // FIXME: multiple in case of ambiguity
438+
}
439+
440+
// Makes sure the symbol database is built before any lookup happens.
Recognizer::Recognizer() {
  ensureInitialized();
}
441+
442+
// Returns the symbol table for the namespace D, or null if the database has
// none for it. A null D stands for the global namespace.
// Results (including null ones) are remembered in NamespaceCache.
NSSymbolMap *Recognizer::namespaceSymbols(const NamespaceDecl *D) {
  auto Cached = NamespaceCache.find(D);
  if (Cached != NamespaceCache.end())
    return Cached->second;

  NSSymbolMap *Result = nullptr;
  if (!D) {
    // Nullptr means the global namespace
    Result = NamespaceSymbols->lookup("");
  } else if (D->isAnonymousNamespace()) {
    Result = nullptr;
  } else if (D->isInlineNamespace()) {
    // An inline namespace shares the table of its enclosing namespace, if it
    // has one.
    if (auto *Parent = llvm::dyn_cast_or_null<NamespaceDecl>(D->getParent()))
      Result = namespaceSymbols(Parent);
  } else {
    Result = NamespaceSymbols->lookup(printNamespaceScope(*D));
  }

  NamespaceCache.try_emplace(D, Result);
  return Result;
}
462+
463+
// Returns the database symbol that D belongs to, or None if there is none.
// For a nested decl the lookup uses the outermost enclosing named context,
// e.g. for std::vector::iterator the name looked up is `vector`.
llvm::Optional<Symbol> Recognizer::operator()(const Decl *D) {
  // Collect the named contexts between D and its enclosing namespace,
  // innermost first. All candidates are kept because some may have empty
  // names (e.g. anonymous enums) and need to be skipped later. Picking one is
  // postponed until we know the namespace has a symbol table at all.
  llvm::SmallVector<const DeclContext *> Enclosing;
  const DeclContext *Ctx = D->getDeclContext();
  for (; Ctx && !Ctx->isNamespace(); Ctx = Ctx->getParent())
    if (NamedDecl::classofKind(Ctx->getDeclKind()))
      Enclosing.push_back(Ctx);

  NSSymbolMap *Symbols = namespaceSymbols(cast_or_null<NamespaceDecl>(Ctx));
  if (!Symbols)
    return llvm::None;

  // Walk the candidates outermost-first and take the first plain identifier.
  const IdentifierInfo *Chosen = nullptr;
  for (const auto *Outer : llvm::reverse(Enclosing)) {
    DeclarationName N = cast<NamedDecl>(Outer)->getDeclName();
    Chosen = N.getAsIdentifierInfo();
    if (Chosen)
      break;
    if (!N.isEmpty())
      return llvm::None; // e.g. operator<: give up
  }
  // No usable enclosing context: fall back to the name of D itself.
  if (!Chosen)
    if (const auto *ND = llvm::dyn_cast<NamedDecl>(D))
      Chosen = ND->getIdentifier();
  if (!Chosen)
    return llvm::None;

  llvm::StringRef Name = Chosen->getName();
  if (Name.empty())
    return llvm::None;

  auto Found = Symbols->find(Name);
  if (Found != Symbols->end())
    return Symbol(Found->second);
  return llvm::None;
}
499+
500+
} // namespace stdlib
501+
343502
} // namespace clangd
344503
} // namespace clang

clang-tools-extra/clangd/Headers.h

Lines changed: 106 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,78 @@
3232
#include <string>
3333

3434
namespace clang {
35+
class Decl;
36+
class NamespaceDecl;
3537
namespace clangd {
3638

39+
// clangd has a built-in database of standard library symbols.
40+
namespace stdlib {
41+
42+
// A standard library header, such as <iostream>
43+
// Lightweight class, in fact just an index into a table.
44+
class Header {
45+
public:
46+
static llvm::Optional<Header> named(llvm::StringRef Name);
47+
48+
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Header &H) {
49+
return OS << H.name();
50+
}
51+
llvm::StringRef name() const;
52+
53+
private:
54+
Header(unsigned ID) : ID(ID) {}
55+
unsigned ID;
56+
friend class Symbol;
57+
friend llvm::DenseMapInfo<Header>;
58+
friend bool operator==(const Header &L, const Header &R) {
59+
return L.ID == R.ID;
60+
}
61+
};
62+
63+
// A top-level standard library symbol, such as std::vector
64+
// Lightweight class, in fact just an index into a table.
65+
class Symbol {
66+
public:
67+
static llvm::Optional<Symbol> named(llvm::StringRef Scope,
68+
llvm::StringRef Name);
69+
70+
friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS, const Symbol &S) {
71+
return OS << S.scope() << S.name();
72+
}
73+
llvm::StringRef scope() const;
74+
llvm::StringRef name() const;
75+
// The preferred header for this symbol (e.g. the suggested insertion).
76+
Header header() const;
77+
// Some symbols may be provided my multiple headers.
78+
llvm::SmallVector<Header> headers() const;
79+
80+
private:
81+
Symbol(unsigned ID) : ID(ID) {}
82+
unsigned ID;
83+
friend class Recognizer;
84+
friend llvm::DenseMapInfo<Symbol>;
85+
friend bool operator==(const Symbol &L, const Symbol &R) {
86+
return L.ID == R.ID;
87+
}
88+
};
89+
90+
// A functor to find the stdlib::Symbol associated with a decl.
91+
//
92+
// For non-top-level decls (std::vector<int>::iterator), returns the top-level
93+
// symbol (std::vector).
94+
class Recognizer {
95+
public:
96+
Recognizer();
97+
llvm::Optional<Symbol> operator()(const Decl *D);
98+
99+
private:
100+
using NSSymbolMap = llvm::DenseMap<llvm::StringRef, unsigned>;
101+
NSSymbolMap *namespaceSymbols(const NamespaceDecl *D);
102+
llvm::DenseMap<const DeclContext *, NSSymbolMap *> NamespaceCache;
103+
};
104+
105+
} // namespace stdlib
106+
37107
/// Returns true if \p Include is literal include like "path" or <path>.
38108
bool isLiteralInclude(llvm::StringRef Include);
39109

@@ -160,6 +230,8 @@ class IncludeStructure {
160230
// Maps HeaderID to the ids of the files included from it.
161231
llvm::DenseMap<HeaderID, SmallVector<HeaderID>> IncludeChildren;
162232

233+
llvm::DenseMap<stdlib::Header, llvm::SmallVector<HeaderID>> StdlibHeaders;
234+
163235
std::vector<Inclusion> MainFileIncludes;
164236

165237
// We reserve HeaderID(0) for the main file and will manually check for that
@@ -250,13 +322,11 @@ namespace llvm {
250322
// Support HeaderIDs as DenseMap keys.
251323
template <> struct DenseMapInfo<clang::clangd::IncludeStructure::HeaderID> {
252324
static inline clang::clangd::IncludeStructure::HeaderID getEmptyKey() {
253-
return static_cast<clang::clangd::IncludeStructure::HeaderID>(
254-
DenseMapInfo<unsigned>::getEmptyKey());
325+
return static_cast<clang::clangd::IncludeStructure::HeaderID>(-1);
255326
}
256327

257328
static inline clang::clangd::IncludeStructure::HeaderID getTombstoneKey() {
258-
return static_cast<clang::clangd::IncludeStructure::HeaderID>(
259-
DenseMapInfo<unsigned>::getTombstoneKey());
329+
return static_cast<clang::clangd::IncludeStructure::HeaderID>(-2);
260330
}
261331

262332
static unsigned
@@ -270,6 +340,38 @@ template <> struct DenseMapInfo<clang::clangd::IncludeStructure::HeaderID> {
270340
}
271341
};
272342

343+
template <> struct DenseMapInfo<clang::clangd::stdlib::Header> {
344+
static inline clang::clangd::stdlib::Header getEmptyKey() {
345+
return clang::clangd::stdlib::Header(-1);
346+
}
347+
static inline clang::clangd::stdlib::Header getTombstoneKey() {
348+
return clang::clangd::stdlib::Header(-2);
349+
}
350+
static unsigned getHashValue(const clang::clangd::stdlib::Header &H) {
351+
return hash_value(H.ID);
352+
}
353+
static bool isEqual(const clang::clangd::stdlib::Header &LHS,
354+
const clang::clangd::stdlib::Header &RHS) {
355+
return LHS == RHS;
356+
}
357+
};
358+
359+
template <> struct DenseMapInfo<clang::clangd::stdlib::Symbol> {
360+
static inline clang::clangd::stdlib::Symbol getEmptyKey() {
361+
return clang::clangd::stdlib::Symbol(-1);
362+
}
363+
static inline clang::clangd::stdlib::Symbol getTombstoneKey() {
364+
return clang::clangd::stdlib::Symbol(-2);
365+
}
366+
static unsigned getHashValue(const clang::clangd::stdlib::Symbol &S) {
367+
return hash_value(S.ID);
368+
}
369+
static bool isEqual(const clang::clangd::stdlib::Symbol &LHS,
370+
const clang::clangd::stdlib::Symbol &RHS) {
371+
return LHS == RHS;
372+
}
373+
};
374+
273375
} // namespace llvm
274376

275377
#endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_HEADERS_H

0 commit comments

Comments
 (0)