Skip to content

Commit 5d9a934

Browse files
committed
[clang][cas] Switch to chained representation of IncludeTree::FileList
Instead of flattening file lists when importing PCH/modules, just create a chained reference to the list and flatten it only in FileList::forEachFile. This reduces the storage cost and performance overhead of merging file lists. (cherry picked from commit ee7ddda) (cherry picked from commit 2fbfa49)
1 parent 29f4935 commit 5d9a934

File tree

4 files changed

+163
-84
lines changed

4 files changed

+163
-84
lines changed

clang/include/clang/CAS/IncludeTree.h

Lines changed: 8 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -264,25 +264,15 @@ class IncludeTree::File : public IncludeTreeBase<File> {
264264
}
265265
};
266266

267-
/// A flat list of \c File entries. This is used along with a simple
268-
/// implementation of a \p vfs::FileSystem produced via
269-
/// \p createIncludeTreeFileSystem().
267+
/// A list of \c File entries. Multiple \c FileList can be combined without
268+
/// copying their contents. This is used along with a simple implementation of a
269+
/// \p vfs::FileSystem produced via \p createIncludeTreeFileSystem().
270270
class IncludeTree::FileList : public IncludeTreeBase<FileList> {
271271
public:
272272
static constexpr StringRef getNodeKind() { return "List"; }
273273

274274
using FileSizeTy = uint32_t;
275275

276-
size_t getNumFiles() const { return getNumReferences(); }
277-
278-
ObjectRef getFileRef(size_t I) const {
279-
assert(I < getNumFiles());
280-
return getReference(I);
281-
}
282-
283-
Expected<File> getFile(size_t I) { return getFile(getFileRef(I)); }
284-
FileSizeTy getFileSize(size_t I) const;
285-
286276
/// \returns each \c File entry along with its file size.
287277
llvm::Error
288278
forEachFile(llvm::function_ref<llvm::Error(File, FileSizeTy)> Callback);
@@ -294,7 +284,8 @@ class IncludeTree::FileList : public IncludeTreeBase<FileList> {
294284
ObjectRef FileRef;
295285
FileSizeTy Size;
296286
};
297-
static Expected<FileList> create(ObjectStore &DB, ArrayRef<FileEntry> Files);
287+
static Expected<FileList> create(ObjectStore &DB, ArrayRef<FileEntry> Files,
288+
ArrayRef<ObjectRef> FileLists);
298289

299290
static Expected<FileList> get(ObjectStore &CAS, ObjectRef Ref);
300291

@@ -308,6 +299,9 @@ class IncludeTree::FileList : public IncludeTreeBase<FileList> {
308299
assert(isValid(*this));
309300
}
310301

302+
size_t getNumFilesCurrentList() const;
303+
FileSizeTy getFileSize(size_t I) const;
304+
311305
Expected<File> getFile(ObjectRef Ref) {
312306
auto Node = getCAS().getProxy(Ref);
313307
if (!Node)
@@ -744,26 +738,4 @@ createIncludeTreeFileSystem(IncludeTreeRoot &Root);
744738
} // namespace cas
745739
} // namespace clang
746740

747-
namespace llvm {
748-
template <> struct DenseMapInfo<clang::cas::IncludeTree::FileList::FileEntry> {
749-
using FileEntry = clang::cas::IncludeTree::FileList::FileEntry;
750-
751-
static FileEntry getEmptyKey() {
752-
return {cas::ObjectRef::getDenseMapEmptyKey(), 0};
753-
}
754-
755-
static FileEntry getTombstoneKey() {
756-
return {cas::ObjectRef::getDenseMapTombstoneKey(), 0};
757-
}
758-
759-
static unsigned getHashValue(FileEntry F) {
760-
return F.FileRef.getDenseMapHash();
761-
}
762-
763-
static bool isEqual(FileEntry LHS, FileEntry RHS) {
764-
return LHS.FileRef == RHS.FileRef && LHS.Size == RHS.Size;
765-
}
766-
};
767-
} // namespace llvm
768-
769741
#endif

clang/lib/CAS/IncludeTree.cpp

Lines changed: 66 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7,8 +7,11 @@
77
//===----------------------------------------------------------------------===//
88

99
#include "clang/CAS/IncludeTree.h"
10+
#include "llvm/ADT/ArrayRef.h"
1011
#include "llvm/ADT/SmallBitVector.h"
1112
#include "llvm/Support/EndianStream.h"
13+
#include "llvm/Support/Error.h"
14+
#include <utility>
1215

1316
using namespace clang;
1417
using namespace clang::cas;
@@ -220,10 +223,15 @@ IncludeTree::ModuleImport::create(ObjectStore &DB, StringRef ModuleName,
220223
return IncludeTreeBase::create(DB, {}, Buffer);
221224
}
222225

226+
size_t IncludeTree::FileList::getNumFilesCurrentList() const {
227+
return llvm::support::endian::read<uint32_t, llvm::support::little>(
228+
getData().data());
229+
}
230+
223231
IncludeTree::FileList::FileSizeTy
224232
IncludeTree::FileList::getFileSize(size_t I) const {
225-
assert(I < getNumFiles());
226-
StringRef Data = getData();
233+
assert(I < getNumFilesCurrentList());
234+
StringRef Data = getData().drop_front(sizeof(uint32_t));
227235
assert(Data.size() >= (I + 1) * sizeof(FileSizeTy));
228236
return llvm::support::endian::read<FileSizeTy, llvm::support::little>(
229237
Data.data() + I * sizeof(FileSizeTy));
@@ -232,29 +240,44 @@ IncludeTree::FileList::getFileSize(size_t I) const {
232240
llvm::Error IncludeTree::FileList::forEachFile(
233241
llvm::function_ref<llvm::Error(File, FileSizeTy)> Callback) {
234242
size_t I = 0;
243+
size_t FileCount = getNumFilesCurrentList();
235244
return forEachReference([&](ObjectRef Ref) -> llvm::Error {
236-
auto Include = getFile(Ref);
237-
if (!Include)
238-
return Include.takeError();
239-
return Callback(std::move(*Include), getFileSize(I++));
245+
if (I < FileCount) {
246+
auto Include = getFile(Ref);
247+
if (!Include)
248+
return Include.takeError();
249+
return Callback(std::move(*Include), getFileSize(I++));
250+
}
251+
// Otherwise, it's a chained FileList.
252+
++I;
253+
auto Proxy = getCAS().getProxy(Ref);
254+
if (!Proxy)
255+
return Proxy.takeError();
256+
FileList FL(std::move(*Proxy));
257+
return FL.forEachFile(Callback);
240258
});
241259
}
242260

243261
Expected<IncludeTree::FileList>
244-
IncludeTree::FileList::create(ObjectStore &DB, ArrayRef<FileEntry> Files) {
262+
IncludeTree::FileList::create(ObjectStore &DB, ArrayRef<FileEntry> Files,
263+
ArrayRef<ObjectRef> FileLists) {
245264
SmallVector<ObjectRef, 16> Refs;
246-
Refs.reserve(Files.size());
265+
Refs.reserve(Files.size() + FileLists.size());
247266
SmallString<256> Buffer;
248-
Buffer.reserve(Files.size() * sizeof(FileSizeTy));
267+
Buffer.reserve(sizeof(uint32_t) + Files.size() * sizeof(FileSizeTy));
249268

250269
llvm::raw_svector_ostream BufOS(Buffer);
251270
llvm::support::endian::Writer Writer(BufOS, llvm::support::little);
271+
Writer.write(static_cast<uint32_t>(Files.size()));
252272

253273
for (const FileEntry &Entry : Files) {
254274
assert(File::isValid(DB, Entry.FileRef));
255275
Refs.push_back(Entry.FileRef);
256276
Writer.write(Entry.Size);
257277
}
278+
279+
Refs.append(FileLists.begin(), FileLists.end());
280+
258281
return IncludeTreeBase::create(DB, Refs, Buffer);
259282
}
260283

@@ -273,9 +296,13 @@ bool IncludeTree::FileList::isValid(const ObjectProxy &Node) {
273296
if (!IncludeTreeBase::isValid(Node))
274297
return false;
275298
IncludeTreeBase Base(Node);
276-
unsigned NumFiles = Base.getNumReferences();
277-
return NumFiles != 0 &&
278-
Base.getData().size() == NumFiles * sizeof(FileSizeTy);
299+
StringRef Data = Base.getData();
300+
if (Data.size() < sizeof(uint32_t))
301+
return false;
302+
unsigned NumFiles =
303+
llvm::support::endian::read<uint32_t, llvm::support::little>(Data.data());
304+
return NumFiles != 0 && NumFiles <= Base.getNumReferences() &&
305+
Data.size() == sizeof(uint32_t) + NumFiles * sizeof(FileSizeTy);
279306
}
280307

281308
static constexpr char ModuleFlagFramework = 1 << 0;
@@ -845,20 +872,47 @@ class IncludeTreeFileSystem : public llvm::vfs::FileSystem {
845872
};
846873
} // namespace
847874

875+
static llvm::Error diagnoseFileChange(IncludeTree::File F, ObjectRef Content) {
876+
auto FilenameBlob = F.getFilename();
877+
if (!FilenameBlob)
878+
return FilenameBlob.takeError();
879+
cas::ObjectStore &DB = F.getCAS();
880+
std::string Filename(FilenameBlob->getData());
881+
std::string OldID = DB.getID(Content).toString();
882+
std::string NewID = DB.getID(F.getContentsRef()).toString();
883+
return llvm::createStringError(llvm::inconvertibleErrorCode(),
884+
"file '%s' changed during build; include-tree "
885+
"contents changed from %s to %s",
886+
Filename.c_str(), OldID.c_str(),
887+
NewID.c_str());
888+
}
889+
848890
Expected<IntrusiveRefCntPtr<llvm::vfs::FileSystem>>
849891
cas::createIncludeTreeFileSystem(IncludeTreeRoot &Root) {
850892
auto FileList = Root.getFileList();
851893
if (!FileList)
852894
return FileList.takeError();
853895

896+
// Map from FilenameRef to ContentsRef.
897+
llvm::DenseMap<ObjectRef, ObjectRef> SeenContents;
898+
854899
IntrusiveRefCntPtr<IncludeTreeFileSystem> IncludeTreeFS =
855900
new IncludeTreeFileSystem(Root.getCAS());
856901
llvm::Error E = FileList->forEachFile(
857902
[&](IncludeTree::File File,
858903
IncludeTree::FileList::FileSizeTy Size) -> llvm::Error {
904+
auto InsertPair = SeenContents.insert(
905+
std::make_pair(File.getFilenameRef(), File.getContentsRef()));
906+
if (!InsertPair.second) {
907+
if (InsertPair.first->second != File.getContentsRef())
908+
return diagnoseFileChange(File, InsertPair.first->second);
909+
return llvm::Error::success();
910+
}
911+
859912
auto FilenameBlob = File.getFilename();
860913
if (!FilenameBlob)
861914
return FilenameBlob.takeError();
915+
862916
SmallString<128> Filename(FilenameBlob->getData());
863917
// Strip './' in the filename to match the behaviour of ASTWriter; we
864918
// also strip './' in IncludeTreeFileSystem::getPath.

clang/lib/Tooling/DependencyScanning/IncludeTreeActionController.cpp

Lines changed: 20 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -129,7 +129,8 @@ class IncludeTreeBuilder {
129129
// are recorded in the PCH, ordered by \p FileEntry::UID index.
130130
SmallVector<StringRef> PreIncludedFileNames;
131131
llvm::BitVector SeenIncludeFiles;
132-
llvm::SetVector<cas::IncludeTree::FileList::FileEntry> IncludedFiles;
132+
SmallVector<cas::IncludeTree::FileList::FileEntry> IncludedFiles;
133+
SmallVector<cas::ObjectRef> IncludedFileLists;
133134
Optional<cas::ObjectRef> PredefinesBufferRef;
134135
Optional<cas::ObjectRef> ModuleIncludesBufferRef;
135136
Optional<cas::ObjectRef> ModuleMapRef;
@@ -454,9 +455,23 @@ void IncludeTreeBuilder::handleHasIncludeCheck(Preprocessor &PP, bool Result) {
454455
IncludeStack.back().HasIncludeChecks.push_back(Result);
455456
}
456457

458+
// FIXME: duplicates code in PPDirectives
459+
static bool isForModuleBuilding(const Module *M, StringRef CurrentModule,
460+
StringRef ModuleName) {
461+
StringRef TopLevelName = M->getTopLevelModuleName();
462+
463+
// When building framework Foo, we wanna make sure that Foo *and* Foo_Private
464+
// are textually included and no modules are built for both.
465+
if (M->getTopLevelModule()->IsFramework && CurrentModule == ModuleName &&
466+
!CurrentModule.endswith("_Private") && TopLevelName.endswith("_Private"))
467+
TopLevelName = TopLevelName.drop_back(8);
468+
469+
return TopLevelName == CurrentModule;
470+
}
471+
457472
void IncludeTreeBuilder::moduleImport(Preprocessor &PP, const Module *M,
458473
SourceLocation EndLoc) {
459-
bool VisibilityOnly = M->isForBuilding(PP.getLangOpts());
474+
bool VisibilityOnly = isForModuleBuilding(M, PP.getLangOpts().CurrentModule, PP.getLangOpts().ModuleName);
460475
auto Import = check(cas::IncludeTree::ModuleImport::create(
461476
DB, M->getFullModuleName(), VisibilityOnly));
462477
if (!Import)
@@ -655,7 +670,7 @@ IncludeTreeBuilder::finishIncludeTree(CompilerInstance &ScanInstance,
655670
}
656671

657672
auto FileList =
658-
cas::IncludeTree::FileList::create(DB, IncludedFiles.getArrayRef());
673+
cas::IncludeTree::FileList::create(DB, IncludedFiles, IncludedFileLists);
659674
if (!FileList)
660675
return FileList.takeError();
661676

@@ -681,16 +696,8 @@ Error IncludeTreeBuilder::addModuleInputs(ASTReader &Reader) {
681696
Optional<cas::IncludeTreeRoot> Root;
682697
if (Error E = cas::IncludeTreeRoot::get(DB, *Ref).moveInto(Root))
683698
return E;
684-
Optional<cas::IncludeTree::FileList> Files;
685-
if (Error E = Root->getFileList().moveInto(Files))
686-
return E;
687699

688-
Error E = Files->forEachFile([&](auto IF, auto Size) -> Error {
689-
IncludedFiles.insert({IF.getRef(), Size});
690-
return Error::success();
691-
});
692-
if (E)
693-
return E;
700+
IncludedFileLists.push_back(Root->getFileListRef());
694701
}
695702

696703
return Error::success();
@@ -771,7 +778,7 @@ IncludeTreeBuilder::addToFileList(FileManager &FM, const FileEntry *FE) {
771778
auto FileNode = createIncludeFile(Filename, **CASContents);
772779
if (!FileNode)
773780
return FileNode.takeError();
774-
IncludedFiles.insert(
781+
IncludedFiles.push_back(
775782
{FileNode->getRef(),
776783
static_cast<cas::IncludeTree::FileList::FileSizeTy>(FE->getSize())});
777784
return FileNode->getRef();

clang/unittests/CAS/IncludeTreeTest.cpp

Lines changed: 69 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
#include "llvm/CAS/CASProvidingFileSystem.h"
55
#include "llvm/CAS/CachingOnDiskFileSystem.h"
66
#include "llvm/CAS/ObjectStore.h"
7+
#include "llvm/Support/Error.h"
78
#include "llvm/Testing/Support/Error.h"
89
#include "gtest/gtest.h"
910

@@ -135,29 +136,74 @@ TEST(IncludeTree, IncludeTreeScan) {
135136

136137
Optional<IncludeTree::FileList> FileList;
137138
ASSERT_THAT_ERROR(Root->getFileList().moveInto(FileList), llvm::Succeeded());
138-
ASSERT_EQ(FileList->getNumFiles(), size_t(4));
139-
{
140-
Optional<IncludeTree::File> File;
141-
ASSERT_THAT_ERROR(FileList->getFile(0).moveInto(File), llvm::Succeeded());
142-
EXPECT_EQ(File->getRef(), MainFile->getRef());
143-
EXPECT_EQ(FileList->getFileSize(0), MainContents.size());
144-
}
145-
{
146-
Optional<IncludeTree::File> File;
147-
ASSERT_THAT_ERROR(FileList->getFile(1).moveInto(File), llvm::Succeeded());
148-
EXPECT_EQ(File->getRef(), A1File->getRef());
149-
EXPECT_EQ(FileList->getFileSize(1), A1Contents.size());
150-
}
151-
{
152-
Optional<IncludeTree::File> File;
153-
ASSERT_THAT_ERROR(FileList->getFile(2).moveInto(File), llvm::Succeeded());
154-
EXPECT_EQ(File->getRef(), B1File->getRef());
155-
EXPECT_EQ(FileList->getFileSize(2), IncludeTree::FileList::FileSizeTy(0));
156-
}
157-
{
139+
140+
SmallVector<std::pair<IncludeTree::File, IncludeTree::FileList::FileSizeTy>>
141+
Files;
142+
ASSERT_THAT_ERROR(FileList->forEachFile([&](auto F, auto S) -> llvm::Error {
143+
Files.push_back({F, S});
144+
return llvm::Error::success();
145+
}),
146+
llvm::Succeeded());
147+
148+
ASSERT_EQ(Files.size(), size_t(4));
149+
EXPECT_EQ(Files[0].first.getRef(), MainFile->getRef());
150+
EXPECT_EQ(Files[0].second, MainContents.size());
151+
EXPECT_EQ(Files[1].first.getRef(), A1File->getRef());
152+
EXPECT_EQ(Files[1].second, A1Contents.size());
153+
EXPECT_EQ(Files[2].first.getRef(), B1File->getRef());
154+
EXPECT_EQ(Files[2].second, IncludeTree::FileList::FileSizeTy(0));
155+
EXPECT_EQ(Files[3].first.getRef(), SysFile->getRef());
156+
EXPECT_EQ(Files[3].second, IncludeTree::FileList::FileSizeTy(0));
157+
}
158+
159+
TEST(IncludeTree, IncludeTreeFileList) {
160+
std::shared_ptr<ObjectStore> DB = llvm::cas::createInMemoryCAS();
161+
SmallVector<IncludeTree::File> Files;
162+
for (unsigned I = 0; I < 10; ++I) {
158163
Optional<IncludeTree::File> File;
159-
ASSERT_THAT_ERROR(FileList->getFile(3).moveInto(File), llvm::Succeeded());
160-
EXPECT_EQ(File->getRef(), SysFile->getRef());
161-
EXPECT_EQ(FileList->getFileSize(3), IncludeTree::FileList::FileSizeTy(0));
164+
std::string Path = "/file" + std::to_string(I);
165+
static constexpr StringRef Bytes = "123456789";
166+
Optional<ObjectRef> Content;
167+
ASSERT_THAT_ERROR(
168+
DB->storeFromString({}, Bytes.substr(0, I)).moveInto(Content),
169+
llvm::Succeeded());
170+
ASSERT_THAT_ERROR(
171+
IncludeTree::File::create(*DB, Path, *Content).moveInto(File),
172+
llvm::Succeeded());
173+
Files.push_back(std::move(*File));
162174
}
175+
176+
auto MakeFileList = [&](unsigned Begin, unsigned End,
177+
ArrayRef<ObjectRef> Lists) {
178+
SmallVector<IncludeTree::FileList::FileEntry> Entries;
179+
for (; Begin != End; ++Begin)
180+
Entries.push_back({Files[Begin].getRef(), Begin});
181+
return IncludeTree::FileList::create(*DB, Entries, Lists);
182+
};
183+
184+
Optional<IncludeTree::FileList> L89, L7, L29, L;
185+
ASSERT_THAT_ERROR(MakeFileList(8, 10, {}).moveInto(L89), llvm::Succeeded());
186+
EXPECT_EQ(L89->getNumReferences(), 2u);
187+
ASSERT_THAT_ERROR(MakeFileList(7, 8, {}).moveInto(L7), llvm::Succeeded());
188+
EXPECT_EQ(L7->getNumReferences(), 1u);
189+
ASSERT_THAT_ERROR(
190+
MakeFileList(2, 7, {L7->getRef(), L89->getRef()}).moveInto(L29),
191+
llvm::Succeeded());
192+
EXPECT_EQ(L29->getNumReferences(), 7u); // 2,3,4,5,6, {7}, {8, 9}
193+
ASSERT_THAT_ERROR(MakeFileList(0, 2, {L29->getRef()}).moveInto(L),
194+
llvm::Succeeded());
195+
EXPECT_EQ(L->getNumReferences(), 3u); // 0, 1, {2, ...}
196+
197+
size_t I = 0;
198+
ASSERT_THAT_ERROR(
199+
L->forEachFile([&](IncludeTree::File F, auto Size) -> llvm::Error {
200+
EXPECT_EQ(F.getFilenameRef(), Files[I].getFilenameRef())
201+
<< "filename mismatch at " << I;
202+
EXPECT_EQ(F.getContentsRef(), Files[I].getContentsRef())
203+
<< "contents mismatch at " << I;
204+
EXPECT_EQ(Size, I) << "size mismatch at " << I;
205+
I += 1;
206+
return llvm::Error::success();
207+
}),
208+
llvm::Succeeded());
163209
}

0 commit comments

Comments
 (0)