Skip to content

Commit d5226b1

Browse files
committed
[clang][cas] Skip duplicates in forEachFile
If we see the same File or FileList more than once, skip visiting it again. This improves performance if the same module is reached through multiple dependency chains, or the same file is in multiple modules/PCH/TU file lists explicitly. (cherry picked from commit 591fa52) (cherry picked from commit 653e01b)
1 parent e3288df commit d5226b1

File tree

3 files changed

+74
-6
lines changed

3 files changed

+74
-6
lines changed

clang/include/clang/CAS/IncludeTree.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -309,6 +309,10 @@ class IncludeTree::FileList : public IncludeTreeBase<FileList> {
309309
return File(std::move(*Node));
310310
}
311311

312+
llvm::Error
313+
forEachFileImpl(llvm::DenseSet<ObjectRef> &Seen,
314+
llvm::function_ref<llvm::Error(File, FileSizeTy)> Callback);
315+
312316
static bool isValid(const ObjectProxy &Node);
313317
static bool isValid(ObjectStore &CAS, ObjectRef Ref) {
314318
auto Node = CAS.getProxy(Ref);

clang/lib/CAS/IncludeTree.cpp

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -237,27 +237,38 @@ IncludeTree::FileList::getFileSize(size_t I) const {
237237
Data.data() + I * sizeof(FileSizeTy));
238238
}
239239

240-
llvm::Error IncludeTree::FileList::forEachFile(
240+
llvm::Error IncludeTree::FileList::forEachFileImpl(
241+
llvm::DenseSet<ObjectRef> &Seen,
241242
llvm::function_ref<llvm::Error(File, FileSizeTy)> Callback) {
242-
size_t I = 0;
243+
size_t Next = 0;
243244
size_t FileCount = getNumFilesCurrentList();
244245
return forEachReference([&](ObjectRef Ref) -> llvm::Error {
245-
if (I < FileCount) {
246+
size_t Index = Next++;
247+
if (!Seen.insert(Ref).second)
248+
return llvm::Error::success();
249+
250+
if (Index < FileCount) {
246251
auto Include = getFile(Ref);
247252
if (!Include)
248253
return Include.takeError();
249-
return Callback(std::move(*Include), getFileSize(I++));
254+
return Callback(std::move(*Include), getFileSize(Index));
250255
}
256+
251257
// Otherwise, it's a chained FileList.
252-
++I;
253258
auto Proxy = getCAS().getProxy(Ref);
254259
if (!Proxy)
255260
return Proxy.takeError();
256261
FileList FL(std::move(*Proxy));
257-
return FL.forEachFile(Callback);
262+
return FL.forEachFileImpl(Seen, Callback);
258263
});
259264
}
260265

266+
llvm::Error IncludeTree::FileList::forEachFile(
267+
llvm::function_ref<llvm::Error(File, FileSizeTy)> Callback) {
268+
llvm::DenseSet<ObjectRef> Seen;
269+
return forEachFileImpl(Seen, Callback);
270+
}
271+
261272
Expected<IncludeTree::FileList>
262273
IncludeTree::FileList::create(ObjectStore &DB, ArrayRef<FileEntry> Files,
263274
ArrayRef<ObjectRef> FileLists) {

clang/unittests/CAS/IncludeTreeTest.cpp

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -197,6 +197,8 @@ TEST(IncludeTree, IncludeTreeFileList) {
197197
size_t I = 0;
198198
ASSERT_THAT_ERROR(
199199
L->forEachFile([&](IncludeTree::File F, auto Size) -> llvm::Error {
200+
if (I >= Files.size())
201+
return llvm::Error::success(); // diagnosed later.
200202
EXPECT_EQ(F.getFilenameRef(), Files[I].getFilenameRef())
201203
<< "filename mismatch at " << I;
202204
EXPECT_EQ(F.getContentsRef(), Files[I].getContentsRef())
@@ -206,4 +208,55 @@ TEST(IncludeTree, IncludeTreeFileList) {
206208
return llvm::Error::success();
207209
}),
208210
llvm::Succeeded());
211+
EXPECT_EQ(I, Files.size());
212+
}
213+
214+
TEST(IncludeTree, IncludeTreeFileListDuplicates) {
215+
std::shared_ptr<ObjectStore> DB = llvm::cas::createInMemoryCAS();
216+
SmallVector<IncludeTree::File> Files;
217+
for (unsigned I = 0; I < 10; ++I) {
218+
std::optional<IncludeTree::File> File;
219+
std::string Path = "/file" + std::to_string(I);
220+
static constexpr StringRef Bytes = "123456789";
221+
std::optional<ObjectRef> Content;
222+
ASSERT_THAT_ERROR(
223+
DB->storeFromString({}, Bytes.substr(0, I)).moveInto(Content),
224+
llvm::Succeeded());
225+
ASSERT_THAT_ERROR(
226+
IncludeTree::File::create(*DB, Path, *Content).moveInto(File),
227+
llvm::Succeeded());
228+
Files.push_back(std::move(*File));
229+
}
230+
231+
auto MakeFileList = [&](unsigned Begin, unsigned End,
232+
ArrayRef<ObjectRef> Lists) {
233+
SmallVector<IncludeTree::FileList::FileEntry> Entries;
234+
for (; Begin != End; ++Begin)
235+
Entries.push_back({Files[Begin].getRef(), Begin});
236+
return IncludeTree::FileList::create(*DB, Entries, Lists);
237+
};
238+
239+
std::optional<IncludeTree::FileList> L89, L;
240+
ASSERT_THAT_ERROR(MakeFileList(8, 10, {}).moveInto(L89), llvm::Succeeded());
241+
EXPECT_EQ(L89->getNumReferences(), 2u);
242+
ASSERT_THAT_ERROR(
243+
MakeFileList(0, 9, {L89->getRef(), L89->getRef()}).moveInto(L),
244+
llvm::Succeeded());
245+
EXPECT_EQ(L->getNumReferences(), 11u); // 0, 1, ..., 8, {8, 9}, {8, 9}
246+
247+
size_t I = 0;
248+
ASSERT_THAT_ERROR(
249+
L->forEachFile([&](IncludeTree::File F, auto Size) -> llvm::Error {
250+
if (I >= Files.size())
251+
return llvm::Error::success(); // diagnosed later.
252+
EXPECT_EQ(F.getFilenameRef(), Files[I].getFilenameRef())
253+
<< "filename mismatch at " << I;
254+
EXPECT_EQ(F.getContentsRef(), Files[I].getContentsRef())
255+
<< "contents mismatch at " << I;
256+
EXPECT_EQ(Size, I) << "size mismatch at " << I;
257+
I += 1;
258+
return llvm::Error::success();
259+
}),
260+
llvm::Succeeded());
261+
EXPECT_EQ(I, Files.size());
209262
}

0 commit comments

Comments
 (0)