Skip to content

Commit 40bc6cd

Browse files
committed
Simplify Clang module collection
1 parent db68225 commit 40bc6cd

File tree

2 files changed

+39
-98
lines changed

2 files changed

+39
-98
lines changed

clang/lib/Driver/ModulesDriver.cpp

Lines changed: 24 additions & 86 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
#include "clang/Tooling/DependencyScanning/DependencyScanningWorker.h"
2929
#include "clang/Tooling/DependencyScanning/ModuleDepCollector.h"
3030
#include "llvm/ADT/DenseMap.h"
31+
#include "llvm/ADT/DenseSet.h"
3132
#include "llvm/ADT/DirectedGraph.h"
3233
#include "llvm/ADT/GraphTraits.h"
3334
#include "llvm/ADT/IntrusiveRefCntPtr.h"
@@ -504,14 +505,15 @@ struct DependencyScanResult {
504505
llvm::SmallVector<std::optional<InputDependencies>> InputDeps;
505506

506507
/// The full Clang module dependenies for this compilation.
507-
SmallVector<std::unique_ptr<ModuleDeps>> ClangModuleDeps;
508+
SmallVector<ModuleDeps, 0> ClangModuleDeps;
508509
};
509510

510511
/// Merges and deterministically orders scan results from multiple threads
511512
/// into a single DependencyScanResult.
512513
class ScanResultCollector {
513514
public:
514-
explicit ScanResultCollector(size_t NumInputs) : InputDeps(NumInputs) {}
515+
explicit ScanResultCollector(size_t NumInputs)
516+
: InputDeps(NumInputs), ClangModuleGraphs(NumInputs) {}
515517

516518
/// Adds the dependency scan result for the input at \c InputIndex.
517519
///
@@ -525,44 +527,15 @@ class ScanResultCollector {
525527
DependencyScanResult takeScanResults();
526528

527529
private:
528-
/// Merges and deterministically orders Clang module dependencies.
529-
class ClangModuleDepsCollector {
530-
public:
531-
void mergeGraph(ModuleDepsGraph &&Graph, size_t InputIndex);
532-
533-
SmallVector<std::unique_ptr<ModuleDeps>> takeOrderedModuleDeps();
534-
535-
private:
536-
/// We need the output of dependency scan to be deterministic. However,
537-
/// the dependency graph may contain two modules with the same name. How
538-
/// do we decide which one to order first? If we made that decision based
539-
/// on the context hash, the ordering would be deterministic, but
540-
/// different across machines. This can happen for example when the inputs
541-
/// or the SDKs (which both contribute to the "context" hash) live in
542-
/// different absolute locations. We solve that by tracking the index of
543-
/// the first input TU that (transitively) imports the dependency, which
544-
/// is always the same for the same input, resulting in deterministic
545-
/// sorting that's also reproducible across machines.
546-
struct IndexedModuleDeps {
547-
size_t FirstImportingInputIndex;
548-
std::unique_ptr<ModuleDeps> ModuleDeps;
549-
};
550-
551-
SmallVector<IndexedModuleDeps> IndexedModuleGraph;
552-
llvm::DenseMap<ModuleID, size_t> ModuleGraphIndexByID;
553-
std::mutex Lock;
554-
};
555-
556530
SmallVector<std::optional<InputDependencies>, 0> InputDeps;
557-
ClangModuleDepsCollector ClangModuleDeps;
531+
llvm::SmallVector<std::vector<ModuleDeps>, 0> ClangModuleGraphs;
558532
};
559533
} // anonymous namespace
560534

561535
void ScanResultCollector::handleTUResult(TranslationUnitDeps &&TUDeps,
562536
bool IsSystem, size_t InputIndex) {
563537
assert(!InputDeps[InputIndex].has_value() &&
564538
"Each slot should be written to at most once.");
565-
566539
InputDeps[InputIndex].emplace();
567540
auto &NewInputDep = InputDeps[InputIndex].value();
568541
NewInputDep.ID = std::move(TUDeps.ID);
@@ -573,61 +546,27 @@ void ScanResultCollector::handleTUResult(TranslationUnitDeps &&TUDeps,
573546
assert(TUDeps.Commands.size() == 1 && "Expected exactly one command");
574547
NewInputDep.BuildArgs = TUDeps.Commands.front().Arguments;
575548

576-
ClangModuleDeps.mergeGraph(std::move(TUDeps.ModuleGraph), InputIndex);
549+
assert(ClangModuleGraphs[InputIndex].empty() &&
550+
"Each slot should be written to at most once.");
551+
ClangModuleGraphs[InputIndex] = std::move(TUDeps.ModuleGraph);
577552
}
578553

579554
DependencyScanResult ScanResultCollector::takeScanResults() {
580-
return {std::move(InputDeps), ClangModuleDeps.takeOrderedModuleDeps()};
581-
}
582-
583-
void ScanResultCollector::ClangModuleDepsCollector::mergeGraph(
584-
ModuleDepsGraph &&ModuleGraph, size_t InputIndex) {
585-
SmallVector<ModuleDeps *> NewModuleDeps;
586-
587-
{
588-
std::scoped_lock<std::mutex> SL(Lock);
555+
DependencyScanResult Out;
589556

557+
// Record each module only once, from its first importing input.
558+
// This keeps the output deterministic.
559+
llvm::DenseSet<ModuleID> AlreadySeen;
560+
for (auto &ModuleGraph : ClangModuleGraphs) {
590561
for (auto &MD : ModuleGraph) {
591-
if (const auto It = ModuleGraphIndexByID.find(MD.ID);
592-
It != ModuleGraphIndexByID.end()) {
593-
auto &FirstImportingInput =
594-
IndexedModuleGraph[It->second].FirstImportingInputIndex;
595-
FirstImportingInput = std::min(FirstImportingInput, InputIndex);
562+
auto [It, Inserted] = AlreadySeen.insert(MD.ID);
563+
if (!Inserted)
596564
continue;
597-
}
598-
599-
const auto GraphIndex = IndexedModuleGraph.size();
600-
ModuleGraphIndexByID.try_emplace(MD.ID, GraphIndex);
601-
602-
auto NewModule = std::make_unique<ModuleDeps>(std::move(MD));
603-
NewModuleDeps.push_back(NewModule.get());
604-
605-
IndexedModuleGraph.push_back(
606-
IndexedModuleDeps{InputIndex, std::move(NewModule)});
565+
Out.ClangModuleDeps.push_back(std::move(MD));
607566
}
608567
}
609568

610-
// First call to \c getBuildArguments is somewhat expensive. Let's call it
611-
// on the current thread (instead of the main one), and outside the
612-
// critical section.
613-
for (const auto *MD : NewModuleDeps)
614-
(void)MD->getBuildArguments();
615-
}
616-
617-
SmallVector<std::unique_ptr<ModuleDeps>>
618-
ScanResultCollector::ClangModuleDepsCollector::takeOrderedModuleDeps() {
619-
// The context hash of a ModuleID can be different across machines.
620-
// We therefore use the first importing input index and module name to sort
621-
// ModuleDeps deterministically.
622-
llvm::sort(IndexedModuleGraph, [](const auto &A, const auto &B) {
623-
return std::tie(A.FirstImportingInputIndex, A.ModuleDeps->ID.ModuleName) <
624-
std::tie(B.FirstImportingInputIndex, B.ModuleDeps->ID.ModuleName);
625-
});
626-
627-
SmallVector<std::unique_ptr<ModuleDeps>> Out;
628-
Out.reserve(IndexedModuleGraph.size());
629-
for (auto &IndexedMD : IndexedModuleGraph)
630-
Out.push_back(std::move(IndexedMD.ModuleDeps));
569+
Out.InputDeps = std::move(InputDeps);
631570
return Out;
632571
}
633572

@@ -639,11 +578,11 @@ class ScanningWorkerPool {
639578
ScanningWorkerPool(size_t NumWorkers, DependencyScanningService &S,
640579
llvm::vfs::FileSystem &FS);
641580

642-
/// Acquires a unique pointer to a dependency scanning worker and its context.
581+
/// Acquires a unique pointer to a dependency scanning worker and its
582+
/// context.
643583
///
644-
/// The worker bundle automatically released back to the pool when the pointer
645-
/// is destroyed.
646-
/// The pool has to outlive the leased worker bundle.
584+
/// The worker bundle automatically released back to the pool when the
585+
/// pointer is destroyed. The pool has to outlive the leased worker bundle.
647586
[[nodiscard]] auto scopedAcquire();
648587

649588
private:
@@ -1063,14 +1002,13 @@ static MDGComponent *makeWithAlloc(llvm::BumpPtrAllocator &Alloc,
10631002
/// Builds all Clang module nodes from \c ClangModuleDeps for \c Graph and
10641003
/// registers them in \c ClangModuleMap.
10651004
static void buildClangModuleNodes(
1066-
ModuleDependencyGraph &Graph,
1067-
ArrayRef<std::unique_ptr<ModuleDeps>> ClangModuleDeps,
1005+
ModuleDependencyGraph &Graph, ArrayRef<ModuleDeps> ClangModuleDeps,
10681006
llvm::DenseMap<ModuleID, ClangModuleMDGNode *> &ClangModuleMap) {
10691007
auto &Alloc = Graph.getAllocator();
10701008
for (auto &M : ClangModuleDeps) {
1071-
auto *Node = makeWithAlloc<ClangModuleMDGNode>(Alloc, *M);
1009+
auto *Node = makeWithAlloc<ClangModuleMDGNode>(Alloc, M);
10721010
Graph.addNode(*Node);
1073-
const auto [It, Inserted] = ClangModuleMap.try_emplace(M->ID, Node);
1011+
const auto [It, Inserted] = ClangModuleMap.try_emplace(M.ID, Node);
10741012
assert(Inserted && "Duplicate Clang module in scan result.");
10751013
}
10761014
}

clang/test/Driver/modules-driver-dep-graph.cpp

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,19 +12,22 @@
1212
// RUN: | sed 's:\\\\\?:/:g' \
1313
// RUN: | FileCheck -DPREFIX=%/t --check-prefixes=CHECK %s
1414

15-
// CHECK: digraph "Module Dependency Graph" {
15+
// CHECK: clang: remark: printing module dependency graph [-Rmodules-driver]
16+
// CHECK-NEXT: digraph "Module Dependency Graph" {
1617
// CHECK-NEXT: label="Module Dependency Graph";
17-
18-
// CHECK: "direct1:[[HASH_DIRECT1:.*]]" [ fillcolor=[[COLOR1:[0-9]+]],label="{ Kind: Clang module | Module name: direct1 | Modulemap file: [[PREFIX]]/module.modulemap | Input origin: User | Hash: [[HASH_DIRECT1]] }"];
19-
// CHECK-NEXT: "direct2:[[HASH_DIRECT2:.*]]" [ fillcolor=[[COLOR1]],label="{ Kind: Clang module | Module name: direct2 | Modulemap file: [[PREFIX]]/module.modulemap | Input origin: User | Hash: [[HASH_DIRECT2]] }"];
20-
// CHECK-NEXT: "root:[[HASH_ROOT:.*]]" [ fillcolor=[[COLOR1]],label="{ Kind: Clang module | Module name: root | Modulemap file: [[PREFIX]]/module.modulemap | Input origin: User | Hash: [[HASH_ROOT]] }"];
21-
// CHECK-NEXT: "transitive1:[[HASH_TRANSITIVE1:.*]]" [ fillcolor=[[COLOR1]],label="{ Kind: Clang module | Module name: transitive1 | Modulemap file: [[PREFIX]]/module.modulemap | Input origin: User | Hash: [[HASH_TRANSITIVE1]] }"];
22-
// CHECK-NEXT: "transitive2:[[HASH_TRANSITIVE2:.*]]" [ fillcolor=[[COLOR1]],label="{ Kind: Clang module | Module name: transitive2 | Modulemap file: [[PREFIX]]/module.modulemap | Input origin: User | Hash: [[HASH_TRANSITIVE2]] }"];
23-
// CHECK-NEXT: "[[PREFIX]]/main.cpp" [ fillcolor=[[COLOR2:[0-9]+]],label="{ Kind: Non-module | Filename: [[PREFIX]]/main.cpp }"];
24-
// CHECK-NEXT: "A" [ fillcolor=[[COLOR3:[0-9]+]],label="{ Kind: C++ named module | Module name: A | Filename: [[PREFIX]]/A.cpp | Input origin: User | Hash: {{.*}} }"];
25-
// CHECK-NEXT: "A:B" [ fillcolor=[[COLOR3]],label="{ Kind: C++ named module | Module name: A:B | Filename: [[PREFIX]]/A-B.cpp | Input origin: User | Hash: {{.*}} }"];
26-
// CHECK-NEXT: "A:C" [ fillcolor=[[COLOR3]],label="{ Kind: C++ named module | Module name: A:C | Filename: [[PREFIX]]/A-C.cpp | Input origin: User | Hash: {{.*}} }"];
27-
// CHECK-NEXT: "B" [ fillcolor=[[COLOR3]],label="{ Kind: C++ named module | Module name: B | Filename: [[PREFIX]]/B.cpp | Input origin: User | Hash: {{.*}} }"];
18+
// CHECK-NEXT: node [colorscheme={{.*}},style=filled,shape=Mrecord];
19+
// CHECK-NEXT: edge [dir="back"];
20+
21+
// CHECK: "transitive1:[[HASH_TRANSITIVE1:.*]]" [ fillcolor=1,label="{ Kind: Clang module | Module name: transitive1 | Modulemap file: [[PREFIX]]/module.modulemap | Input origin: User | Hash: [[HASH_TRANSITIVE1]] }"];
22+
// CHECK-NEXT: "transitive2:[[HASH_TRANSITIVE2:.*]]" [ fillcolor=1,label="{ Kind: Clang module | Module name: transitive2 | Modulemap file: [[PREFIX]]/module.modulemap | Input origin: User | Hash: [[HASH_TRANSITIVE2]] }"];
23+
// CHECK-NEXT: "direct1:[[HASH_DIRECT1:.*]]" [ fillcolor=1,label="{ Kind: Clang module | Module name: direct1 | Modulemap file: [[PREFIX]]/module.modulemap | Input origin: User | Hash: [[HASH_DIRECT1]] }"];
24+
// CHECK-NEXT: "direct2:[[HASH_DIRECT2:.*]]" [ fillcolor=1,label="{ Kind: Clang module | Module name: direct2 | Modulemap file: [[PREFIX]]/module.modulemap | Input origin: User | Hash: [[HASH_DIRECT2]] }"];
25+
// CHECK-NEXT: "root:[[HASH_ROOT:.*]]" [ fillcolor=1,label="{ Kind: Clang module | Module name: root | Modulemap file: [[PREFIX]]/module.modulemap | Input origin: User | Hash: [[HASH_ROOT]] }"];
26+
// CHECK-NEXT: "[[PREFIX]]/main.cpp" [ fillcolor=3,label="{ Kind: Non-module | Filename: [[PREFIX]]/main.cpp }"];
27+
// CHECK-NEXT: "A" [ fillcolor=2,label="{ Kind: C++ named module | Module name: A | Filename: [[PREFIX]]/A.cpp | Input origin: User | Hash: {{.*}} }"];
28+
// CHECK-NEXT: "A:B" [ fillcolor=2,label="{ Kind: C++ named module | Module name: A:B | Filename: [[PREFIX]]/A-B.cpp | Input origin: User | Hash: {{.*}} }"];
29+
// CHECK-NEXT: "A:C" [ fillcolor=2,label="{ Kind: C++ named module | Module name: A:C | Filename: [[PREFIX]]/A-C.cpp | Input origin: User | Hash: {{.*}} }"];
30+
// CHECK-NEXT: "B" [ fillcolor=2,label="{ Kind: C++ named module | Module name: B | Filename: [[PREFIX]]/B.cpp | Input origin: User | Hash: {{.*}} }"];
2831

2932
// CHECK: "direct1:[[HASH_DIRECT1]]" -> "transitive1:[[HASH_TRANSITIVE1]]";
3033
// CHECK-NEXT: "direct1:[[HASH_DIRECT1]]" -> "transitive2:[[HASH_TRANSITIVE2]]";

0 commit comments

Comments
 (0)