Skip to content

Commit 5cda242

Browse files
authored
[LLD][COFF] Add more --time-trace tags for ThinLTO linking (#156471)
In order to better see what's going on during ThinLTO linking, this PR adds more profile tags when using `--time-trace` on a `lld-link.exe` invocation. After this PR, linking `clang.exe`: <img width="3839" height="2026" alt="Capture d’écran 2025-09-02 082021" src="https://github.com/user-attachments/assets/bf0c85ba-2f85-4bbf-a5c1-800039b56910" /> Linking a custom (Unreal Engine game) binary gives a completely different picture, probably because of using Unity files, and the sheer amount of input files (here, providing over 60 GB of .OBJ/.LIB files). <img width="1940" height="1008" alt="Capture d’écran 2025-09-02 102048" src="https://github.com/user-attachments/assets/60b28630-7995-45ce-9e8c-13f3cb5312e0" />
1 parent a549e73 commit 5cda242

File tree

12 files changed

+166
-107
lines changed

12 files changed

+166
-107
lines changed

lld/COFF/LTO.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
#include "llvm/Support/Caching.h"
2727
#include "llvm/Support/CodeGen.h"
2828
#include "llvm/Support/MemoryBuffer.h"
29+
#include "llvm/Support/TimeProfiler.h"
2930
#include "llvm/Support/raw_ostream.h"
3031
#include <cstddef>
3132
#include <memory>
@@ -176,6 +177,7 @@ void BitcodeCompiler::add(BitcodeFile &f) {
176177
// Merge all the bitcode files we have seen, codegen the result
177178
// and return the resulting objects.
178179
std::vector<InputFile *> BitcodeCompiler::compile() {
180+
llvm::TimeTraceScope timeScope("Bitcode compile");
179181
unsigned maxTasks = ltoObj->getMaxTasks();
180182
buf.resize(maxTasks);
181183
files.resize(maxTasks);

lld/COFF/SymbolTable.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1437,11 +1437,13 @@ void SymbolTable::compileBitcodeFiles() {
14371437
if (bitcodeFileInstances.empty())
14381438
return;
14391439

1440-
llvm::TimeTraceScope timeScope("Compile bitcode");
14411440
ScopedTimer t(ctx.ltoTimer);
14421441
lto.reset(new BitcodeCompiler(ctx));
1443-
for (BitcodeFile *f : bitcodeFileInstances)
1444-
lto->add(*f);
1442+
{
1443+
llvm::TimeTraceScope addScope("Add bitcode file instances");
1444+
for (BitcodeFile *f : bitcodeFileInstances)
1445+
lto->add(*f);
1446+
}
14451447
for (InputFile *newObj : lto->compile()) {
14461448
ObjFile *obj = cast<ObjFile>(newObj);
14471449
obj->parse();

llvm/lib/Bitcode/Reader/MetadataLoader.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "llvm/Support/CommandLine.h"
4444
#include "llvm/Support/Compiler.h"
4545
#include "llvm/Support/ErrorHandling.h"
46+
#include "llvm/Support/TimeProfiler.h"
4647

4748
#include <algorithm>
4849
#include <cassert>
@@ -1052,6 +1053,7 @@ void MetadataLoader::MetadataLoaderImpl::callMDTypeCallback(Metadata **Val,
10521053
/// Parse a METADATA_BLOCK. If ModuleLevel is true then we are parsing
10531054
/// module level metadata.
10541055
Error MetadataLoader::MetadataLoaderImpl::parseMetadata(bool ModuleLevel) {
1056+
llvm::TimeTraceScope timeScope("Parse metadata");
10551057
if (!ModuleLevel && MetadataList.hasFwdRefs())
10561058
return error("Invalid metadata: fwd refs into function blocks");
10571059

llvm/lib/IR/AutoUpgrade.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
#include "llvm/Support/ErrorHandling.h"
4949
#include "llvm/Support/NVPTXAddrSpace.h"
5050
#include "llvm/Support/Regex.h"
51+
#include "llvm/Support/TimeProfiler.h"
5152
#include "llvm/TargetParser/Triple.h"
5253
#include <cstdint>
5354
#include <cstring>
@@ -5256,6 +5257,7 @@ bool llvm::UpgradeDebugInfo(Module &M) {
52565257
if (DisableAutoUpgradeDebugInfo)
52575258
return false;
52585259

5260+
llvm::TimeTraceScope timeScope("Upgrade debug info");
52595261
// We need to get metadata before the module is verified (i.e., getModuleFlag
52605262
// makes assumptions that we haven't verified yet). Carefully extract the flag
52615263
// from the metadata.

llvm/lib/IR/DebugInfo.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@
3636
#include "llvm/IR/Module.h"
3737
#include "llvm/IR/PassManager.h"
3838
#include "llvm/Support/Casting.h"
39+
#include "llvm/Support/TimeProfiler.h"
3940
#include <algorithm>
4041
#include <cassert>
4142
#include <optional>
@@ -563,6 +564,7 @@ bool llvm::stripDebugInfo(Function &F) {
563564
}
564565

565566
bool llvm::StripDebugInfo(Module &M) {
567+
llvm::TimeTraceScope timeScope("Strip debug info");
566568
bool Changed = false;
567569

568570
for (NamedMDNode &NMD : llvm::make_early_inc_range(M.named_metadata())) {

llvm/lib/IR/Module.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@
4444
#include "llvm/Support/MemoryBuffer.h"
4545
#include "llvm/Support/Path.h"
4646
#include "llvm/Support/RandomNumberGenerator.h"
47+
#include "llvm/Support/TimeProfiler.h"
4748
#include "llvm/Support/VersionTuple.h"
4849
#include <cassert>
4950
#include <cstdint>
@@ -478,6 +479,7 @@ Error Module::materializeAll() {
478479
}
479480

480481
Error Module::materializeMetadata() {
482+
llvm::TimeTraceScope timeScope("Materialize metadata");
481483
if (!Materializer)
482484
return Error::success();
483485
return Materializer->materializeMetadata();

llvm/lib/IR/Verifier.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -119,6 +119,7 @@
119119
#include "llvm/Support/ErrorHandling.h"
120120
#include "llvm/Support/MathExtras.h"
121121
#include "llvm/Support/ModRef.h"
122+
#include "llvm/Support/TimeProfiler.h"
122123
#include "llvm/Support/raw_ostream.h"
123124
#include <algorithm>
124125
#include <cassert>
@@ -399,6 +400,7 @@ class Verifier : public InstVisitor<Verifier>, VerifierSupport {
399400
bool hasBrokenDebugInfo() const { return BrokenDebugInfo; }
400401

401402
bool verify(const Function &F) {
403+
llvm::TimeTraceScope timeScope("Verifier");
402404
assert(F.getParent() == &M &&
403405
"An instance of this class only works with a specific module!");
404406

@@ -2822,6 +2824,7 @@ static Instruction *getSuccPad(Instruction *Terminator) {
28222824
}
28232825

28242826
void Verifier::verifySiblingFuncletUnwinds() {
2827+
llvm::TimeTraceScope timeScope("Verifier verify sibling funclet unwinds");
28252828
SmallPtrSet<Instruction *, 8> Visited;
28262829
SmallPtrSet<Instruction *, 8> Active;
28272830
for (const auto &Pair : SiblingFuncletInfo) {

llvm/lib/LTO/LTO.cpp

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -631,6 +631,7 @@ LTO::~LTO() = default;
631631
void LTO::addModuleToGlobalRes(ArrayRef<InputFile::Symbol> Syms,
632632
ArrayRef<SymbolResolution> Res,
633633
unsigned Partition, bool InSummary) {
634+
llvm::TimeTraceScope timeScope("LTO add module to global resolution");
634635
auto *ResI = Res.begin();
635636
auto *ResE = Res.end();
636637
(void)ResE;
@@ -731,6 +732,7 @@ static void writeToResolutionFile(raw_ostream &OS, InputFile *Input,
731732

732733
Error LTO::add(std::unique_ptr<InputFile> Input,
733734
ArrayRef<SymbolResolution> Res) {
735+
llvm::TimeTraceScope timeScope("LTO add input", Input->getName());
734736
assert(!CalledGetMaxTasks);
735737

736738
if (Conf.ResolutionFile)
@@ -756,6 +758,7 @@ Error LTO::add(std::unique_ptr<InputFile> Input,
756758
Expected<ArrayRef<SymbolResolution>>
757759
LTO::addModule(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
758760
unsigned ModI, ArrayRef<SymbolResolution> Res) {
761+
llvm::TimeTraceScope timeScope("LTO add module", Input.getName());
759762
Expected<BitcodeLTOInfo> LTOInfo = Input.Mods[ModI].getLTOInfo();
760763
if (!LTOInfo)
761764
return LTOInfo.takeError();
@@ -850,6 +853,7 @@ Expected<
850853
LTO::addRegularLTO(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
851854
BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
852855
ArrayRef<SymbolResolution> Res) {
856+
llvm::TimeTraceScope timeScope("LTO add regular LTO");
853857
RegularLTOState::AddedModule Mod;
854858
Expected<std::unique_ptr<Module>> MOrErr =
855859
BM.getLazyModule(RegularLTO.Ctx, /*ShouldLazyLoadMetadata*/ true,
@@ -1024,6 +1028,7 @@ LTO::addRegularLTO(InputFile &Input, ArrayRef<SymbolResolution> InputRes,
10241028

10251029
Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
10261030
bool LivenessFromIndex) {
1031+
llvm::TimeTraceScope timeScope("LTO link regular LTO");
10271032
std::vector<GlobalValue *> Keep;
10281033
for (GlobalValue *GV : Mod.Keep) {
10291034
if (LivenessFromIndex && !ThinLTO.CombinedIndex.isGUIDLive(GV->getGUID())) {
@@ -1063,6 +1068,7 @@ Error LTO::linkRegularLTO(RegularLTOState::AddedModule Mod,
10631068
Expected<ArrayRef<SymbolResolution>>
10641069
LTO::addThinLTO(BitcodeModule BM, ArrayRef<InputFile::Symbol> Syms,
10651070
ArrayRef<SymbolResolution> Res) {
1071+
llvm::TimeTraceScope timeScope("LTO add thin LTO");
10661072
ArrayRef<SymbolResolution> ResTmp = Res;
10671073
for (const InputFile::Symbol &Sym : Syms) {
10681074
assert(!ResTmp.empty());
@@ -1252,6 +1258,7 @@ Error LTO::run(AddStreamFn AddStream, FileCache Cache) {
12521258

12531259
void lto::updateMemProfAttributes(Module &Mod,
12541260
const ModuleSummaryIndex &Index) {
1261+
llvm::TimeTraceScope timeScope("LTO update memprof attributes");
12551262
if (Index.withSupportsHotColdNew())
12561263
return;
12571264

@@ -1282,6 +1289,7 @@ void lto::updateMemProfAttributes(Module &Mod,
12821289
}
12831290

12841291
Error LTO::runRegularLTO(AddStreamFn AddStream) {
1292+
llvm::TimeTraceScope timeScope("Run regular LTO");
12851293
// Setup optimization remarks.
12861294
auto DiagFileOrErr = lto::setupLLVMOptimizationRemarks(
12871295
RegularLTO.CombinedModule->getContext(), Conf.RemarksFilename,
@@ -1294,10 +1302,12 @@ Error LTO::runRegularLTO(AddStreamFn AddStream) {
12941302

12951303
// Finalize linking of regular LTO modules containing summaries now that
12961304
// we have computed liveness information.
1297-
for (auto &M : RegularLTO.ModsWithSummaries)
1298-
if (Error Err = linkRegularLTO(std::move(M),
1299-
/*LivenessFromIndex=*/true))
1300-
return Err;
1305+
{
1306+
llvm::TimeTraceScope timeScope("Link regular LTO");
1307+
for (auto &M : RegularLTO.ModsWithSummaries)
1308+
if (Error Err = linkRegularLTO(std::move(M), /*LivenessFromIndex=*/true))
1309+
return Err;
1310+
}
13011311

13021312
// Ensure we don't have inconsistently split LTO units with type tests.
13031313
// FIXME: this checks both LTO and ThinLTO. It happens to work as we take
@@ -1526,6 +1536,9 @@ class InProcessThinBackend : public CGThinBackend {
15261536
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
15271537
const GVSummaryMapTy &DefinedGlobals,
15281538
MapVector<StringRef, BitcodeModule> &ModuleMap) {
1539+
auto ModuleID = BM.getModuleIdentifier();
1540+
llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (in-process)",
1541+
ModuleID);
15291542
auto RunThinBackend = [&](AddStreamFn AddStream) {
15301543
LTOLLVMContext BackendContext(Conf);
15311544
Expected<std::unique_ptr<Module>> MOrErr = BM.parseModule(BackendContext);
@@ -1536,9 +1549,6 @@ class InProcessThinBackend : public CGThinBackend {
15361549
ImportList, DefinedGlobals, &ModuleMap,
15371550
Conf.CodeGenOnly);
15381551
};
1539-
1540-
auto ModuleID = BM.getModuleIdentifier();
1541-
15421552
if (ShouldEmitIndexFiles) {
15431553
if (auto E = emitFiles(ImportList, ModuleID, ModuleID.str()))
15441554
return E;
@@ -1639,6 +1649,9 @@ class FirstRoundThinBackend : public InProcessThinBackend {
16391649
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
16401650
const GVSummaryMapTy &DefinedGlobals,
16411651
MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1652+
auto ModuleID = BM.getModuleIdentifier();
1653+
llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (first round)",
1654+
ModuleID);
16421655
auto RunThinBackend = [&](AddStreamFn CGAddStream,
16431656
AddStreamFn IRAddStream) {
16441657
LTOLLVMContext BackendContext(Conf);
@@ -1650,8 +1663,6 @@ class FirstRoundThinBackend : public InProcessThinBackend {
16501663
ImportList, DefinedGlobals, &ModuleMap,
16511664
Conf.CodeGenOnly, IRAddStream);
16521665
};
1653-
1654-
auto ModuleID = BM.getModuleIdentifier();
16551666
// Like InProcessThinBackend, we produce index files as needed for
16561667
// FirstRoundThinBackend. However, these files are not generated for
16571668
// SecondRoundThinBackend.
@@ -1735,6 +1746,9 @@ class SecondRoundThinBackend : public InProcessThinBackend {
17351746
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
17361747
const GVSummaryMapTy &DefinedGlobals,
17371748
MapVector<StringRef, BitcodeModule> &ModuleMap) override {
1749+
auto ModuleID = BM.getModuleIdentifier();
1750+
llvm::TimeTraceScope timeScope("Run ThinLTO backend thread (second round)",
1751+
ModuleID);
17381752
auto RunThinBackend = [&](AddStreamFn AddStream) {
17391753
LTOLLVMContext BackendContext(Conf);
17401754
std::unique_ptr<Module> LoadedModule =
@@ -1744,8 +1758,6 @@ class SecondRoundThinBackend : public InProcessThinBackend {
17441758
ImportList, DefinedGlobals, &ModuleMap,
17451759
/*CodeGenOnly=*/true);
17461760
};
1747-
1748-
auto ModuleID = BM.getModuleIdentifier();
17491761
if (!Cache.isValid() || !CombinedIndex.modulePaths().count(ModuleID) ||
17501762
all_of(CombinedIndex.getModuleHash(ModuleID),
17511763
[](uint32_t V) { return V == 0; }))
@@ -1915,6 +1927,7 @@ ThinBackend lto::createWriteIndexesThinBackend(
19151927

19161928
Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
19171929
const DenseSet<GlobalValue::GUID> &GUIDPreservedSymbols) {
1930+
llvm::TimeTraceScope timeScope("Run ThinLTO");
19181931
LLVM_DEBUG(dbgs() << "Running ThinLTO\n");
19191932
ThinLTO.CombinedIndex.releaseTemporaryMemory();
19201933
timeTraceProfilerBegin("ThinLink", StringRef(""));

llvm/lib/LTO/LTOBackend.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -366,6 +366,7 @@ bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
366366
bool IsThinLTO, ModuleSummaryIndex *ExportSummary,
367367
const ModuleSummaryIndex *ImportSummary,
368368
const std::vector<uint8_t> &CmdArgs) {
369+
llvm::TimeTraceScope timeScope("opt");
369370
if (EmbedBitcode == LTOBitcodeEmbedding::EmbedPostMergePreOptimized) {
370371
// FIXME: the motivation for capturing post-merge bitcode and command line
371372
// is replicating the compilation environment from bitcode, without needing
@@ -399,6 +400,7 @@ bool lto::opt(const Config &Conf, TargetMachine *TM, unsigned Task, Module &Mod,
399400
static void codegen(const Config &Conf, TargetMachine *TM,
400401
AddStreamFn AddStream, unsigned Task, Module &Mod,
401402
const ModuleSummaryIndex &CombinedIndex) {
403+
llvm::TimeTraceScope timeScope("codegen");
402404
if (Conf.PreCodeGenModuleHook && !Conf.PreCodeGenModuleHook(Task, Mod))
403405
return;
404406

@@ -552,6 +554,7 @@ Error lto::finalizeOptimizationRemarks(
552554
Error lto::backend(const Config &C, AddStreamFn AddStream,
553555
unsigned ParallelCodeGenParallelismLevel, Module &Mod,
554556
ModuleSummaryIndex &CombinedIndex) {
557+
llvm::TimeTraceScope timeScope("LTO backend");
555558
Expected<const Target *> TOrErr = initAndLookupTarget(C, Mod);
556559
if (!TOrErr)
557560
return TOrErr.takeError();
@@ -577,6 +580,7 @@ Error lto::backend(const Config &C, AddStreamFn AddStream,
577580

578581
static void dropDeadSymbols(Module &Mod, const GVSummaryMapTy &DefinedGlobals,
579582
const ModuleSummaryIndex &Index) {
583+
llvm::TimeTraceScope timeScope("Drop dead symbols");
580584
std::vector<GlobalValue*> DeadGVs;
581585
for (auto &GV : Mod.global_values())
582586
if (GlobalValueSummary *GVS = DefinedGlobals.lookup(GV.getGUID()))
@@ -603,6 +607,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
603607
MapVector<StringRef, BitcodeModule> *ModuleMap,
604608
bool CodeGenOnly, AddStreamFn IRAddStream,
605609
const std::vector<uint8_t> &CmdArgs) {
610+
llvm::TimeTraceScope timeScope("Thin backend", Mod.getModuleIdentifier());
606611
Expected<const Target *> TOrErr = initAndLookupTarget(Conf, Mod);
607612
if (!TOrErr)
608613
return TOrErr.takeError();
@@ -679,6 +684,7 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
679684
return finalizeOptimizationRemarks(std::move(DiagnosticOutputFile));
680685

681686
auto ModuleLoader = [&](StringRef Identifier) {
687+
llvm::TimeTraceScope moduleLoaderScope("Module loader", Identifier);
682688
assert(Mod.getContext().isODRUniquingDebugTypes() &&
683689
"ODR Type uniquing should be enabled on the context");
684690
if (ModuleMap) {
@@ -712,10 +718,13 @@ Error lto::thinBackend(const Config &Conf, unsigned Task, AddStreamFn AddStream,
712718
return MOrErr;
713719
};
714720

715-
FunctionImporter Importer(CombinedIndex, ModuleLoader,
716-
ClearDSOLocalOnDeclarations);
717-
if (Error Err = Importer.importFunctions(Mod, ImportList).takeError())
718-
return Err;
721+
{
722+
llvm::TimeTraceScope importScope("Import functions");
723+
FunctionImporter Importer(CombinedIndex, ModuleLoader,
724+
ClearDSOLocalOnDeclarations);
725+
if (Error Err = Importer.importFunctions(Mod, ImportList).takeError())
726+
return Err;
727+
}
719728

720729
// Do this after any importing so that imported code is updated.
721730
updateMemProfAttributes(Mod, CombinedIndex);

0 commit comments

Comments
 (0)