Skip to content

Commit 445bd56

Browse files
author
Nuri Amari
committed
Make WriteIndexesThinBackend multi threaded
We've noticed that for large builds executing a thin-link can take on the order of 10s of minutes. We are only using a single thread to write the sharded indices and import files for each input bitcode file. While we need to ensure the index files produced list modules in a deterministic order, that doesn't prevent us from executing the rest of the work in parallel. In this change we use a thread pool to execute as much of the backend's work as possible in parallel. In local testing on a machine with 80 cores, this change makes a thin-link for ~100,000 input files run in ~2 minutes. Without this change it takes upwards of 10 minutes.
1 parent acf92a4 commit 445bd56

File tree

1 file changed

+51
-19
lines changed

1 file changed

+51
-19
lines changed

llvm/lib/LTO/LTO.cpp

Lines changed: 51 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1395,11 +1395,12 @@ class lto::ThinBackendProc {
13951395
MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
13961396
virtual Error wait() = 0;
13971397
virtual unsigned getThreadCount() = 0;
1398+
virtual bool isSensitiveToInputOrder() { return false; }
13981399

13991400
// Write sharded indices and (optionally) imports to disk
14001401
Error emitFiles(const FunctionImporter::ImportMapTy &ImportList,
14011402
llvm::StringRef ModulePath,
1402-
const std::string &NewModulePath) {
1403+
const std::string &NewModulePath) const {
14031404
ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
14041405
GVSummaryPtrSet DeclarationSummaries;
14051406

@@ -1614,6 +1615,10 @@ namespace {
16141615
class WriteIndexesThinBackend : public ThinBackendProc {
16151616
std::string OldPrefix, NewPrefix, NativeObjectPrefix;
16161617
raw_fd_ostream *LinkedObjectsFile;
1618+
DefaultThreadPool BackendThreadPool;
1619+
std::optional<Error> Err;
1620+
std::mutex ErrMu;
1621+
std::mutex OnWriteMu;
16171622

16181623
public:
16191624
WriteIndexesThinBackend(
@@ -1635,8 +1640,6 @@ class WriteIndexesThinBackend : public ThinBackendProc {
16351640
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
16361641
MapVector<StringRef, BitcodeModule> &ModuleMap) override {
16371642
StringRef ModulePath = BM.getModuleIdentifier();
1638-
std::string NewModulePath =
1639-
getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
16401643

16411644
if (LinkedObjectsFile) {
16421645
std::string ObjectPrefix =
@@ -1646,19 +1649,48 @@ class WriteIndexesThinBackend : public ThinBackendProc {
16461649
*LinkedObjectsFile << LinkedObjectsFilePath << '\n';
16471650
}
16481651

1649-
if (auto E = emitFiles(ImportList, ModulePath, NewModulePath))
1650-
return E;
1652+
BackendThreadPool.async(
1653+
[this](const StringRef ModulePath,
1654+
const FunctionImporter::ImportMapTy &ImportList,
1655+
const std::string &OldPrefix, const std::string &NewPrefix) {
1656+
std::string NewModulePath =
1657+
getThinLTOOutputFile(ModulePath, OldPrefix, NewPrefix);
1658+
auto E = emitFiles(ImportList, ModulePath, NewModulePath);
1659+
if (E) {
1660+
std::unique_lock<std::mutex> L(ErrMu);
1661+
if (Err)
1662+
Err = joinErrors(std::move(*Err), std::move(E));
1663+
else
1664+
Err = std::move(E);
1665+
return;
1666+
}
1667+
if (OnWrite) {
1668+
// Serialize calls to the on write callback in case it is not thread
1669+
// safe
1670+
std::unique_lock<std::mutex> L(OnWriteMu);
1671+
OnWrite(std::string(ModulePath));
1672+
}
1673+
},
1674+
ModulePath, ImportList, OldPrefix, NewPrefix);
1675+
return Error::success();
1676+
}
16511677

1652-
if (OnWrite)
1653-
OnWrite(std::string(ModulePath));
1678+
Error wait() override {
1679+
BackendThreadPool.wait();
1680+
if (Err)
1681+
return std::move(*Err);
16541682
return Error::success();
16551683
}
16561684

1657-
Error wait() override { return Error::success(); }
1685+
unsigned getThreadCount() override {
1686+
return BackendThreadPool.getMaxConcurrency();
1687+
}
16581688

1659-
// WriteIndexesThinBackend should always return 1 to prevent module
1660-
// re-ordering and avoid non-determinism in the final link.
1661-
unsigned getThreadCount() override { return 1; }
1689+
bool isSensitiveToInputOrder() override {
1690+
// The order which modules are written to LinkedObjectsFile should be
1691+
// deterministic and match the order they are passed on the command line.
1692+
return true;
1693+
}
16621694
};
16631695
} // end anonymous namespace
16641696

@@ -1854,20 +1886,20 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
18541886
ResolvedODR[Mod.first], ThinLTO.ModuleMap);
18551887
};
18561888

1857-
if (BackendProcess->getThreadCount() == 1) {
1858-
// Process the modules in the order they were provided on the
1859-
// command-line. It is important for this codepath to be used for
1860-
// WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists
1861-
// ThinLTO objects in the same order as the inputs, which otherwise would
1862-
// affect the final link order.
1889+
if (BackendProcess->getThreadCount() == 1 ||
1890+
BackendProcess->isSensitiveToInputOrder()) {
1891+
// Process the modules in the order they were provided on the command-line.
1892+
// It is important for this codepath to be used for WriteIndexesThinBackend,
1893+
// to ensure the emitted LinkedObjectsFile lists ThinLTO objects in the same
1894+
// order as the inputs, which otherwise would affect the final link order.
18631895
for (int I = 0, E = ModuleMap.size(); I != E; ++I)
18641896
if (Error E = ProcessOneModule(I))
18651897
return E;
18661898
} else {
18671899
// When executing in parallel, process largest bitsize modules first to
18681900
// improve parallelism, and avoid starving the thread pool near the end.
1869-
// This saves about 15 sec on a 36-core machine while link `clang.exe`
1870-
// (out of 100 sec).
1901+
// This saves about 15 sec on a 36-core machine while link `clang.exe` (out
1902+
// of 100 sec).
18711903
std::vector<BitcodeModule *> ModulesVec;
18721904
ModulesVec.reserve(ModuleMap.size());
18731905
for (auto &Mod : ModuleMap)

0 commit comments

Comments
 (0)