@@ -1395,11 +1395,12 @@ class lto::ThinBackendProc {
13951395 MapVector<StringRef, BitcodeModule> &ModuleMap) = 0;
13961396 virtual Error wait () = 0;
13971397 virtual unsigned getThreadCount () = 0;
1398+ virtual bool isSensitiveToInputOrder () { return false ; }
13981399
13991400 // Write sharded indices and (optionally) imports to disk
14001401 Error emitFiles (const FunctionImporter::ImportMapTy &ImportList,
14011402 llvm::StringRef ModulePath,
1402- const std::string &NewModulePath) {
1403+ const std::string &NewModulePath) const {
14031404 ModuleToSummariesForIndexTy ModuleToSummariesForIndex;
14041405 GVSummaryPtrSet DeclarationSummaries;
14051406
@@ -1614,6 +1615,10 @@ namespace {
16141615class WriteIndexesThinBackend : public ThinBackendProc {
16151616 std::string OldPrefix, NewPrefix, NativeObjectPrefix;
16161617 raw_fd_ostream *LinkedObjectsFile;
1618+ DefaultThreadPool BackendThreadPool;
1619+ std::optional<Error> Err;
1620+ std::mutex ErrMu;
1621+ std::mutex OnWriteMu;
16171622
16181623public:
16191624 WriteIndexesThinBackend (
@@ -1635,8 +1640,6 @@ class WriteIndexesThinBackend : public ThinBackendProc {
16351640 const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
16361641 MapVector<StringRef, BitcodeModule> &ModuleMap) override {
16371642 StringRef ModulePath = BM.getModuleIdentifier ();
1638- std::string NewModulePath =
1639- getThinLTOOutputFile (ModulePath, OldPrefix, NewPrefix);
16401643
16411644 if (LinkedObjectsFile) {
16421645 std::string ObjectPrefix =
@@ -1646,19 +1649,48 @@ class WriteIndexesThinBackend : public ThinBackendProc {
16461649 *LinkedObjectsFile << LinkedObjectsFilePath << ' \n ' ;
16471650 }
16481651
1649- if (auto E = emitFiles (ImportList, ModulePath, NewModulePath))
1650- return E;
1652+ BackendThreadPool.async (
1653+ [this ](const StringRef ModulePath,
1654+ const FunctionImporter::ImportMapTy &ImportList,
1655+ const std::string &OldPrefix, const std::string &NewPrefix) {
1656+ std::string NewModulePath =
1657+ getThinLTOOutputFile (ModulePath, OldPrefix, NewPrefix);
1658+ auto E = emitFiles (ImportList, ModulePath, NewModulePath);
1659+ if (E) {
1660+ std::unique_lock<std::mutex> L (ErrMu);
1661+ if (Err)
1662+ Err = joinErrors (std::move (*Err), std::move (E));
1663+ else
1664+ Err = std::move (E);
1665+ return ;
1666+ }
1667+ if (OnWrite) {
1668+ // Serialize calls to the OnWrite callback in case it is not
1669+ // thread-safe.
1670+ std::unique_lock<std::mutex> L (OnWriteMu);
1671+ OnWrite (std::string (ModulePath));
1672+ }
1673+ },
1674+ ModulePath, ImportList, OldPrefix, NewPrefix);
1675+ return Error::success ();
1676+ }
16511677
1652- if (OnWrite)
1653- OnWrite (std::string (ModulePath));
1678+ Error wait () override {
1679+ BackendThreadPool.wait ();
1680+ if (Err)
1681+ return std::move (*Err);
16541682 return Error::success ();
16551683 }
16561684
1657- Error wait () override { return Error::success (); }
1685+ unsigned getThreadCount () override {
1686+ return BackendThreadPool.getMaxConcurrency ();
1687+ }
16581688
1659- // WriteIndexesThinBackend should always return 1 to prevent module
1660- // re-ordering and avoid non-determinism in the final link.
1661- unsigned getThreadCount () override { return 1 ; }
1689+ bool isSensitiveToInputOrder () override {
1690+ // The order in which modules are written to LinkedObjectsFile should be
1691+ // deterministic and match the order they are passed on the command line.
1692+ return true ;
1693+ }
16621694};
16631695} // end anonymous namespace
16641696
@@ -1854,20 +1886,20 @@ Error LTO::runThinLTO(AddStreamFn AddStream, FileCache Cache,
18541886 ResolvedODR[Mod.first ], ThinLTO.ModuleMap );
18551887 };
18561888
1857- if (BackendProcess->getThreadCount () == 1 ) {
1858- // Process the modules in the order they were provided on the
1859- // command-line. It is important for this codepath to be used for
1860- // WriteIndexesThinBackend, to ensure the emitted LinkedObjectsFile lists
1861- // ThinLTO objects in the same order as the inputs, which otherwise would
1862- // affect the final link order.
1889+ if (BackendProcess->getThreadCount () == 1 ||
1890+ BackendProcess-> isSensitiveToInputOrder ()) {
1891+ // Process the modules in the order they were provided on the command-line.
1892+ // It is important for this codepath to be used for WriteIndexesThinBackend,
1893+ // to ensure the emitted LinkedObjectsFile lists ThinLTO objects in the same
1894+ // order as the inputs, which otherwise would affect the final link order.
18631895 for (int I = 0 , E = ModuleMap.size (); I != E; ++I)
18641896 if (Error E = ProcessOneModule (I))
18651897 return E;
18661898 } else {
18671899 // When executing in parallel, process largest bitsize modules first to
18681900 // improve parallelism, and avoid starving the thread pool near the end.
1869- // This saves about 15 sec on a 36-core machine while link `clang.exe`
1870- // (out of 100 sec).
1901+ // This saves about 15 sec (out of 100 sec) on a 36-core machine while
1902+ // linking `clang.exe`.
18711903 std::vector<BitcodeModule *> ModulesVec;
18721904 ModulesVec.reserve (ModuleMap.size ());
18731905 for (auto &Mod : ModuleMap)
0 commit comments