diff --git a/cross-project-tests/dtlto/dtlto-cache.test b/cross-project-tests/dtlto/dtlto-cache.test
new file mode 100644
index 0000000000000..b98d4dbb433bb
--- /dev/null
+++ b/cross-project-tests/dtlto/dtlto-cache.test
@@ -0,0 +1,89 @@
+REQUIRES: x86-registered-target, ld.lld
+
+# Show that the ThinLTO cache works with DTLTO.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Compile source files into bitcode files.
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c
+
+# Execute the linker and check that the cache is populated.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN: main.o foo.o -o populate1.elf \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang \
+RUN: -Wl,--thinlto-cache-dir=cache.dir \
+RUN: -Wl,--save-temps
+
+# Check that two backend compilation jobs occurred. The distributor JSON
+# contains one "args" entry for the common arguments plus one per job.
+RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir/llvmcache.timestamp
+RUN: ls cache.dir | count 3
+
+# Execute the linker again and check that a fully populated cache is used correctly,
+# i.e., no additional cache entries are created for cache hits.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN: main.o foo.o -o populate2.elf \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang \
+RUN: -Wl,--thinlto-cache-dir=cache.dir \
+RUN: -Wl,--save-temps
+
+# Check that no backend compilation jobs occurred.
+RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 1
+RUN: ls cache.dir | count 3
+
+RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c foo.c -o foo.O0.o
+RUN: %clang -O0 --target=x86_64-linux-gnu -flto=thin -c main.c -o main.O0.o
+
+# Execute the linker again and check that the cache is populated correctly when there
+# are no cache hits but there are existing cache entries.
+# As a side effect, this also verifies that the optimization level is considered when
+# evaluating the cache entry key.
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN: main.O0.o foo.O0.o -o populate3.elf \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang \
+RUN: -Wl,--thinlto-cache-dir=cache.dir \
+RUN: -Wl,--save-temps
+
+# Check that two new backend compilation jobs occurred.
+RUN: grep -wo args populate3.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir | count 5
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c main-partial.c
+
+# Execute the linker and check that everything works correctly with the partially
+# populated cache; one more cache entry should be generated after this run.
+
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN: main-partial.o foo.o -o main-partial.elf \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang \
+RUN: -Wl,--thinlto-cache-dir=cache.dir \
+RUN: -Wl,--save-temps
+
+# Check that one new backend compilation job occurred.
+RUN: grep -wo args main-partial.*.dist-file.json | wc -l | grep -qx 2
+RUN: ls cache.dir | count 6
+
+#--- foo.c
+volatile int foo_int;
+__attribute__((retain)) int foo(int x) { return x + foo_int; }
+
+#--- main.c
+extern int foo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+  return foo(argc);
+}
+
+#--- main-partial.c
+extern int foo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+  return foo(argc+1);
+}
diff --git a/cross-project-tests/dtlto/dtlto-thinlto-cache.test b/cross-project-tests/dtlto/dtlto-thinlto-cache.test
new file mode 100644
index 0000000000000..c177112e2dbbd
--- /dev/null
+++ b/cross-project-tests/dtlto/dtlto-thinlto-cache.test
@@ -0,0 +1,70 @@
+REQUIRES: x86-registered-target, ld.lld
+
+# This test verifies that a cache populated by in-process ThinLTO codegen is
+# not reused by out-of-process (DTLTO) codegen, and vice versa.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+# Compile source files into bitcode files.
+RUN: %clang -O2 --target=x86_64-linux-gnu -flto=thin -c foo.c main.c
+
+# Execute the linker and check that the in-process ThinLTO cache is populated.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN: main.o foo.o -o main.elf \
+RUN: -Wl,--thinlto-cache-dir=cache.dir \
+RUN: -Wl,--save-temps
+
+RUN: ls cache.dir/llvmcache.timestamp
+RUN: ls cache.dir | count 3
+
+# Execute the linker and check that out-of-process codegen (DTLTO) adds
+# additional entries to the cache, implying that in-process and
+# out-of-process codegens do not share cache entries.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN: main.o foo.o -o populate1.elf \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang \
+RUN: -Wl,--thinlto-cache-dir=cache.dir \
+RUN: -Wl,--save-temps
+
+# Check that two backend compilation jobs occurred.
+RUN: grep -wo args populate1.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir | count 5
+
+# Clean up the cache directory.
+RUN: rm -rf cache.dir
+
+# Execute the linker and check that the out-of-process (DTLTO) cache is populated.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN: main.o foo.o -o populate2.elf \
+RUN: -Wl,--thinlto-distributor=%python \
+RUN: -Wl,--thinlto-distributor-arg=%llvm_src_root/utils/dtlto/local.py \
+RUN: -Wl,--thinlto-remote-compiler=%clang \
+RUN: -Wl,--thinlto-cache-dir=cache.dir \
+RUN: -Wl,--save-temps
+
+# Check that two backend compilation jobs occurred.
+RUN: grep -wo args populate2.*.dist-file.json | wc -l | grep -qx 3
+RUN: ls cache.dir/llvmcache.timestamp
+RUN: ls cache.dir | count 3
+
+# Execute the linker and check that in-process codegen adds additional entries
+# to the cache, implying that in-process and out-of-process codegens do
+# not share cache entries.
+RUN: %clang -O2 --target=x86_64-linux-gnu -Werror -flto=thin -fuse-ld=lld -nostdlib -e main \
+RUN: main.o foo.o -o main.elf \
+RUN: -Wl,--thinlto-cache-dir=cache.dir \
+RUN: -Wl,--save-temps
+
+RUN: ls cache.dir | count 5
+
+#--- foo.c
+volatile int foo_int;
+__attribute__((retain)) int foo(int x) { return x + foo_int; }
+
+#--- main.c
+extern int foo(int x);
+__attribute__((retain)) int main(int argc, char** argv) {
+  return foo(argc);
+}
diff --git a/llvm/include/llvm/LTO/Config.h b/llvm/include/llvm/LTO/Config.h
index 50e143c518213..566a87ed1a790 100644
--- a/llvm/include/llvm/LTO/Config.h
+++ b/llvm/include/llvm/LTO/Config.h
@@ -94,6 +94,11 @@ struct Config {
   /// need to create copies, so it can set this field to false.
   bool KeepSymbolNameCopies = true;
 
+  /// Mixed into ThinLTO cache keys so that entries produced by in-process
+  /// codegen and by out-of-process (DTLTO) codegen are kept distinct. Mutable
+  /// so that the DTLTO backend can set it through the const Config reference
+  /// it holds.
+  mutable bool Dtlto = false;
+
   /// Allows non-imported definitions to get the potentially more constraining
   /// visibility from the prevailing definition. FromPrevailing is the default
   /// because it works for many binary formats. ELF can use the more optimized
diff --git a/llvm/lib/LTO/LTO.cpp b/llvm/lib/LTO/LTO.cpp
index fefc733fa7697..a02af59600c44 100644
--- a/llvm/lib/LTO/LTO.cpp
+++ b/llvm/lib/LTO/LTO.cpp
@@ -169,6 +169,7 @@ std::string llvm::computeLTOCacheKey(
   AddString(Conf.OverrideTriple);
   AddString(Conf.DefaultTriple);
   AddString(Conf.DwoDir);
+  AddUint8(Conf.Dtlto);
 
   // Include the hash for the current module
   auto ModHash = Index.getModuleHash(ModuleID);
@@ -2226,7 +2227,8 @@ class OutOfProcessThinBackend : public CGThinBackend {
 
   SmallVector<StringRef> CodegenOptions;
   DenseSet<StringRef> CommonInputs;
-
+  // The number of jobs whose objects were satisfied from the cache.
+  std::atomic<size_t> CachedJobs{0};
   // Information specific to individual backend compilation job.
   struct Job {
     unsigned Task;
@@ -2234,6 +2236,9 @@
     StringRef NativeObjectPath;
     StringRef SummaryIndexPath;
     ImportsFilesContainer ImportsFiles;
+    std::string CacheKey;
+    AddStreamFn CacheAddStream;
+    bool Cached = false;
   };
   // The set of backend compilations jobs.
   SmallVector<Job> Jobs;
@@ -2247,12 +2252,15 @@
   // The target triple to supply for backend compilations.
   llvm::Triple Triple;
 
+  // The ThinLTO cache, used to store and reuse backend compilation results.
+  FileCache Cache;
+
 public:
   OutOfProcessThinBackend(
       const Config &Conf, ModuleSummaryIndex &CombinedIndex,
       ThreadPoolStrategy ThinLTOParallelism,
       const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-      AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
+      AddStreamFn AddStream, FileCache CacheFn, lto::IndexWriteCallback OnWrite,
       bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
       StringRef LinkerOutputFile, StringRef Distributor,
       ArrayRef<StringRef> DistributorArgs, StringRef RemoteCompiler,
@@ -2264,7 +2272,8 @@ class OutOfProcessThinBackend : public CGThinBackend {
         LinkerOutputFile(LinkerOutputFile), DistributorPath(Distributor),
         DistributorArgs(DistributorArgs), RemoteCompiler(RemoteCompiler),
         RemoteCompilerPrependArgs(RemoteCompilerPrependArgs),
-        RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps) {}
+        RemoteCompilerArgs(RemoteCompilerArgs), SaveTemps(SaveTemps),
+        Cache(std::move(CacheFn)) {}
 
   void setup(unsigned ThinLTONumTasks, unsigned ThinLTOTaskOffset,
              llvm::Triple Triple) override {
@@ -2272,6 +2281,54 @@
     Jobs.resize((size_t)ThinLTONumTasks);
     this->ThinLTOTaskOffset = ThinLTOTaskOffset;
     this->Triple = Triple;
+    this->Conf.Dtlto = true;
+  }
+
+  virtual Error runThinLTOBackendThread(
+      Job &J, const FunctionImporter::ImportMapTy &ImportList,
+      const FunctionImporter::ExportSetTy &ExportList,
+      const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
+          &ResolvedODR) {
+
+    llvm::TimeTraceScope timeScope(
+        "Run ThinLTO backend thread (out-of-process)", J.ModuleID);
+
+    if (auto E = emitFiles(ImportList, J.ModuleID, J.ModuleID.str(),
+                           J.SummaryIndexPath, J.ImportsFiles))
+      return E;
+
+    if (!Cache.isValid() || !CombinedIndex.modulePaths().count(J.ModuleID) ||
+        all_of(CombinedIndex.getModuleHash(J.ModuleID),
+               [](uint32_t V) { return V == 0; }))
+      // The cache is disabled, there is no entry for this module in the
+      // combined index, or the module hash is missing.
+      return Error::success();
+
+    const GVSummaryMapTy &DefinedGlobals =
+        ModuleToDefinedGVSummaries.find(J.ModuleID)->second;
+
+    // Compute the key that identifies this module's cache entry.
+    J.CacheKey = computeLTOCacheKey(Conf, CombinedIndex, J.ModuleID, ImportList,
+                                    ExportList, ResolvedODR, DefinedGlobals,
+                                    CfiFunctionDefs, CfiFunctionDecls);
+
+    // Query the cache with the computed key.
+    auto CacheAddStreamExp = Cache(J.Task, J.CacheKey, J.ModuleID);
+    if (Error Err = CacheAddStreamExp.takeError())
+      return Err;
+    AddStreamFn &CacheAddStream = *CacheAddStreamExp;
+    // If CacheAddStream is null, we have a cache hit, and at this point the
+    // object file has already been passed back to the linker.
+    if (!CacheAddStream) {
+      J.Cached = true; // Cache hit; mark the job as cached.
+      CachedJobs.fetch_add(1);
+    } else {
+      // If CacheAddStream is not null, we have a cache miss and need to run
+      // the backend for codegen. Save the cache 'add stream' function for
+      // later use.
+      J.CacheAddStream = std::move(CacheAddStream);
+    }
+    return Error::success();
+  }
 
   Error start(
@@ -2288,22 +2345,27 @@
                              itostr(Task) + "." + UID + ".native.o");
 
     Job &J = Jobs[Task - ThinLTOTaskOffset];
-    J = {
-        Task,
-        ModulePath,
-        Saver.save(ObjFilePath.str()),
-        Saver.save(ObjFilePath.str() + ".thinlto.bc"),
-        {} // Filled in by emitFiles below.
-    };
+    J = {Task,
+         ModulePath,
+         Saver.save(ObjFilePath.str()),
+         Saver.save(ObjFilePath.str() + ".thinlto.bc"),
+         {}, // Filled in by emitFiles below.
+         /*CacheKey=*/"",
+         /*CacheAddStream=*/nullptr,
+         /*Cached=*/false};
 
     assert(ModuleToDefinedGVSummaries.count(ModulePath));
 
     // The BackendThreadPool is only used here to write the sharded index files
     // (similar to WriteIndexesThinBackend).
     BackendThreadPool.async(
-        [=](Job &J, const FunctionImporter::ImportMapTy &ImportList) {
-          if (auto E = emitFiles(ImportList, J.ModuleID, J.ModuleID.str(),
-                                 J.SummaryIndexPath, J.ImportsFiles)) {
+        [=](Job &J, const FunctionImporter::ImportMapTy &ImportList,
+            const FunctionImporter::ExportSetTy &ExportList,
+            const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes>
+                &ResolvedODR) {
+          Error E =
+              runThinLTOBackendThread(J, ImportList, ExportList, ResolvedODR);
+          if (E) {
            std::unique_lock L(ErrMu);
            if (Err)
              Err = joinErrors(std::move(*Err), std::move(E));
@@ -2311,7 +2373,8 @@
            Err = std::move(E);
          }
        },
-        std::ref(J), std::ref(ImportList));
+        std::ref(J), std::ref(ImportList), std::ref(ExportList),
+        std::ref(ResolvedODR));
 
     return Error::success();
   }
@@ -2405,6 +2468,10 @@
     JOS.attributeArray("jobs", [&]() {
       for (const auto &J : Jobs) {
         assert(J.Task != 0);
+        if (J.Cached) {
+          assert(!Cache.getCacheDirectoryPath().empty());
+          continue;
+        }
 
         SmallVector<StringRef> Inputs;
         SmallVector<StringRef> Outputs;
@@ -2477,20 +2544,28 @@
       removeFile(JsonFile);
     });
 
-    SmallVector<StringRef> Args = {DistributorPath};
-    llvm::append_range(Args, DistributorArgs);
-    Args.push_back(JsonFile);
-    std::string ErrMsg;
-    if (sys::ExecuteAndWait(Args[0], Args,
-                            /*Env=*/std::nullopt, /*Redirects=*/{},
-                            /*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) {
-      return make_error<StringError>(
-          BCError + "distributor execution failed" +
-              (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
-          inconvertibleErrorCode());
+    // Only run the distributor if at least one job missed the cache.
+    if (CachedJobs.load() < Jobs.size()) {
+      SmallVector<StringRef> Args = {DistributorPath};
+      llvm::append_range(Args, DistributorArgs);
+      Args.push_back(JsonFile);
+      std::string ErrMsg;
+      if (sys::ExecuteAndWait(Args[0], Args,
+                              /*Env=*/std::nullopt, /*Redirects=*/{},
+                              /*SecondsToWait=*/0, /*MemoryLimit=*/0,
+                              &ErrMsg)) {
+        return make_error<StringError>(
+            BCError + "distributor execution failed" +
+                (!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
+            inconvertibleErrorCode());
+      }
     }
 
     for (auto &Job : Jobs) {
+      if (!Job.CacheKey.empty() && Job.Cached) {
+        assert(Cache.isValid());
+        continue;
+      }
       // Load the native object from a file into a memory buffer
       // and store its contents in the output buffer.
       auto ObjFileMbOrErr =
@@ -2501,15 +2576,35 @@
             BCError + "cannot open native object file: " +
                 Job.NativeObjectPath + ": " + EC.message(),
             inconvertibleErrorCode());
-      auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
-      if (Error Err = StreamOrErr.takeError())
-        report_fatal_error(std::move(Err));
-      auto &Stream = *StreamOrErr->get();
-      *Stream.OS << ObjFileMbOrErr->get()->getMemBufferRef().getBuffer();
-      if (Error Err = Stream.commit())
-        report_fatal_error(std::move(Err));
-    }
+      MemoryBufferRef ObjFileMbRef = ObjFileMbOrErr->get()->getMemBufferRef();
+      if (Cache.isValid()) {
+        // Cache hits were handled earlier; at this point we can only have
+        // cache misses.
+        assert(Job.CacheAddStream);
+        // Obtain a file stream for storing a cache entry.
+        auto CachedFileStreamOrErr = Job.CacheAddStream(Job.Task, Job.ModuleID);
+        if (!CachedFileStreamOrErr)
+          return joinErrors(
+              CachedFileStreamOrErr.takeError(),
+              createStringError(inconvertibleErrorCode(),
+                                "Cannot get a cache file stream: %s",
+                                Job.NativeObjectPath.data()));
+        // Store the object file buffer into the cache stream.
+        auto &CacheStream = *(CachedFileStreamOrErr->get());
+        *(CacheStream.OS) << ObjFileMbRef.getBuffer();
+        if (Error Err = CacheStream.commit())
+          return Err;
+      } else {
+        auto StreamOrErr = AddStream(Job.Task, Job.ModuleID);
+        if (Error Err = StreamOrErr.takeError())
+          report_fatal_error(std::move(Err));
+        auto &Stream = *StreamOrErr->get();
+        *Stream.OS << ObjFileMbRef.getBuffer();
+        if (Error Err = Stream.commit())
+          report_fatal_error(std::move(Err));
+      }
+    }
 
     return Error::success();
   }
 };
@@ -2525,12 +2620,13 @@ ThinBackend lto::createOutOfProcessThinBackend(
   auto Func =
       [=](const Config &Conf, ModuleSummaryIndex &CombinedIndex,
          const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
-          AddStreamFn AddStream, FileCache /*Cache*/) {
+          AddStreamFn AddStream, FileCache Cache) {
        return std::make_unique<OutOfProcessThinBackend>(
            Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
-            AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles,
-            LinkerOutputFile, Distributor, DistributorArgs, RemoteCompiler,
-            RemoteCompilerPrependArgs, RemoteCompilerArgs, SaveTemps);
+            AddStream, Cache, OnWrite, ShouldEmitIndexFiles,
+            ShouldEmitImportsFiles, LinkerOutputFile, Distributor,
+            DistributorArgs, RemoteCompiler, RemoteCompilerPrependArgs,
+            RemoteCompilerArgs, SaveTemps);
      };
   return ThinBackend(Func, Parallelism);
 }
diff --git a/llvm/test/ThinLTO/X86/dtlto/dtlto-cache.ll b/llvm/test/ThinLTO/X86/dtlto/dtlto-cache.ll
new file mode 100644
index 0000000000000..df98c5e90b1ae
--- /dev/null
+++ b/llvm/test/ThinLTO/X86/dtlto/dtlto-cache.ll
@@ -0,0 +1,74 @@
+; Test DTLTO caching with llvm-lto2.
+
+RUN: rm -rf %t && split-file %s %t && cd %t
+
+; Generate bitcode files with summary.
+RUN: opt -thinlto-bc t1.ll -o t1.bc
+RUN: opt -thinlto-bc t2.ll -o t2.bc
+
+; Generate fake object files for mock.py to return.
+RUN: touch t1.o t2.o
+
+; Create an empty subdirectory to avoid having to account for the input files.
+RUN: mkdir %t/out && cd %t/out
+
+; Define a substitution to share the common DTLTO arguments with caching enabled.
+DEFINE: %{command} = llvm-lto2 run ../t1.bc ../t2.bc -o t.o -cache-dir cache-dir \
+DEFINE: -dtlto-distributor=%python \
+DEFINE: -dtlto-distributor-arg=%llvm_src_root/utils/dtlto/mock.py,../t1.o,../t2.o \
+DEFINE: -r=../t1.bc,t1,px \
+DEFINE: -r=../t2.bc,t2,px
+
+; Perform out-of-process ThinLTO (DTLTO).
+; Note: mock.py does not do any compilation; instead, it simply writes
+; the contents of the object files supplied on the command line into the
+; output object files in job order.
+RUN: %{command}
+
+; Check that the expected output files have been created.
+RUN: ls | count 3
+; Check that two native object files have been created.
+RUN: ls | FileCheck %s --check-prefix=THINLTO
+; Check that the DTLTO cache directory has been created.
+RUN: ls cache-dir/* | count 2
+; Check that two cache entries have been created.
+RUN: ls cache-dir/llvmcache-* | count 2
+
+; llvm-lto2 ThinLTO output files.
+THINLTO-DAG: {{^}}t.o.1{{$}}
+THINLTO-DAG: {{^}}t.o.2{{$}}
+
+; Execute llvm-lto2 again and check that a fully populated cache is used correctly,
+; i.e., no additional cache entries are created for cache hits.
+RUN: %{command}
+
+; Check that the expected output files have been created.
+RUN: ls | count 3
+; Check that two native object files have been created.
+RUN: ls | FileCheck %s --check-prefix=THINLTO
+; Check that the DTLTO cache directory is still present.
+RUN: ls cache-dir/* | count 2
+; Check that no additional cache entries have been created for the cache hits.
+RUN: ls cache-dir/llvmcache-* | count 2
+
+;--- t1.ll
+
+target triple = "x86_64-unknown-linux-gnu"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @t1() {
+  ret void
+}
+
+;--- t2.ll
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @t2() {
+  ret void
+}
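---

Note on the caching protocol used by this patch: both runThinLTOBackendThread and the
object-loading loop follow the standard FileCache hit/miss contract from
llvm/Support/Caching.h (a FileCache is typically obtained from localCache()). A null
AddStreamFn returned by the cache query signals a hit, in which case the cached buffer
has already been delivered to the linker; a non-null one signals a miss, and committing
the returned stream publishes the new entry. The sketch below is illustrative only and
not part of the patch; streamObjectThroughCache and ObjectBytes are hypothetical names.

    #include "llvm/Support/Caching.h"
    #include "llvm/Support/Error.h"

    using namespace llvm;

    // Illustrative sketch of the FileCache hit/miss protocol relied on above.
    static Error streamObjectThroughCache(FileCache &Cache, unsigned Task,
                                          StringRef Key, StringRef ModuleID,
                                          StringRef ObjectBytes) {
      // Query the cache. On a hit, the returned AddStreamFn is null and the
      // cached buffer has already been handed to the AddBufferFn that was
      // supplied when the FileCache was created (e.g. via localCache()).
      Expected<AddStreamFn> AddStreamOrErr = Cache(Task, Key, ModuleID);
      if (!AddStreamOrErr)
        return AddStreamOrErr.takeError();
      if (!*AddStreamOrErr)
        return Error::success(); // Cache hit: nothing further to do.

      // Cache miss: stream the newly produced object (ObjectBytes stands in
      // for the distributor's compilation result) into a cache file.
      Expected<std::unique_ptr<CachedFileStream>> FileOrErr =
          (*AddStreamOrErr)(Task, ModuleID);
      if (!FileOrErr)
        return FileOrErr.takeError();
      *(*FileOrErr)->OS << ObjectBytes;
      // commit() publishes the entry, making it visible to later links.
      return (*FileOrErr)->commit();
    }

Because Conf.Dtlto is hashed into the key by computeLTOCacheKey, the same module
compiled in-process and via DTLTO produces two distinct cache entries, which is
exactly what the dtlto-thinlto-cache.test entry counts above verify.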