Skip to content

Commit e3c016f

Browse files
committed
[DTLTO][LLVM] Implement integrated distribution for ThinLTO (DTLTO).
Structural changes: 1. A new ThinLTO backend implementing DTLTO has been added. 2. Both the new backend and the InProcess backend derive from a common base class to share common setup code and state. 3. The target triple is now stored for the ThinLTO bitcode files. 4. A new setup() member is called, which the ThinLTO backends can use to prepare for code generation. For the DTLTO backend, this is used to pre-allocate storage for the information required to perform the backend compilation jobs. 5. The functions for emitting summary index shard and imports files have been altered to allow the caller to specify the filenames to write and to allow the list of imports to be stored in a container rather than written to a file.
1 parent d55d8c0 commit e3c016f

File tree

11 files changed

+782
-1
lines changed

11 files changed

+782
-1
lines changed

llvm/include/llvm/LTO/LTO.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,28 @@ ThinBackend createInProcessThinBackend(ThreadPoolStrategy Parallelism,
305305
bool ShouldEmitIndexFiles = false,
306306
bool ShouldEmitImportsFiles = false);
307307

308+
/// This ThinBackend generates the index shards and then runs the individual
309+
/// backend jobs via an external process. It takes the same parameters as the
310+
/// InProcessThinBackend, however, these parameters only control the behavior
311+
/// when generating the index files for the modules. Additionally:
312+
/// LinkerOutputFile is a string that should identify this LTO invocation in
313+
/// the context of a wider build. It's used for naming to aid the user in
314+
/// identifying activity related to a specific LTO invocation.
315+
/// RemoteOptTool specifies the path to a Clang executable to be invoked for the
316+
/// backend jobs.
317+
/// Distributor specifies the path to a process to invoke to manage the backend
318+
/// jobs execution.
319+
/// SaveTemps is a debugging tool that prevents temporary files created by this
320+
/// backend from being cleaned up.
321+
ThinBackend createOutOfProcessThinBackend(ThreadPoolStrategy Parallelism,
322+
IndexWriteCallback OnWrite,
323+
bool ShouldEmitIndexFiles,
324+
bool ShouldEmitImportsFiles,
325+
StringRef LinkerOutputFile,
326+
StringRef RemoteOptTool,
327+
StringRef Distributor,
328+
bool SaveTemps);
329+
308330
/// This ThinBackend writes individual module indexes to files, instead of
309331
/// running the individual backend jobs. This backend is for distributed builds
310332
/// where separate processes will invoke the real backends.

llvm/lib/LTO/LTO.cpp

Lines changed: 277 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,8 +41,11 @@
4141
#include "llvm/Support/CommandLine.h"
4242
#include "llvm/Support/Error.h"
4343
#include "llvm/Support/FileSystem.h"
44+
#include "llvm/Support/FileUtilities.h"
45+
#include "llvm/Support/JSON.h"
4446
#include "llvm/Support/MemoryBuffer.h"
4547
#include "llvm/Support/Path.h"
48+
#include "llvm/Support/Process.h"
4649
#include "llvm/Support/SHA1.h"
4750
#include "llvm/Support/SourceMgr.h"
4851
#include "llvm/Support/ThreadPool.h"
@@ -91,6 +94,15 @@ extern cl::opt<bool> SupportsHotColdNew;
9194

9295
/// Enable MemProf context disambiguation for thin link.
9396
extern cl::opt<bool> EnableMemProfContextDisambiguation;
97+
98+
/// Extra arguments for the ThinLTO distributor process. As a cl::list the
/// option may be specified more than once; each occurrence adds one argument.
cl::list<std::string> AdditionalThinLTODistributorArgs(
    "thinlto-distributor-arg",
    cl::desc("Additional arguments to pass to the ThinLTO distributor"));

/// Extra arguments for the remote optimization tool that performs the ThinLTO
/// backend compilations. As a cl::list the option may be specified more than
/// once; each occurrence adds one argument.
cl::list<std::string> ThinLTORemoteOptToolArgs(
    "thinlto-remote-opt-tool-arg",
    cl::desc("Additional arguments to pass to the "
             "ThinLTO remote optimization tool"));
94106
} // namespace llvm
95107

96108
// Computes a unique hash for the Module considering the current list of
@@ -2179,3 +2191,268 @@ std::vector<int> lto::generateModulesOrdering(ArrayRef<BitcodeModule *> R) {
21792191
});
21802192
return ModulesOrdering;
21812193
}
2194+
2195+
namespace {
2196+
// For this out-of-process backend no codegen is done when invoked for each
2197+
// task. Instead we generate the required information (e.g. the summary index
2198+
// shard, import list, etc..) to allow for the codegen to be performed
2199+
// externally . This backend's `wait` function then invokes an external
2200+
// distributor process to do backend compilations.
2201+
class OutOfProcessThinBackend : public CGThinBackend {
2202+
using SString = SmallString<128>;
2203+
2204+
BumpPtrAllocator Alloc;
2205+
StringSaver Saver{Alloc};
2206+
2207+
SString LinkerOutputFile;
2208+
SString RemoteOptTool;
2209+
SString DistributorPath;
2210+
bool SaveTemps;
2211+
2212+
SmallVector<StringRef, 0> CodegenOptions;
2213+
DenseSet<StringRef> AdditionalInputs;
2214+
2215+
// Information specific to individual backend compilation job.
2216+
struct Job {
2217+
unsigned Task;
2218+
StringRef ModuleID;
2219+
StringRef Triple;
2220+
StringRef NativeObjectPath;
2221+
StringRef SummaryIndexPath;
2222+
ImportsFilesContainer ImportFiles;
2223+
};
2224+
// The set of backend compilations jobs.
2225+
SmallVector<Job> Jobs;
2226+
2227+
// A unique string to identify the current link.
2228+
SmallString<8> UID;
2229+
2230+
public:
2231+
OutOfProcessThinBackend(
2232+
const Config &Conf, ModuleSummaryIndex &CombinedIndex,
2233+
ThreadPoolStrategy ThinLTOParallelism,
2234+
const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
2235+
AddStreamFn AddStream, lto::IndexWriteCallback OnWrite,
2236+
bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
2237+
StringRef LinkerOutputFile, StringRef RemoteOptTool,
2238+
StringRef Distributor, bool SaveTemps)
2239+
: CGThinBackend(Conf, CombinedIndex, ModuleToDefinedGVSummaries,
2240+
AddStream, OnWrite, ShouldEmitIndexFiles,
2241+
ShouldEmitImportsFiles, ThinLTOParallelism),
2242+
LinkerOutputFile(LinkerOutputFile), RemoteOptTool(RemoteOptTool),
2243+
DistributorPath(Distributor), SaveTemps(SaveTemps) {}
2244+
2245+
virtual void setup(unsigned MaxTasks) override {
2246+
UID = itostr(sys::Process::getProcessId());
2247+
Jobs.resize((size_t)MaxTasks);
2248+
}
2249+
2250+
Error start(
2251+
unsigned Task, BitcodeModule BM,
2252+
const FunctionImporter::ImportMapTy &ImportList,
2253+
const FunctionImporter::ExportSetTy &ExportList,
2254+
const std::map<GlobalValue::GUID, GlobalValue::LinkageTypes> &ResolvedODR,
2255+
MapVector<StringRef, BitcodeModule> &ModuleMap,
2256+
DenseMap<StringRef, std::string> &ModuleTriples) override {
2257+
2258+
StringRef ModulePath = BM.getModuleIdentifier();
2259+
2260+
SString ObjFilePath = sys::path::parent_path(LinkerOutputFile);
2261+
sys::path::append(ObjFilePath, sys::path::stem(ModulePath) + "." +
2262+
itostr(Task) + "." + UID + ".native.o");
2263+
2264+
Job &J = Jobs[Task - 1]; /*Task 0 is reserved*/
2265+
J = {Task,
2266+
ModulePath,
2267+
ModuleTriples[ModulePath],
2268+
Saver.save(ObjFilePath.str()),
2269+
Saver.save(ObjFilePath.str() + ".thinlto.bc"),
2270+
{}};
2271+
2272+
assert(ModuleToDefinedGVSummaries.count(ModulePath));
2273+
BackendThreadPool.async(
2274+
[=](Job &J, const FunctionImporter::ImportMapTy &ImportList) {
2275+
if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
2276+
timeTraceProfilerInitialize(Conf.TimeTraceGranularity,
2277+
"thin backend");
2278+
if (auto E = emitFiles(ImportList, J.ModuleID, J.SummaryIndexPath,
2279+
J.ModuleID.str(), J.ImportFiles)) {
2280+
std::unique_lock<std::mutex> L(ErrMu);
2281+
if (Err)
2282+
Err = joinErrors(std::move(*Err), std::move(E));
2283+
else
2284+
Err = std::move(E);
2285+
}
2286+
if (LLVM_ENABLE_THREADS && Conf.TimeTraceEnabled)
2287+
timeTraceProfilerFinishThread();
2288+
},
2289+
std::ref(J), std::ref(ImportList));
2290+
2291+
return Error::success();
2292+
}
2293+
2294+
// Generates a JSON file describing the backend compilations, for the
2295+
// distributor.
2296+
bool emitDistributorJson(StringRef DistributorJson) {
2297+
using json::Array;
2298+
std::error_code EC;
2299+
raw_fd_ostream OS(DistributorJson, EC);
2300+
if (EC)
2301+
return false;
2302+
2303+
json::OStream JOS(OS);
2304+
JOS.object([&]() {
2305+
// Information common to all jobs note that we use a custom syntax for
2306+
// referencing by index into the job input and output file arrays.
2307+
JOS.attributeObject("common", [&]() {
2308+
JOS.attribute("linker_output", LinkerOutputFile);
2309+
2310+
// Common command line template.
2311+
JOS.attributeArray("args", [&]() {
2312+
JOS.value(RemoteOptTool);
2313+
2314+
// Reference to Job::NativeObjectPath.
2315+
JOS.value("-o");
2316+
JOS.value(Array{"primary_output", 0});
2317+
2318+
JOS.value("-c");
2319+
2320+
JOS.value("-x");
2321+
JOS.value("ir");
2322+
2323+
// Reference to Job::ModuleID.
2324+
JOS.value(Array{"primary_input", 0});
2325+
2326+
// Reference to Job::SummaryIndexPath.
2327+
JOS.value(Array{"summary_index", "-fthinlto-index=", 0});
2328+
JOS.value(Saver.save("--target=" + Twine(Jobs.front().Triple)));
2329+
2330+
for (const auto &A : ThinLTORemoteOptToolArgs)
2331+
JOS.value(A);
2332+
});
2333+
});
2334+
JOS.attributeArray("jobs", [&]() {
2335+
for (const auto &J : Jobs) {
2336+
assert(J.Task != 0);
2337+
JOS.object([&]() {
2338+
JOS.attribute("primary_input", Array{J.ModuleID});
2339+
JOS.attribute("summary_index", Array{J.SummaryIndexPath});
2340+
JOS.attribute("primary_output", Array{J.NativeObjectPath});
2341+
2342+
// Add the bitcode files from which imports will be made. These do
2343+
// not appear on the command line but are recorded in the summary
2344+
// index shard.
2345+
JOS.attribute("imports", Array(J.ImportFiles));
2346+
2347+
// Add any input files that are common to each invocation. These
2348+
// filenames are duplicated in the command line template and in
2349+
// each of the per job "inputs" array. However, this small amount
2350+
// of duplication makes the schema simpler.
2351+
JOS.attribute("additional_inputs", Array(AdditionalInputs));
2352+
});
2353+
}
2354+
});
2355+
});
2356+
2357+
return true;
2358+
}
2359+
2360+
void removeFile(StringRef FileName) {
2361+
std::error_code EC = sys::fs::remove(FileName, true);
2362+
if (EC && EC != std::make_error_code(std::errc::no_such_file_or_directory))
2363+
errs() << "warning: could not remove the file '" << FileName
2364+
<< "': " << EC.message() << "\n";
2365+
}
2366+
2367+
Error wait() override {
2368+
// Wait for the information on the required backend compilations to be
2369+
// gathered.
2370+
BackendThreadPool.wait();
2371+
if (Err)
2372+
return std::move(*Err);
2373+
2374+
auto CleanPerJobFiles = llvm::make_scope_exit([&] {
2375+
if (!SaveTemps)
2376+
for (auto &Job : Jobs) {
2377+
removeFile(Job.NativeObjectPath);
2378+
if (!ShouldEmitIndexFiles)
2379+
removeFile(Job.SummaryIndexPath);
2380+
}
2381+
});
2382+
2383+
const StringRef BCError = "DTLTO backend compilation: ";
2384+
2385+
// TODO: If we move to using an optimisation tool that does not require an
2386+
// explicit triple to be passed then the triple handling can be removed
2387+
// entirely.
2388+
if (!llvm::all_of(Jobs, [&](const auto &Job) {
2389+
return Job.Triple == Jobs.front().Triple;
2390+
}))
2391+
return make_error<StringError>(BCError + "all triples must be consistent",
2392+
inconvertibleErrorCode());
2393+
2394+
SString JsonFile = sys::path::parent_path(LinkerOutputFile);
2395+
sys::path::append(JsonFile, sys::path::stem(LinkerOutputFile) + "." + UID +
2396+
".dist-file.json");
2397+
if (!emitDistributorJson(JsonFile))
2398+
return make_error<StringError>(
2399+
BCError + "failed to generate distributor JSON script: " + JsonFile,
2400+
inconvertibleErrorCode());
2401+
auto CleanJson = llvm::make_scope_exit([&] {
2402+
if (!SaveTemps)
2403+
removeFile(JsonFile);
2404+
});
2405+
2406+
SmallVector<StringRef, 3> Args = {DistributorPath};
2407+
llvm::append_range(Args, AdditionalThinLTODistributorArgs);
2408+
Args.push_back(JsonFile);
2409+
std::string ErrMsg;
2410+
if (sys::ExecuteAndWait(Args[0], Args,
2411+
/*Env=*/std::nullopt, /*Redirects=*/{},
2412+
/*SecondsToWait=*/0, /*MemoryLimit=*/0, &ErrMsg)) {
2413+
return make_error<StringError>(
2414+
BCError + "distributor execution failed" +
2415+
(!ErrMsg.empty() ? ": " + ErrMsg + Twine(".") : Twine(".")),
2416+
inconvertibleErrorCode());
2417+
}
2418+
2419+
for (auto &Job : Jobs) {
2420+
// Load the native object from a file into a memory buffer
2421+
// and store its contents in the output buffer.
2422+
ErrorOr<std::unique_ptr<MemoryBuffer>> objFileMbOrErr =
2423+
MemoryBuffer::getFile(Job.NativeObjectPath, false, false);
2424+
if (std::error_code ec = objFileMbOrErr.getError())
2425+
return make_error<StringError>(
2426+
BCError + "cannot open native object file: " +
2427+
Job.NativeObjectPath + ": " + ec.message(),
2428+
inconvertibleErrorCode());
2429+
std::unique_ptr<llvm::MemoryBuffer> umb = std::move(objFileMbOrErr.get());
2430+
Expected<std::unique_ptr<CachedFileStream>> StreamOrErr =
2431+
AddStream(Job.Task, Job.ModuleID);
2432+
if (Error Err = StreamOrErr.takeError())
2433+
report_fatal_error(std::move(Err));
2434+
std::unique_ptr<CachedFileStream> Stream = std::move(*StreamOrErr);
2435+
*Stream->OS << umb->getMemBufferRef().getBuffer();
2436+
}
2437+
2438+
return Error::success();
2439+
}
2440+
};
2441+
} // end anonymous namespace
2442+
2443+
// Creates the ThinBackend that runs the backend compilations out of process.
// The returned object only stores a factory; the OutOfProcessThinBackend
// itself is constructed when that factory is invoked. The string parameters
// are therefore copied into the closure, so the factory does not depend on
// the caller keeping the buffers behind the StringRefs alive.
ThinBackend lto::createOutOfProcessThinBackend(
    ThreadPoolStrategy Parallelism, lto::IndexWriteCallback OnWrite,
    bool ShouldEmitIndexFiles, bool ShouldEmitImportsFiles,
    StringRef LinkerOutputFile, StringRef RemoteOptTool, StringRef Distributor,
    bool SaveTemps) {
  auto Func =
      [Parallelism, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles,
       SaveTemps, OutputFile = LinkerOutputFile.str(),
       OptTool = RemoteOptTool.str(), DistributorExe = Distributor.str()](
          const Config &Conf, ModuleSummaryIndex &CombinedIndex,
          const DenseMap<StringRef, GVSummaryMapTy> &ModuleToDefinedGVSummaries,
          AddStreamFn AddStream, FileCache /*Cache*/) {
        return std::make_unique<OutOfProcessThinBackend>(
            Conf, CombinedIndex, Parallelism, ModuleToDefinedGVSummaries,
            AddStream, OnWrite, ShouldEmitIndexFiles, ShouldEmitImportsFiles,
            OutputFile, OptTool, DistributorExe, SaveTemps);
      };
  return ThinBackend(Func, Parallelism);
}

0 commit comments

Comments
 (0)