|
8 | 8 |
|
9 | 9 | #include "DeviceCompilation.h" |
10 | 10 |
|
| 11 | +#include "PostLinkActions.h" |
| 12 | + |
11 | 13 | #include <clang/Basic/DiagnosticDriver.h> |
12 | 14 | #include <clang/Basic/Version.h> |
13 | 15 | #include <clang/CodeGen/CodeGenAction.h> |
|
20 | 22 |
|
21 | 23 | #include <llvm/IRReader/IRReader.h> |
22 | 24 | #include <llvm/Linker/Linker.h> |
23 | | - |
24 | | -#include <array> |
| 25 | +#include <llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h> |
| 26 | +#include <llvm/SYCLLowerIR/ModuleSplitter.h> |
| 27 | +#include <llvm/SYCLLowerIR/SYCLJointMatrixTransform.h> |
| 28 | +#include <llvm/Support/PropertySetIO.h> |
25 | 29 |
|
26 | 30 | using namespace clang; |
27 | 31 | using namespace clang::tooling; |
28 | 32 | using namespace clang::driver; |
29 | 33 | using namespace clang::driver::options; |
30 | 34 | using namespace llvm; |
31 | 35 | using namespace llvm::opt; |
| 36 | +using namespace llvm::sycl; |
| 37 | +using namespace llvm::module_split; |
| 38 | +using namespace llvm::util; |
| 39 | +using namespace jit_compiler; |
| 40 | +using namespace jit_compiler::post_link; |
32 | 41 |
|
33 | 42 | #ifdef _GNU_SOURCE |
34 | 43 | #include <dlfcn.h> |
@@ -356,6 +365,96 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module, |
356 | 365 | return Error::success(); |
357 | 366 | } |
358 | 367 |
|
| 368 | +Expected<RTCBundleInfo> jit_compiler::performPostLink( |
| 369 | + llvm::Module &Module, [[maybe_unused]] const InputArgList &UserArgList) { |
| 370 | + // This is a simplified version of `processInputModule` in |
| 371 | + // `llvm/tools/sycl-post-link.cpp`. Assertions/TODOs point to functionality |
| 372 | + // left out of the algorithm for now. |
| 373 | + |
| 374 | + // After linking device bitcode "llvm.used" holds references to the kernels |
| 375 | + // that are defined in the device image. But after splitting device image into |
| 376 | + // separate kernels we may end up with having references to kernel declaration |
| 377 | + // originating from "llvm.used" in the IR that is passed to llvm-spirv tool, |
| 378 | + // and these declarations cause an assertion in llvm-spirv. To workaround this |
| 379 | + // issue remove "llvm.used" from the input module before performing any other |
| 380 | + // actions. |
| 381 | + removeSYCLKernelsConstRefArray(Module); |
| 382 | + |
| 383 | + // There may be device_global variables kept alive in "llvm.compiler.used" |
| 384 | + // to keep the optimizer from wrongfully removing them. llvm.compiler.used |
| 385 | + // symbols are usually removed at backend lowering, but this is handled here |
| 386 | + // for SPIR-V since SYCL compilation uses llvm-spirv, not the SPIR-V backend. |
| 387 | + removeDeviceGlobalFromCompilerUsed(Module); |
| 388 | + |
| 389 | + assert(!isModuleUsingAsan(Module)); |
| 390 | + // Otherwise: Need to instrument each image scope device globals if the module |
| 391 | + // has been instrumented by sanitizer pass. |
| 392 | + |
| 393 | + // Transform Joint Matrix builtin calls to align them with SPIR-V friendly |
| 394 | + // LLVM IR specification. |
| 395 | + runModulePass<SYCLJointMatrixTransformPass>(Module); |
| 396 | + |
| 397 | + // TODO: Implement actual device code splitting. We're just using the splitter |
| 398 | + // to obtain additional information about the module for now. |
| 399 | + // TODO: EmitOnlyKernelsAsEntryPoints is controlled by |
| 400 | + // `shouldEmitOnlyKernelsAsEntryPoints` in |
| 401 | + // `clang/lib/Driver/ToolChains/Clang.cpp`. |
| 402 | + std::unique_ptr<ModuleSplitterBase> Splitter = getDeviceCodeSplitter( |
| 403 | + ModuleDesc{std::unique_ptr<llvm::Module>{&Module}}, SPLIT_NONE, |
| 404 | + /*IROutputOnly=*/false, |
| 405 | + /*EmitOnlyKernelsAsEntryPoints=*/true); |
| 406 | + bool SplitOccurred = Splitter->remainingSplits() > 1; |
| 407 | + assert(!SplitOccurred); |
| 408 | + |
| 409 | + // TODO: Call `verifyNoCrossModuleDeviceGlobalUsage` if device globals shall |
| 410 | + // be processed. |
| 411 | + |
| 412 | + assert(Splitter->hasMoreSplits()); |
| 413 | + ModuleDesc MDesc = Splitter->nextSplit(); |
| 414 | + assert(&Module == &MDesc.getModule()); |
| 415 | + MDesc.saveSplitInformationAsMetadata(); |
| 416 | + |
| 417 | + RTCBundleInfo BundleInfo; |
| 418 | + BundleInfo.SymbolTable = |
| 419 | + decltype(BundleInfo.SymbolTable){MDesc.entries().size()}; |
| 420 | + transform(MDesc.entries(), BundleInfo.SymbolTable.begin(), |
| 421 | + [](Function *F) { return F->getName(); }); |
| 422 | + |
| 423 | + // TODO: Determine what is requested. |
| 424 | + GlobalBinImageProps PropReq{ |
| 425 | + /*EmitKernelParamInfo=*/true, /*EmitProgramMetadata=*/true, |
| 426 | + /*EmitExportedSymbols=*/true, /*EmitImportedSymbols=*/true, |
| 427 | + /*DeviceGlobals=*/false}; |
| 428 | + PropertySetRegistry Properties = |
| 429 | + computeModuleProperties(MDesc.getModule(), MDesc.entries(), PropReq); |
| 430 | + // TODO: Manually add `compile_target` property as in |
| 431 | + // `saveModuleProperties`? |
| 432 | + const auto &PropertySets = Properties.getPropSets(); |
| 433 | + |
| 434 | + BundleInfo.Properties = decltype(BundleInfo.Properties){PropertySets.size()}; |
| 435 | + for (auto &&[KV, FrozenPropSet] : zip(PropertySets, BundleInfo.Properties)) { |
| 436 | + const auto &PropertySetName = KV.first; |
| 437 | + const auto &PropertySet = KV.second; |
| 438 | + FrozenPropertySet FPS{PropertySetName.str(), PropertySet.size()}; |
| 439 | + for (auto &&[KV2, FrozenProp] : zip(PropertySet, FPS.Values)) { |
| 440 | + const auto &PropertyName = KV2.first; |
| 441 | + const auto &PropertyValue = KV2.second; |
| 442 | + FrozenProp = PropertyValue.getType() == PropertyValue::Type::UINT32 |
| 443 | + ? FrozenPropertyValue{PropertyName.str(), |
| 444 | + PropertyValue.asUint32()} |
| 445 | + : FrozenPropertyValue{ |
| 446 | + PropertyName.str(), PropertyValue.asRawByteArray(), |
| 447 | + PropertyValue.getRawByteArraySize()}; |
| 448 | + } |
| 449 | + FrozenPropSet = std::move(FPS); |
| 450 | + }; |
| 451 | + |
| 452 | + // Regain ownership of the module. |
| 453 | + MDesc.releaseModulePtr().release(); |
| 454 | + |
| 455 | + return BundleInfo; |
| 456 | +} |
| 457 | + |
359 | 458 | Expected<InputArgList> |
360 | 459 | jit_compiler::parseUserArgs(View<const char *> UserArgs) { |
361 | 460 | unsigned MissingArgIndex, MissingArgCount; |
@@ -410,5 +509,17 @@ jit_compiler::parseUserArgs(View<const char *> UserArgs) { |
410 | 509 | } |
411 | 510 | } |
412 | 511 |
|
| 512 | + if (auto DCSMode = AL.getLastArgValue(OPT_fsycl_device_code_split_EQ, "none"); |
| 513 | + DCSMode != "none" && DCSMode != "auto") { |
| 514 | + return createStringError("Device code splitting is not yet supported"); |
| 515 | + } |
| 516 | + |
| 517 | + if (AL.hasArg(OPT_fsycl_device_code_split_esimd, |
| 518 | + OPT_fno_sycl_device_code_split_esimd)) { |
| 519 | + // TODO: There are more ESIMD-related options. |
| 520 | + return createStringError( |
| 521 | + "Runtime compilation of ESIMD kernels is not yet supported"); |
| 522 | + } |
| 523 | + |
413 | 524 | return Expected<InputArgList>{std::move(AL)}; |
414 | 525 | } |
0 commit comments