|
7 | 7 | //===----------------------------------------------------------------------===// |
8 | 8 |
|
9 | 9 | #include "DeviceCompilation.h" |
| 10 | +#include "ESIMD.h" |
10 | 11 |
|
11 | 12 | #include <clang/Basic/DiagnosticDriver.h> |
12 | 13 | #include <clang/Basic/Version.h> |
|
23 | 24 | #include <llvm/IRReader/IRReader.h> |
24 | 25 | #include <llvm/Linker/Linker.h> |
25 | 26 | #include <llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h> |
| 27 | +#include <llvm/SYCLLowerIR/ESIMD/LowerESIMD.h> |
| 28 | +#include <llvm/SYCLLowerIR/LowerInvokeSimd.h> |
26 | 29 | #include <llvm/SYCLLowerIR/ModuleSplitter.h> |
27 | 30 | #include <llvm/SYCLLowerIR/SYCLJointMatrixTransform.h> |
28 | 31 | #include <llvm/Support/PropertySetIO.h> |
@@ -376,42 +379,82 @@ template <class PassClass> static bool runModulePass(llvm::Module &M) { |
376 | 379 | return !Res.areAllPreserved(); |
377 | 380 | } |
378 | 381 |
|
379 | | -Expected<RTCBundleInfo> jit_compiler::performPostLink( |
380 | | - llvm::Module &Module, [[maybe_unused]] const InputArgList &UserArgList) { |
| 382 | +llvm::Expected<PostLinkResult> jit_compiler::performPostLink( |
| 383 | + std::unique_ptr<llvm::Module> Module, |
| 384 | + [[maybe_unused]] const llvm::opt::InputArgList &UserArgList) { |
381 | 385 | // This is a simplified version of `processInputModule` in |
382 | 386 | // `llvm/tools/sycl-post-link.cpp`. Assertions/TODOs point to functionality |
383 | 387 | // left out of the algorithm for now. |
384 | 388 |
|
385 | | - assert(!Module.getGlobalVariable("llvm.used") && |
386 | | - !Module.getGlobalVariable("llvm.compiler.used")); |
| 389 | + // TODO: SplitMode can be controlled by the user. |
| 390 | + const auto SplitMode = SPLIT_NONE; |
| 391 | + |
| 392 | + // TODO: EmitOnlyKernelsAsEntryPoints is controlled by |
| 393 | + // `shouldEmitOnlyKernelsAsEntryPoints` in |
| 394 | + // `clang/lib/Driver/ToolChains/Clang.cpp`. |
| 395 | + const bool EmitOnlyKernelsAsEntryPoints = true; |
| 396 | + |
| 397 | + // TODO: The optlevel passed to `sycl-post-link` is determined by |
| 398 | + // `getSYCLPostLinkOptimizationLevel` in |
| 399 | + // `clang/lib/Driver/ToolChains/Clang.cpp`. |
| 400 | + const bool PerformOpts = true; |
| 401 | + |
| 402 | + // Propagate ESIMD attribute to wrapper functions to prevent spurious splits |
| 403 | + // and kernel link errors. |
| 404 | + runModulePass<SYCLFixupESIMDKernelWrapperMDPass>(*Module); |
| 405 | + |
| 406 | + assert(!Module->getGlobalVariable("llvm.used") && |
| 407 | + !Module->getGlobalVariable("llvm.compiler.used")); |
387 | 408 | // Otherwise: Port over the `removeSYCLKernelsConstRefArray` and |
388 | 409 | // `removeDeviceGlobalFromCompilerUsed` methods. |
389 | 410 |
|
390 | | - assert(!isModuleUsingAsan(Module)); |
| 411 | + assert(!isModuleUsingAsan(*Module)); |
391 | 412 | // Otherwise: Need to instrument each image scope device globals if the module |
392 | 413 | // has been instrumented by sanitizer pass. |
393 | 414 |
|
394 | 415 | // Transform Joint Matrix builtin calls to align them with SPIR-V friendly |
395 | 416 | // LLVM IR specification. |
396 | | - runModulePass<SYCLJointMatrixTransformPass>(Module); |
| 417 | + runModulePass<SYCLJointMatrixTransformPass>(*Module); |
| 418 | + |
| 419 | + // Do invoke_simd processing before splitting because this: |
| 420 | + // - saves processing time (the pass is run once, even though on larger IR) |
| 421 | + // - doing it before SYCL/ESIMD splitting is required for correctness |
| 422 | + if (runModulePass<SYCLLowerInvokeSimdPass>(*Module)) { |
| 423 | + return createStringError("`invoke_simd` calls detected"); |
| 424 | + } |
397 | 425 |
|
398 | 426 | // TODO: Implement actual device code splitting. We're just using the splitter |
399 | 427 | // to obtain additional information about the module for now. |
400 | | - // TODO: EmitOnlyKernelsAsEntryPoints is controlled by |
401 | | - // `shouldEmitOnlyKernelsAsEntryPoints` in |
402 | | - // `clang/lib/Driver/ToolChains/Clang.cpp`. |
| 428 | + |
403 | 429 | std::unique_ptr<ModuleSplitterBase> Splitter = getDeviceCodeSplitter( |
404 | | - ModuleDesc{std::unique_ptr<llvm::Module>{&Module}}, SPLIT_NONE, |
405 | | - /*IROutputOnly=*/false, |
406 | | - /*EmitOnlyKernelsAsEntryPoints=*/true); |
407 | | - assert(Splitter->remainingSplits() == 1); |
| 430 | + ModuleDesc{std::move(Module)}, SplitMode, |
| 431 | + /*IROutputOnly=*/false, EmitOnlyKernelsAsEntryPoints); |
| 432 | + assert(Splitter->hasMoreSplits()); |
| 433 | + if (Splitter->remainingSplits() > 1) { |
| 434 | + return createStringError("Device code requires splitting"); |
| 435 | + } |
408 | 436 |
|
409 | 437 | // TODO: Call `verifyNoCrossModuleDeviceGlobalUsage` if device globals shall |
410 | 438 | // be processed. |
411 | 439 |
|
412 | | - assert(Splitter->hasMoreSplits()); |
413 | 440 | ModuleDesc MDesc = Splitter->nextSplit(); |
414 | | - assert(&Module == &MDesc.getModule()); |
| 441 | + |
| 442 | + // TODO: Call `MDesc.fixupLinkageOfDirectInvokeSimdTargets()` when |
| 443 | + // `invoke_simd` is supported. |
| 444 | + |
| 445 | + SmallVector<ModuleDesc, 2> ESIMDSplits = |
| 446 | + splitByESIMD(std::move(MDesc), EmitOnlyKernelsAsEntryPoints); |
| 447 | + assert(!ESIMDSplits.empty()); |
| 448 | + if (ESIMDSplits.size() > 1) { |
| 449 | + return createStringError("Mixing SYCL and ESIMD code is unsupported"); |
| 450 | + } |
| 451 | + MDesc = std::move(ESIMDSplits.front()); |
| 452 | + |
| 453 | + if (MDesc.isESIMD()) { |
| 454 | + // TODO: We're assuming ESIMD lowering is not deactivated (why would it?). |
| 455 | + lowerEsimdConstructs(MDesc, PerformOpts); |
| 456 | + } |
| 457 | + |
415 | 458 | MDesc.saveSplitInformationAsMetadata(); |
416 | 459 |
|
417 | 460 | RTCBundleInfo BundleInfo; |
@@ -448,10 +491,7 @@ Expected<RTCBundleInfo> jit_compiler::performPostLink( |
448 | 491 | } |
449 | 492 | }; |
450 | 493 |
|
451 | | - // Regain ownership of the module. |
452 | | - MDesc.releaseModulePtr().release(); |
453 | | - |
454 | | - return std::move(BundleInfo); |
| 494 | + return PostLinkResult{std::move(BundleInfo), MDesc.releaseModulePtr()}; |
455 | 495 | } |
456 | 496 |
|
457 | 497 | Expected<InputArgList> |
@@ -513,11 +553,9 @@ jit_compiler::parseUserArgs(View<const char *> UserArgs) { |
513 | 553 | return createStringError("Device code splitting is not yet supported"); |
514 | 554 | } |
515 | 555 |
|
516 | | - if (AL.hasArg(OPT_fsycl_device_code_split_esimd, |
517 | | - OPT_fno_sycl_device_code_split_esimd)) { |
518 | | - // TODO: There are more ESIMD-related options. |
519 | | - return createStringError( |
520 | | - "Runtime compilation of ESIMD kernels is not yet supported"); |
| 556 | + if (!AL.hasFlag(OPT_fsycl_device_code_split_esimd, |
| 557 | + OPT_fno_sycl_device_code_split_esimd, true)) { |
| 558 | + return createStringError("ESIMD device code split cannot be deactivated"); |
521 | 559 | } |
522 | 560 |
|
523 | 561 | if (AL.hasFlag(OPT_fsycl_dead_args_optimization, |
|
0 commit comments