|
7 | 7 | //===----------------------------------------------------------------------===// |
8 | 8 |
|
9 | 9 | #include "DeviceCompilation.h" |
| 10 | +#include "ESIMD.h" |
10 | 11 |
|
11 | 12 | #include <clang/Basic/DiagnosticDriver.h> |
12 | 13 | #include <clang/Basic/Version.h> |
|
27 | 28 | #include <llvm/IRReader/IRReader.h> |
28 | 29 | #include <llvm/Linker/Linker.h> |
29 | 30 | #include <llvm/SYCLLowerIR/ComputeModuleRuntimeInfo.h> |
| 31 | +#include <llvm/SYCLLowerIR/ESIMD/LowerESIMD.h> |
| 32 | +#include <llvm/SYCLLowerIR/LowerInvokeSimd.h> |
30 | 33 | #include <llvm/SYCLLowerIR/ModuleSplitter.h> |
31 | 34 | #include <llvm/SYCLLowerIR/SYCLJointMatrixTransform.h> |
32 | 35 | #include <llvm/Support/PropertySetIO.h> |
@@ -432,42 +435,84 @@ template <class PassClass> static bool runModulePass(llvm::Module &M) { |
432 | 435 | return !Res.areAllPreserved(); |
433 | 436 | } |
434 | 437 |
|
435 | | -Expected<RTCBundleInfo> jit_compiler::performPostLink( |
436 | | - llvm::Module &Module, [[maybe_unused]] const InputArgList &UserArgList) { |
| 438 | +llvm::Expected<PostLinkResult> jit_compiler::performPostLink( |
| 439 | + std::unique_ptr<llvm::Module> Module, |
| 440 | + [[maybe_unused]] const llvm::opt::InputArgList &UserArgList) { |
437 | 441 | // This is a simplified version of `processInputModule` in |
438 | 442 | // `llvm/tools/sycl-post-link.cpp`. Assertions/TODOs point to functionality |
439 | 443 | // left out of the algorithm for now. |
440 | 444 |
|
441 | | - assert(!Module.getGlobalVariable("llvm.used") && |
442 | | - !Module.getGlobalVariable("llvm.compiler.used")); |
| 445 | + // TODO: SplitMode can be controlled by the user. |
| 446 | + const auto SplitMode = SPLIT_NONE; |
| 447 | + |
| 448 | + // TODO: EmitOnlyKernelsAsEntryPoints is controlled by |
| 449 | + // `shouldEmitOnlyKernelsAsEntryPoints` in |
| 450 | + // `clang/lib/Driver/ToolChains/Clang.cpp`. |
| 451 | + const bool EmitOnlyKernelsAsEntryPoints = true; |
| 452 | + |
| 453 | + // TODO: The optlevel passed to `sycl-post-link` is determined by |
| 454 | + // `getSYCLPostLinkOptimizationLevel` in |
| 455 | + // `clang/lib/Driver/ToolChains/Clang.cpp`. |
| 456 | + const bool PerformOpts = true; |
| 457 | + |
| 458 | + // Propagate ESIMD attribute to wrapper functions to prevent spurious splits |
| 459 | + // and kernel link errors. |
| 460 | + runModulePass<SYCLFixupESIMDKernelWrapperMDPass>(*Module); |
| 461 | + |
| 462 | + assert(!Module->getGlobalVariable("llvm.used") && |
| 463 | + !Module->getGlobalVariable("llvm.compiler.used")); |
443 | 464 | // Otherwise: Port over the `removeSYCLKernelsConstRefArray` and |
444 | 465 | // `removeDeviceGlobalFromCompilerUsed` methods. |
445 | 466 |
|
446 | | - assert(!isModuleUsingAsan(Module)); |
| 467 | + assert(!isModuleUsingAsan(*Module)); |
447 | 468 | // Otherwise: Need to instrument each image-scope device global if the module |
448 | 469 | // has been instrumented by a sanitizer pass. |
449 | 470 |
|
450 | 471 | // Transform Joint Matrix builtin calls to align them with SPIR-V friendly |
451 | 472 | // LLVM IR specification. |
452 | | - runModulePass<SYCLJointMatrixTransformPass>(Module); |
| 473 | + runModulePass<SYCLJointMatrixTransformPass>(*Module); |
| 474 | + |
| 475 | + // Do invoke_simd processing before splitting because: |
| 476 | + // - it saves processing time (the pass is run once, albeit on larger IR) |
| 477 | + // - running it before SYCL/ESIMD splitting is required for correctness |
| 478 | + if (runModulePass<SYCLLowerInvokeSimdPass>(*Module)) { |
| 479 | + return createStringError("`invoke_simd` calls detected"); |
| 480 | + } |
453 | 481 |
|
454 | 482 | // TODO: Implement actual device code splitting. We're just using the splitter |
455 | 483 | // to obtain additional information about the module for now. |
456 | | - // TODO: EmitOnlyKernelsAsEntryPoints is controlled by |
457 | | - // `shouldEmitOnlyKernelsAsEntryPoints` in |
458 | | - // `clang/lib/Driver/ToolChains/Clang.cpp`. |
| 484 | + |
459 | 485 | std::unique_ptr<ModuleSplitterBase> Splitter = getDeviceCodeSplitter( |
460 | | - ModuleDesc{std::unique_ptr<llvm::Module>{&Module}}, SPLIT_NONE, |
461 | | - /*IROutputOnly=*/false, |
462 | | - /*EmitOnlyKernelsAsEntryPoints=*/true); |
463 | | - assert(Splitter->remainingSplits() == 1); |
| 486 | + ModuleDesc{std::move(Module)}, SplitMode, |
| 487 | + /*IROutputOnly=*/false, EmitOnlyKernelsAsEntryPoints); |
| 488 | + assert(Splitter->hasMoreSplits()); |
| 489 | + if (Splitter->remainingSplits() > 1) { |
| 490 | + return createStringError("Device code requires splitting"); |
| 491 | + } |
464 | 492 |
|
465 | 493 | // TODO: Call `verifyNoCrossModuleDeviceGlobalUsage` if device globals shall |
466 | 494 | // be processed. |
467 | 495 |
|
468 | | - assert(Splitter->hasMoreSplits()); |
469 | 496 | ModuleDesc MDesc = Splitter->nextSplit(); |
470 | | - assert(&Module == &MDesc.getModule()); |
| 497 | + |
| 498 | + // TODO: Call `MDesc.fixupLinkageOfDirectInvokeSimdTargets()` when |
| 499 | + // `invoke_simd` is supported. |
| 500 | + |
| 501 | + SmallVector<ModuleDesc, 2> ESIMDSplits = |
| 502 | + splitByESIMD(std::move(MDesc), EmitOnlyKernelsAsEntryPoints); |
| 503 | + assert(!ESIMDSplits.empty()); |
| 504 | + if (ESIMDSplits.size() > 1) { |
| 505 | + return createStringError("Mixing SYCL and ESIMD code is unsupported"); |
| 506 | + } |
| 507 | + MDesc = std::move(ESIMDSplits.front()); |
| 508 | + |
| 509 | + if (MDesc.isESIMD()) { |
| 510 | + // `sycl-post-link` has a `-lower-esimd` option, but there's no clang driver |
| 511 | + // option to influence it. Rather, the driver sets it unconditionally in the |
| 512 | + // multi-file output mode, which we are mimicking here. |
| 513 | + lowerEsimdConstructs(MDesc, PerformOpts); |
| 514 | + } |
| 515 | + |
471 | 516 | MDesc.saveSplitInformationAsMetadata(); |
472 | 517 |
|
473 | 518 | RTCBundleInfo BundleInfo; |
@@ -504,10 +549,7 @@ Expected<RTCBundleInfo> jit_compiler::performPostLink( |
504 | 549 | } |
505 | 550 | }; |
506 | 551 |
|
507 | | - // Regain ownership of the module. |
508 | | - MDesc.releaseModulePtr().release(); |
509 | | - |
510 | | - return std::move(BundleInfo); |
| 552 | + return PostLinkResult{std::move(BundleInfo), MDesc.releaseModulePtr()}; |
511 | 553 | } |
512 | 554 |
|
513 | 555 | Expected<InputArgList> |
@@ -569,11 +611,9 @@ jit_compiler::parseUserArgs(View<const char *> UserArgs) { |
569 | 611 | return createStringError("Device code splitting is not yet supported"); |
570 | 612 | } |
571 | 613 |
|
572 | | - if (AL.hasArg(OPT_fsycl_device_code_split_esimd, |
573 | | - OPT_fno_sycl_device_code_split_esimd)) { |
574 | | - // TODO: There are more ESIMD-related options. |
575 | | - return createStringError( |
576 | | - "Runtime compilation of ESIMD kernels is not yet supported"); |
| 614 | + if (!AL.hasFlag(OPT_fsycl_device_code_split_esimd, |
| 615 | + OPT_fno_sycl_device_code_split_esimd, true)) { |
| 616 | + return createStringError("ESIMD device code split cannot be deactivated"); |
577 | 617 | } |
578 | 618 |
|
579 | 619 | if (AL.hasFlag(OPT_fsycl_dead_args_optimization, |
|
0 commit comments