|
15 | 15 | #include <clang/Basic/Version.h>
|
16 | 16 | #include <clang/CodeGen/CodeGenAction.h>
|
17 | 17 | #include <clang/Driver/Compilation.h>
|
| 18 | +#include <clang/Driver/CudaInstallationDetector.h> |
18 | 19 | #include <clang/Driver/Driver.h>
|
| 20 | +#include <clang/Driver/LazyDetector.h> |
19 | 21 | #include <clang/Driver/Options.h>
|
| 22 | +#include <clang/Driver/RocmInstallationDetector.h> |
20 | 23 | #include <clang/Driver/ToolChain.h>
|
21 | 24 | #include <clang/Frontend/ChainedDiagnosticConsumer.h>
|
22 | 25 | #include <clang/Frontend/CompilerInstance.h>
|
|
42 | 45 | #include <llvm/Support/Base64.h>
|
43 | 46 | #include <llvm/Support/PropertySetIO.h>
|
44 | 47 | #include <llvm/Support/TimeProfiler.h>
|
| 48 | +#include <llvm/TargetParser/TargetParser.h> |
45 | 49 |
|
46 | 50 | #include <algorithm>
|
47 | 51 | #include <array>
|
@@ -622,67 +626,57 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module,
|
622 | 626 |
|
623 | 627 | // For GPU targets we need to link against vendor provided libdevice.
|
624 | 628 | if (IsCudaHIP) {
|
625 |
| - std::string Argv0 = DPCPPRoot + "/bin/clang++"; |
626 | 629 | Triple T{Module.getTargetTriple()};
|
627 |
| - IntrusiveRefCntPtr<OverlayFileSystem> OFS{ |
628 |
| - new OverlayFileSystem{getRealFileSystem()}}; |
629 |
| - IntrusiveRefCntPtr<InMemoryFileSystem> VFS{new InMemoryFileSystem}; |
630 |
| - std::string CppFileName{"a.cpp"}; |
631 |
| - VFS->addFile(CppFileName, /*ModificationTime=*/0, |
632 |
| - MemoryBuffer::getMemBuffer("", "")); |
633 |
| - OFS->pushOverlay(VFS); |
634 |
| - Driver D{Argv0, T.getTriple(), Diags, "dpcpp compiler driver", OFS}; |
635 |
| - |
636 |
| - SmallVector<std::string> CommandLine; |
637 |
| - CommandLine.push_back(Argv0); |
638 |
| - adjustArgs(UserArgList, DPCPPRoot, Format, CommandLine); |
639 |
| - CommandLine.push_back(CppFileName); |
640 |
| - SmallVector<const char *> CommandLineCStr(CommandLine.size()); |
641 |
| - llvm::transform(CommandLine, CommandLineCStr.begin(), |
642 |
| - [](const auto &S) { return S.c_str(); }); |
643 |
| - |
644 |
| - Compilation *C = D.BuildCompilation(CommandLineCStr); |
645 |
| - if (!C) { |
646 |
| - return createStringError("Unable to construct driver for CUDA/HIP"); |
647 |
| - } |
648 |
| - |
649 |
| - const ToolChain *OffloadTC = |
650 |
| - C->getSingleOffloadToolChain<Action::OFK_SYCL>(); |
651 |
| - InputArgList EmptyArgList; |
652 |
| - auto Archs = |
653 |
| - D.getOffloadArchs(*C, EmptyArgList, Action::OFK_SYCL, OffloadTC); |
654 |
| - assert(Archs.size() == 1 && |
655 |
| - "Offload toolchain should be configured to single architecture"); |
656 |
| - StringRef CPU = *Archs.begin(); |
657 |
| - |
658 |
| - // Pass only `-march=` or `-mcpu=` with the GPU arch determined by the |
659 |
| - // driver to `getDeviceLibs`. |
660 |
| - DerivedArgList CPUArgList{EmptyArgList}; |
| 630 | + Driver D{(Twine(DPCPPRoot) + "/bin/clang++").str(), T.getTriple(), Diags}; |
| 631 | + auto [CPU, Features] = |
| 632 | + Translator::getTargetCPUAndFeatureAttrs(&Module, "", Format); |
| 633 | + (void)Features; |
| 634 | + // Helper lambda: load one bitcode library and link only its needed symbols into Module. |
| 635 | + auto LinkInLib = [&](const StringRef LibDevice) -> Error { |
| 636 | + ModuleUPtr LibDeviceModule; |
| 637 | + if (auto Error = loadBitcodeLibrary(LibDevice, Context) |
| 638 | + .moveInto(LibDeviceModule)) { |
| 639 | + return Error; |
| 640 | + } |
| 641 | + if (Linker::linkModules(Module, std::move(LibDeviceModule), |
| 642 | + Linker::LinkOnlyNeeded)) { |
| 643 | + return createStringError("Unable to link libdevice: %s", |
| 644 | + BuildLog.c_str()); |
| 645 | + } |
| 646 | + return Error::success(); |
| 647 | + }; |
| 648 | + SmallVector<std::string, 12> LibDeviceFiles; |
661 | 649 | if (Format == BinaryFormat::PTX) {
|
662 |
| - CPUArgList.AddJoinedArg(nullptr, D.getOpts().getOption(OPT_march_EQ), |
663 |
| - CPU); |
| 650 | + // For NVPTX, CudaInstallationDetector alone suffices to locate libdevice. |
| 651 | + LazyDetector<CudaInstallationDetector> CudaInstallation{D, T, |
| 652 | + UserArgList}; |
| 653 | + auto LibDevice = CudaInstallation->getLibDeviceFile(CPU); |
| 654 | + if (LibDevice.empty()) { |
| 655 | + return createStringError("Unable to find Cuda libdevice"); |
| 656 | + } |
| 657 | + LibDeviceFiles.push_back(LibDevice); |
664 | 658 | } else {
|
665 |
| - CPUArgList.AddJoinedArg(nullptr, D.getOpts().getOption(OPT_mcpu_EQ), CPU); |
666 |
| - } |
667 |
| - |
668 |
| - SmallVector<ToolChain::BitCodeLibraryInfo, 12> CommonDeviceLibs = |
669 |
| - OffloadTC->getDeviceLibs(CPUArgList, Action::OffloadKind::OFK_SYCL); |
670 |
| - if (CommonDeviceLibs.empty()) { |
671 |
| - return createStringError("Unable to find common device libraries"); |
| 659 | + LazyDetector<RocmInstallationDetector> RocmInstallation{D, T, |
| 660 | + UserArgList}; |
| 661 | + RocmInstallation->detectDeviceLibrary(); |
| 662 | + StringRef CanonArch = |
| 663 | + llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(CPU)); |
| 664 | + StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(CanonArch); |
| 665 | + auto CommonBCLibs = RocmInstallation->getCommonBitcodeLibs( |
| 666 | + UserArgList, LibDeviceFile, CPU, Action::OFK_SYCL, |
| 667 | + /*NeedsASanRT=*/false); |
| 668 | + if (CommonBCLibs.empty()) { |
| 669 | + return createStringError("Unable to find ROCm bitcode libraries"); |
| 670 | + } |
| 671 | + for (auto &Lib : CommonBCLibs) { |
| 672 | + LibDeviceFiles.push_back(Lib.Path); |
| 673 | + } |
672 | 674 | }
|
673 |
| - |
674 |
| - for (auto &Lib : CommonDeviceLibs) { |
675 |
| - ModuleUPtr LibModule; |
676 |
| - if (auto Error = |
677 |
| - loadBitcodeLibrary(Lib.Path, Context).moveInto(LibModule)) { |
| 675 | + for (auto &LibDeviceFile : LibDeviceFiles) { |
| 676 | + // llvm::Error converts to false on success. |
| 677 | + if (auto Error = LinkInLib(LibDeviceFile)) { |
678 | 678 | return Error;
|
679 | 679 | }
|
680 |
| - |
681 |
| - if (Linker::linkModules(Module, std::move(LibModule), |
682 |
| - Linker::LinkOnlyNeeded)) { |
683 |
| - return createStringError("Unable to link device library %s: %s", |
684 |
| - Lib.Path.c_str(), BuildLog.c_str()); |
685 |
| - } |
686 | 680 | }
|
687 | 681 | }
|
688 | 682 |
|
|
0 commit comments