Skip to content

Commit 912b04d

Browse files
committed
[SYCL] Simplify detection of device bitcode libraries for RTC
By using installation detectors, we no longer need to create full toolchains and query them.
1 parent 5a795f6 commit 912b04d

File tree

3 files changed

+61
-62
lines changed

3 files changed

+61
-62
lines changed

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -81,11 +81,18 @@ RocmInstallationDetector::CommonBitcodeLibsPreferences::
8181
options::OPT_fno_fast_math, false);
8282

8383
const bool DefaultSqrt = IsKnownOffloading ? true : false;
84-
CorrectSqrt =
85-
DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
86-
DriverArgs.hasFlag(
87-
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
88-
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt, DefaultSqrt);
84+
85+
if (DeviceOffloadingKind == Action::OFK_SYCL)
86+
// When using SYCL, sqrt is only correctly rounded if the flag is specified.
87+
CorrectSqrt = DriverArgs.hasArg(options::OPT_foffload_fp32_prec_sqrt);
88+
else
89+
CorrectSqrt =
90+
DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt) ||
91+
DriverArgs.hasFlag(
92+
options::OPT_fhip_fp32_correctly_rounded_divide_sqrt,
93+
options::OPT_fno_hip_fp32_correctly_rounded_divide_sqrt,
94+
DefaultSqrt);
95+
8996
// GPU Sanitizer currently only supports ASan and is enabled through host
9097
// ASan.
9198
GPUSan = (DriverArgs.hasFlag(options::OPT_fgpu_sanitize,

clang/lib/Driver/ToolChains/SYCL.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,8 +21,6 @@ namespace driver {
2121
// AOT compiler).
2222
StringRef mapIntelGPUArchName(StringRef ArchName);
2323

24-
25-
2624
class Command;
2725

2826
namespace tools {

sycl-jit/jit-compiler/lib/rtc/DeviceCompilation.cpp

Lines changed: 49 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@
1515
#include <clang/Basic/Version.h>
1616
#include <clang/CodeGen/CodeGenAction.h>
1717
#include <clang/Driver/Compilation.h>
18+
#include <clang/Driver/CudaInstallationDetector.h>
1819
#include <clang/Driver/Driver.h>
20+
#include <clang/Driver/LazyDetector.h>
1921
#include <clang/Driver/Options.h>
22+
#include <clang/Driver/RocmInstallationDetector.h>
2023
#include <clang/Driver/ToolChain.h>
2124
#include <clang/Frontend/ChainedDiagnosticConsumer.h>
2225
#include <clang/Frontend/CompilerInstance.h>
@@ -42,6 +45,7 @@
4245
#include <llvm/Support/Base64.h>
4346
#include <llvm/Support/PropertySetIO.h>
4447
#include <llvm/Support/TimeProfiler.h>
48+
#include <llvm/TargetParser/TargetParser.h>
4549

4650
#include <algorithm>
4751
#include <array>
@@ -622,67 +626,57 @@ Error jit_compiler::linkDeviceLibraries(llvm::Module &Module,
622626

623627
// For GPU targets we need to link against vendor provided libdevice.
624628
if (IsCudaHIP) {
625-
std::string Argv0 = DPCPPRoot + "/bin/clang++";
626629
Triple T{Module.getTargetTriple()};
627-
IntrusiveRefCntPtr<OverlayFileSystem> OFS{
628-
new OverlayFileSystem{getRealFileSystem()}};
629-
IntrusiveRefCntPtr<InMemoryFileSystem> VFS{new InMemoryFileSystem};
630-
std::string CppFileName{"a.cpp"};
631-
VFS->addFile(CppFileName, /*ModificationTime=*/0,
632-
MemoryBuffer::getMemBuffer("", ""));
633-
OFS->pushOverlay(VFS);
634-
Driver D{Argv0, T.getTriple(), Diags, "dpcpp compiler driver", OFS};
635-
636-
SmallVector<std::string> CommandLine;
637-
CommandLine.push_back(Argv0);
638-
adjustArgs(UserArgList, DPCPPRoot, Format, CommandLine);
639-
CommandLine.push_back(CppFileName);
640-
SmallVector<const char *> CommandLineCStr(CommandLine.size());
641-
llvm::transform(CommandLine, CommandLineCStr.begin(),
642-
[](const auto &S) { return S.c_str(); });
643-
644-
Compilation *C = D.BuildCompilation(CommandLineCStr);
645-
if (!C) {
646-
return createStringError("Unable to construct driver for CUDA/HIP");
647-
}
648-
649-
const ToolChain *OffloadTC =
650-
C->getSingleOffloadToolChain<Action::OFK_SYCL>();
651-
InputArgList EmptyArgList;
652-
auto Archs =
653-
D.getOffloadArchs(*C, EmptyArgList, Action::OFK_SYCL, OffloadTC);
654-
assert(Archs.size() == 1 &&
655-
"Offload toolchain should be configured to single architecture");
656-
StringRef CPU = *Archs.begin();
657-
658-
// Pass only `-march=` or `-mcpu=` with the GPU arch determined by the
659-
// driver to `getDeviceLibs`.
660-
DerivedArgList CPUArgList{EmptyArgList};
630+
Driver D{(Twine(DPCPPRoot) + "/bin/clang++").str(), T.getTriple(), Diags};
631+
auto [CPU, Features] =
632+
Translator::getTargetCPUAndFeatureAttrs(&Module, "", Format);
633+
(void)Features;
634+
// Helper lambda to link modules.
635+
auto LinkInLib = [&](const StringRef LibDevice) -> Error {
636+
ModuleUPtr LibDeviceModule;
637+
if (auto Error = loadBitcodeLibrary(LibDevice, Context)
638+
.moveInto(LibDeviceModule)) {
639+
return Error;
640+
}
641+
if (Linker::linkModules(Module, std::move(LibDeviceModule),
642+
Linker::LinkOnlyNeeded)) {
643+
return createStringError("Unable to link libdevice: %s",
644+
BuildLog.c_str());
645+
}
646+
return Error::success();
647+
};
648+
SmallVector<std::string, 12> LibDeviceFiles;
661649
if (Format == BinaryFormat::PTX) {
662-
CPUArgList.AddJoinedArg(nullptr, D.getOpts().getOption(OPT_march_EQ),
663-
CPU);
650+
// For NVPTX we can get away with CudaInstallationDetector.
651+
LazyDetector<CudaInstallationDetector> CudaInstallation{D, T,
652+
UserArgList};
653+
auto LibDevice = CudaInstallation->getLibDeviceFile(CPU);
654+
if (LibDevice.empty()) {
655+
return createStringError("Unable to find Cuda libdevice");
656+
}
657+
LibDeviceFiles.push_back(LibDevice);
664658
} else {
665-
CPUArgList.AddJoinedArg(nullptr, D.getOpts().getOption(OPT_mcpu_EQ), CPU);
666-
}
667-
668-
SmallVector<ToolChain::BitCodeLibraryInfo, 12> CommonDeviceLibs =
669-
OffloadTC->getDeviceLibs(CPUArgList, Action::OffloadKind::OFK_SYCL);
670-
if (CommonDeviceLibs.empty()) {
671-
return createStringError("Unable to find common device libraries");
659+
LazyDetector<RocmInstallationDetector> RocmInstallation{D, T,
660+
UserArgList};
661+
RocmInstallation->detectDeviceLibrary();
662+
StringRef CanonArch =
663+
llvm::AMDGPU::getArchNameAMDGCN(llvm::AMDGPU::parseArchAMDGCN(CPU));
664+
StringRef LibDeviceFile = RocmInstallation->getLibDeviceFile(CanonArch);
665+
auto CommonBCLibs = RocmInstallation->getCommonBitcodeLibs(
666+
UserArgList, LibDeviceFile, CPU, Action::OFK_SYCL,
667+
/*NeedsASanRT=*/false);
668+
if (CommonBCLibs.empty()) {
669+
return createStringError("Unable to find ROCm bitcode libraries");
670+
}
671+
for (auto &Lib : CommonBCLibs) {
672+
LibDeviceFiles.push_back(Lib.Path);
673+
}
672674
}
673-
674-
for (auto &Lib : CommonDeviceLibs) {
675-
ModuleUPtr LibModule;
676-
if (auto Error =
677-
loadBitcodeLibrary(Lib.Path, Context).moveInto(LibModule)) {
675+
for (auto &LibDeviceFile : LibDeviceFiles) {
676+
// llvm::Error converts to false on success.
677+
if (auto Error = LinkInLib(LibDeviceFile)) {
678678
return Error;
679679
}
680-
681-
if (Linker::linkModules(Module, std::move(LibModule),
682-
Linker::LinkOnlyNeeded)) {
683-
return createStringError("Unable to link device library %s: %s",
684-
Lib.Path.c_str(), BuildLog.c_str());
685-
}
686680
}
687681
}
688682

0 commit comments

Comments
 (0)