Skip to content
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2719,7 +2719,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
}
if (GenerateIntrinsics &&
!(getLangOpts().SYCLIsDevice && getTarget().getTriple().isNVPTX())) {
!(getLangOpts().SYCLIsDevice && (getTarget().getTriple().isNVPTX() ||
getTarget().getTriple().isAMDGCN()))) {
switch (BuiltinIDIfNoAsmLabel) {
case Builtin::BIceil:
case Builtin::BIceilf:
Expand Down
9 changes: 4 additions & 5 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5533,7 +5533,7 @@ class OffloadingActionBuilder final {
// AOT compilation.
bool SYCLDeviceLibLinked = false;
Action *NativeCPULib = nullptr;
if (IsSPIR || IsNVPTX || IsSYCLNativeCPU) {
if (IsSPIR || IsNVPTX || IsAMDGCN || IsSYCLNativeCPU) {
bool UseJitLink =
IsSPIR &&
Args.hasFlag(options::OPT_fsycl_device_lib_jit_link,
Expand Down Expand Up @@ -5848,10 +5848,9 @@ class OffloadingActionBuilder final {
++NumOfDeviceLibLinked;
Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(),
Args.MakeArgString(LibName));
if (TC->getTriple().isNVPTX() ||
(TC->getTriple().isSPIR() &&
TC->getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga)) {
if (TC->getTriple().isSPIR() &&
TC->getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga) {
auto *SYCLDeviceLibsInputAction =
C.MakeAction<InputAction>(*InputArg, types::TY_Object);
auto *SYCLDeviceLibsUnbundleAction =
Expand Down
42 changes: 31 additions & 11 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,9 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C,

// spir64 target is actually JIT compilation, so we defer selection of
// bfloat16 libraries to runtime. For AOT we need libraries, but skip
// for Nvidia.
NeedLibs =
Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX();
// for Nvidia and AMD.
NeedLibs = Triple.getSubArch() != llvm::Triple::NoSubArch &&
!Triple.isNVPTX() && !Triple.isAMDGCN();
UseNative = false;
if (NeedLibs && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen &&
C.hasOffloadToolChain<Action::OFK_SYCL>()) {
Expand Down Expand Up @@ -212,6 +212,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
SmallVector<std::string, 8> LibraryList;
const llvm::opt::ArgList &Args = C.getArgs();

// For NVPTX and AMDGCN we only use one single bitcode library and ignore
// manually specified SYCL device libraries.
bool IgnoreSingleLibs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN();

struct DeviceLibOptInfo {
StringRef DeviceLibName;
StringRef DeviceLibOption;
Expand All @@ -233,10 +237,13 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ))
NoDeviceLibs = true;

bool PrintUnusedLibWarning = false;
for (StringRef Val : A->getValues()) {
if (Val == "all") {
for (const auto &K : DeviceLibLinkInfo.keys())
DeviceLibLinkInfo[K] = true && (!NoDeviceLibs || K == "internal");
DeviceLibLinkInfo[K] = (!IgnoreSingleLibs && !NoDeviceLibs) ||
(K == "internal" && NoDeviceLibs);
PrintUnusedLibWarning = false;
break;
}
auto LinkInfoIter = DeviceLibLinkInfo.find(Val);
Expand All @@ -247,10 +254,24 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
C.getDriver().Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
}
DeviceLibLinkInfo[Val] = true && !NoDeviceLibs;
DeviceLibLinkInfo[Val] = !NoDeviceLibs && !IgnoreSingleLibs;
PrintUnusedLibWarning = IgnoreSingleLibs && !NoDeviceLibs;
}
if (PrintUnusedLibWarning)
C.getDriver().Diag(diag::warn_ignored_clang_option)
<< A->getSpelling() << A->getAsString(Args);
}
}

if (TargetTriple.isNVPTX() && !NoDeviceLibs)
LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc"));

if (TargetTriple.isAMDGCN() && !NoDeviceLibs)
LibraryList.push_back(Args.MakeArgString("devicelib--amd.bc"));

if (IgnoreSingleLibs && !NoDeviceLibs)
return LibraryList;

using SYCLDeviceLibsList = SmallVector<DeviceLibOptInfo, 5>;

const SYCLDeviceLibsList SYCLDeviceWrapperLibs = {
Expand Down Expand Up @@ -304,10 +325,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment();
bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver();
StringRef LibSuffix = ".bc";
if (TargetTriple.isNVPTX() ||
(TargetTriple.isSPIR() &&
TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga))
// For NVidia or FPGA, we are unbundling objects.
if (TargetTriple.isSPIR() &&
TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga)
// For FPGA, we are unbundling objects.
LibSuffix = IsWindowsMSVCEnv ? ".obj" : ".o";
if (IsNewOffload)
// For new offload model, we use packaged .bc files.
Expand All @@ -323,7 +343,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
};

addLibraries(SYCLDeviceWrapperLibs);
if (IsSpirvAOT || TargetTriple.isNVPTX())
if (IsSpirvAOT)
addLibraries(SYCLDeviceFallbackLibs);

bool NativeBfloatLibs;
Expand Down Expand Up @@ -551,7 +571,7 @@ const char *SYCL::Linker::constructLLVMLinkCommand(
this->getToolChain().getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga;
StringRef LibPostfix = ".bc";
if (IsNVPTX || IsFPGA) {
if (IsFPGA) {
LibPostfix = ".o";
if (HostTC->getTriple().isWindowsMSVCEnvironment() &&
C.getDriver().IsCLMode())
Expand Down
4 changes: 3 additions & 1 deletion clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,10 @@
// intrinsics. This allows the driver to link in the libdevice definitions for
// cosf etc. later in the driver flow.

// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s

#include "Inputs/sycl.hpp"

Expand Down
24 changes: 13 additions & 11 deletions clang/test/Driver/sycl-offload-amdgcn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,17 +37,19 @@
// CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 10: sycl-post-link, {9}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 11: file-table-tform, {10}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 12: backend, {11}, assembler, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 13: assembler, {12}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 14: linker, {13}, image, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 15: linker, {14}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 16: foreach, {11, 15}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 17: file-table-tform, {10, 16}, tempfiletable, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 18: clang-offload-wrapper, {17}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 19: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {18}, object
// CHK-PHASES-NO-CC: 20: linker, {8, 19}, image, (host-sycl)
// CHK-PHASES-NO-CC: 10: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 11: linker, {9, 10}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 18: foreach, {13, 17}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 21: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {20}, object
// CHK-PHASES-NO-CC: 22: linker, {8, 21}, image, (host-sycl)

/// Check that we only unbundle an archive once.
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -nogpulib \
Expand Down
76 changes: 32 additions & 44 deletions clang/test/Driver/sycl-offload-nvptx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,28 +53,22 @@
// CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.o{{.*}}", object
// CHK-PHASES-NO-CC: 11: clang-offload-unbundler, {10}, object
// CHK-PHASES-NO-CC: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object
// CHK-PHASES-NO-CC: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o{{.*}}", object
// CHK-PHASES-NO-CC: 14: clang-offload-unbundler, {13}, object
// CHK-PHASES-NO-CC: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object
// CHK-PHASES-NO-CC: 16: input, "{{.*}}libsycl-itt-stubs.o{{.*}}", object
// CHK-PHASES-NO-CC: 17: clang-offload-unbundler, {16}, object
// CHK-PHASES-NO-CC: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object
// CHK-PHASES-NO-CC: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 22: sycl-post-link, {21}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 23: file-table-tform, {22}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 24: backend, {23}, assembler, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 25: assembler, {24}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {29}, object
// CHK-PHASES-NO-CC: 31: linker, {8, 30}, image, (host-sycl)
// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 17: file-table-tform, {16}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 18: backend, {17}, assembler, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 19: assembler, {18}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object
// CHK-PHASES-NO-CC: 25: linker, {8, 24}, image, (host-sycl)
//
/// Check phases specifying a compute capability.
// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \
Expand All @@ -97,28 +91,22 @@
// CHK-PHASES: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASES: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASES: 9: linker, {4}, ir, (device-sycl, sm_35)
// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.o", object
// CHK-PHASES: 11: clang-offload-unbundler, {10}, object
// CHK-PHASES: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object
// CHK-PHASES: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object
// CHK-PHASES: 14: clang-offload-unbundler, {13}, object
// CHK-PHASES: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object
// CHK-PHASES: 16: input, "{{.*}}libsycl-itt-stubs.o", object
// CHK-PHASES: 17: clang-offload-unbundler, {16}, object
// CHK-PHASES: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object
// CHK-PHASES: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_35)
// CHK-PHASES: 22: sycl-post-link, {21}, ir, (device-sycl, sm_35)
// CHK-PHASES: 23: file-table-tform, {22}, ir, (device-sycl, sm_35)
// CHK-PHASES: 24: backend, {23}, assembler, (device-sycl, sm_35)
// CHK-PHASES: 25: assembler, {24}, object, (device-sycl, sm_35)
// CHK-PHASES: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_35)
// CHK-PHASES: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_35)
// CHK-PHASES: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {29}, object
// CHK-PHASES: 31: linker, {8, 30}, image, (host-sycl)
// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_35)
// CHK-PHASES: 16: sycl-post-link, {15}, ir, (device-sycl, sm_35)
// CHK-PHASES: 17: file-table-tform, {16}, ir, (device-sycl, sm_35)
// CHK-PHASES: 18: backend, {17}, assembler, (device-sycl, sm_35)
// CHK-PHASES: 19: assembler, {18}, object, (device-sycl, sm_35)
// CHK-PHASES: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_35)
// CHK-PHASES: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_35)
// CHK-PHASES: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {23}, object
// CHK-PHASES: 25: linker, {8, 24}, image, (host-sycl)

/// Check calling preprocessor only
// RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \
Expand Down
2 changes: 1 addition & 1 deletion libclc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ if( ENABLE_RUNTIME_SUBNORMAL )
foreach( file subnormal_use_default subnormal_disable )
link_bc(
TARGET ${file}
RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR}
INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/${file}.ll
)
install( FILES $<TARGET_PROPERTY:${file},TARGET_FILE> ARCHIVE
Expand Down Expand Up @@ -405,7 +406,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# Enable SPIR-V builtin function declarations, so they don't
# have to be explicitly declared in the source.
list( APPEND flags -Xclang -fdeclare-spirv-builtins)

set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" )
file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} )

Expand Down
Loading