Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 3 additions & 4 deletions clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5848,10 +5848,9 @@ class OffloadingActionBuilder final {
++NumOfDeviceLibLinked;
Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(),
Args.MakeArgString(LibName));
if (TC->getTriple().isNVPTX() ||
(TC->getTriple().isSPIR() &&
TC->getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga)) {
if (TC->getTriple().isSPIR() &&
TC->getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga) {
auto *SYCLDeviceLibsInputAction =
C.MakeAction<InputAction>(*InputArg, types::TY_Object);
auto *SYCLDeviceLibsUnbundleAction =
Expand Down
33 changes: 25 additions & 8 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,10 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
SmallVector<std::string, 8> LibraryList;
const llvm::opt::ArgList &Args = C.getArgs();

// For NVPTX we only use one single bitcode library and ignore
// manually specified SYCL device libraries.
bool IgnoreSingleLibs = TargetTriple.isNVPTX();

struct DeviceLibOptInfo {
StringRef DeviceLibName;
StringRef DeviceLibOption;
Expand All @@ -233,10 +237,13 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ))
NoDeviceLibs = true;

bool PrintUnusedLibWarning = false;
for (StringRef Val : A->getValues()) {
if (Val == "all") {
for (const auto &K : DeviceLibLinkInfo.keys())
DeviceLibLinkInfo[K] = true && (!NoDeviceLibs || K == "internal");
DeviceLibLinkInfo[K] = (!IgnoreSingleLibs && !NoDeviceLibs) ||
(K == "internal" && NoDeviceLibs);
PrintUnusedLibWarning = false;
break;
}
auto LinkInfoIter = DeviceLibLinkInfo.find(Val);
Expand All @@ -247,10 +254,21 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
C.getDriver().Diag(diag::err_drv_unsupported_option_argument)
<< A->getSpelling() << Val;
}
DeviceLibLinkInfo[Val] = true && !NoDeviceLibs;
DeviceLibLinkInfo[Val] = !NoDeviceLibs && !IgnoreSingleLibs;
PrintUnusedLibWarning = IgnoreSingleLibs && !NoDeviceLibs;
}
if (PrintUnusedLibWarning)
C.getDriver().Diag(diag::warn_ignored_clang_option)
<< A->getSpelling() << A->getAsString(Args);
}
}

if (TargetTriple.isNVPTX() && !NoDeviceLibs)
LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc"));

if (IgnoreSingleLibs && !NoDeviceLibs)
return LibraryList;

using SYCLDeviceLibsList = SmallVector<DeviceLibOptInfo, 5>;

const SYCLDeviceLibsList SYCLDeviceWrapperLibs = {
Expand Down Expand Up @@ -304,10 +322,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment();
bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver();
StringRef LibSuffix = ".bc";
if (TargetTriple.isNVPTX() ||
(TargetTriple.isSPIR() &&
TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga))
// For NVidia or FPGA, we are unbundling objects.
if (TargetTriple.isSPIR() &&
TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga)
// For FPGA, we are unbundling objects.
LibSuffix = IsWindowsMSVCEnv ? ".obj" : ".o";
if (IsNewOffload)
// For new offload model, we use packaged .bc files.
Expand All @@ -323,7 +340,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
};

addLibraries(SYCLDeviceWrapperLibs);
if (IsSpirvAOT || TargetTriple.isNVPTX())
if (IsSpirvAOT)
addLibraries(SYCLDeviceFallbackLibs);

bool NativeBfloatLibs;
Expand Down Expand Up @@ -551,7 +568,7 @@ const char *SYCL::Linker::constructLLVMLinkCommand(
this->getToolChain().getTriple().getSubArch() ==
llvm::Triple::SPIRSubArch_fpga;
StringRef LibPostfix = ".bc";
if (IsNVPTX || IsFPGA) {
if (IsFPGA) {
LibPostfix = ".o";
if (HostTC->getTriple().isWindowsMSVCEnvironment() &&
C.getDriver().IsCLMode())
Expand Down
2 changes: 1 addition & 1 deletion clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
// intrinsics. This allows the driver to link in the libdevice definitions for
// cosf etc. later in the driver flow.

// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s

#include "Inputs/sycl.hpp"
Expand Down
76 changes: 32 additions & 44 deletions clang/test/Driver/sycl-offload-nvptx.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,28 +53,22 @@
// CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.o{{.*}}", object
// CHK-PHASES-NO-CC: 11: clang-offload-unbundler, {10}, object
// CHK-PHASES-NO-CC: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object
// CHK-PHASES-NO-CC: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o{{.*}}", object
// CHK-PHASES-NO-CC: 14: clang-offload-unbundler, {13}, object
// CHK-PHASES-NO-CC: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object
// CHK-PHASES-NO-CC: 16: input, "{{.*}}libsycl-itt-stubs.o{{.*}}", object
// CHK-PHASES-NO-CC: 17: clang-offload-unbundler, {16}, object
// CHK-PHASES-NO-CC: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object
// CHK-PHASES-NO-CC: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 22: sycl-post-link, {21}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 23: file-table-tform, {22}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 24: backend, {23}, assembler, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 25: assembler, {24}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {29}, object
// CHK-PHASES-NO-CC: 31: linker, {8, 30}, image, (host-sycl)
// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 17: file-table-tform, {16}, ir, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 18: backend, {17}, assembler, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 19: assembler, {18}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50)
// CHK-PHASES-NO-CC: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object
// CHK-PHASES-NO-CC: 25: linker, {8, 24}, image, (host-sycl)
//
/// Check phases specifying a compute capability.
// RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \
Expand All @@ -97,28 +91,22 @@
// CHK-PHASES: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASES: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASES: 9: linker, {4}, ir, (device-sycl, sm_35)
// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.o", object
// CHK-PHASES: 11: clang-offload-unbundler, {10}, object
// CHK-PHASES: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object
// CHK-PHASES: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object
// CHK-PHASES: 14: clang-offload-unbundler, {13}, object
// CHK-PHASES: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object
// CHK-PHASES: 16: input, "{{.*}}libsycl-itt-stubs.o", object
// CHK-PHASES: 17: clang-offload-unbundler, {16}, object
// CHK-PHASES: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object
// CHK-PHASES: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_35)
// CHK-PHASES: 22: sycl-post-link, {21}, ir, (device-sycl, sm_35)
// CHK-PHASES: 23: file-table-tform, {22}, ir, (device-sycl, sm_35)
// CHK-PHASES: 24: backend, {23}, assembler, (device-sycl, sm_35)
// CHK-PHASES: 25: assembler, {24}, object, (device-sycl, sm_35)
// CHK-PHASES: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_35)
// CHK-PHASES: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_35)
// CHK-PHASES: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {29}, object
// CHK-PHASES: 31: linker, {8, 30}, image, (host-sycl)
// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_35)
// CHK-PHASES: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35)
// CHK-PHASES: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_35)
// CHK-PHASES: 16: sycl-post-link, {15}, ir, (device-sycl, sm_35)
// CHK-PHASES: 17: file-table-tform, {16}, ir, (device-sycl, sm_35)
// CHK-PHASES: 18: backend, {17}, assembler, (device-sycl, sm_35)
// CHK-PHASES: 19: assembler, {18}, object, (device-sycl, sm_35)
// CHK-PHASES: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_35)
// CHK-PHASES: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_35)
// CHK-PHASES: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_35)
// CHK-PHASES: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {23}, object
// CHK-PHASES: 25: linker, {8, 24}, image, (host-sycl)

/// Check calling preprocessor only
// RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \
Expand Down
2 changes: 1 addition & 1 deletion libclc/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ if( ENABLE_RUNTIME_SUBNORMAL )
foreach( file subnormal_use_default subnormal_disable )
link_bc(
TARGET ${file}
RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR}
INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/${file}.ll
)
install( FILES $<TARGET_PROPERTY:${file},TARGET_FILE> ARCHIVE
Expand Down Expand Up @@ -405,7 +406,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} )
# Enable SPIR-V builtin function declarations, so they don't
# have to be explicitly declared in the source.
list( APPEND flags -Xclang -fdeclare-spirv-builtins)

set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" )
file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} )

Expand Down
91 changes: 61 additions & 30 deletions libclc/cmake/modules/AddLibclc.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -86,12 +86,15 @@ endfunction()
# Custom target to create
# * INPUTS <string> ...
# List of bytecode files to link together
# * RSP_DIR <string>
# Directory where a response file should be placed
# (Only needed for WIN32 or CYGWIN)
# * DEPENDENCIES <string> ...
# List of extra dependencies to inject
function(link_bc)
cmake_parse_arguments(ARG
""
"TARGET"
"TARGET;RSP_DIR"
"INPUTS;DEPENDENCIES"
${ARGN}
)
Expand All @@ -100,7 +103,7 @@ function(link_bc)
if( WIN32 OR CYGWIN )
# Create a response file in case the number of inputs exceeds command-line
# character limits on certain platforms.
file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE )
file( TO_CMAKE_PATH ${ARG_RSP_DIR}/${ARG_TARGET}.rsp RSP_FILE )
# Turn it into a space-separated list of input files
list( JOIN ARG_INPUTS " " RSP_INPUT )
file( WRITE ${RSP_FILE} ${RSP_INPUT} )
Expand Down Expand Up @@ -216,6 +219,50 @@ function(add_libclc_alias alias target)

endfunction(add_libclc_alias alias target)

# Runs opt and then prepare-builtins on a bitcode file.
#
# Two custom commands are chained, each wrapped in an ALL custom target:
#   1. ${opt_exe} <OPT_FLAGS> -o <LIB_TGT>.bc <IN_FILE>
#   2. ${prepare_builtins_exe} -o <OUT_DIR>/<out_file> <LIB_TGT>.bc
#
# The custom targets created are <LIB_TGT> and prepare-<out_file>; each one
# has its TARGET_FILE property set to the file its command produces.
#
# The function reads opt_exe, opt_target, prepare_builtins_exe and
# prepare_builtins_target, which it does not define itself, so they must be
# visible from the calling scope.
#
# ARGUMENTS:
# * out_file (positional) string
#     File name of the final output, created inside OUT_DIR
# * LIB_TGT string
#     Target name that becomes dependent on the opt output file named
#     LIB_TGT.bc
# * IN_FILE string
#     Input bitcode file handed to opt
# * OUT_DIR string
#     Name of the directory where the final output should be placed
# * OPT_FLAGS <string> ...
#     Flags passed to opt
# * DEPENDENCIES <string> ...
#     List of extra dependencies to inject into the opt step
function(process_bc out_file)
  cmake_parse_arguments(ARG
    ""
    "LIB_TGT;IN_FILE;OUT_DIR"
    "OPT_FLAGS;DEPENDENCIES"
    ${ARGN}
  )

  # Add opt target
  add_custom_command( OUTPUT ${ARG_LIB_TGT}.bc
    COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${ARG_LIB_TGT}.bc
      ${ARG_IN_FILE}
    DEPENDS ${opt_target} ${ARG_IN_FILE} ${ARG_DEPENDENCIES}
  )
  add_custom_target( ${ARG_LIB_TGT}
    ALL DEPENDS ${ARG_LIB_TGT}.bc
  )
  set_target_properties( ${ARG_LIB_TGT}
    PROPERTIES TARGET_FILE ${ARG_LIB_TGT}.bc
  )

  set( builtins_opt_lib $<TARGET_PROPERTY:${ARG_LIB_TGT},TARGET_FILE> )

  # Add prepare target
  # prepare-builtins writes straight into OUT_DIR and nothing in this function
  # otherwise guarantees that the directory exists, so create it first.
  # make_directory is a no-op when the directory is already present.
  add_custom_command( OUTPUT ${ARG_OUT_DIR}/${out_file}
    COMMAND ${CMAKE_COMMAND} -E make_directory ${ARG_OUT_DIR}
    COMMAND ${prepare_builtins_exe} -o ${ARG_OUT_DIR}/${out_file}
      ${builtins_opt_lib}
    DEPENDS ${builtins_opt_lib} ${ARG_LIB_TGT} ${prepare_builtins_target}
  )
  add_custom_target( prepare-${out_file} ALL
    DEPENDS ${ARG_OUT_DIR}/${out_file}
  )
  set_target_properties( prepare-${out_file}
    PROPERTIES TARGET_FILE ${ARG_OUT_DIR}/${out_file}
  )
endfunction()

# add_libclc_builtin_set(arch_suffix
# TRIPLE string
# Triple used to compile
Expand Down Expand Up @@ -291,44 +338,28 @@ macro(add_libclc_builtin_set arch_suffix)
link_bc(
TARGET ${builtins_link_lib_tgt}
INPUTS ${bytecode_files}
RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR}
DEPENDENCIES ${builtins_comp_lib_tgt}
)

set( builtins_link_lib $<TARGET_PROPERTY:${builtins_link_lib_tgt},TARGET_FILE> )

set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} )
add_custom_command( OUTPUT ${LIBCLC_LIBRARY_OUTPUT_INTDIR}
COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBCLC_LIBRARY_OUTPUT_INTDIR}
DEPENDS ${builtins_link_lib} prepare_builtins )

# Add opt target
add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc
COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${builtins_opt_lib_tgt}.bc
${builtins_link_lib}
DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt}
)
add_custom_target( ${builtins_opt_lib_tgt}
ALL DEPENDS ${builtins_opt_lib_tgt}.bc
)
set_target_properties( ${builtins_opt_lib_tgt}
PROPERTIES TARGET_FILE ${builtins_opt_lib_tgt}.bc
)
set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} )

set( builtins_opt_lib $<TARGET_PROPERTY:${builtins_opt_lib_tgt},TARGET_FILE> )

# Add prepare target
set( obj_suffix ${arch_suffix}.bc )
add_custom_command( OUTPUT ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix}
COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBCLC_LIBRARY_OUTPUT_INTDIR}
COMMAND ${prepare_builtins_exe} -o ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix}
${builtins_opt_lib}
DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} )
add_custom_target( prepare-${obj_suffix} ALL
DEPENDS ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix}
)
set_target_properties( prepare-${obj_suffix}
PROPERTIES TARGET_FILE ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix}
)
process_bc(${arch_suffix}.bc
LIB_TGT ${builtins_opt_lib_tgt}
IN_FILE ${builtins_link_lib}
OUT_DIR ${LIBCLC_LIBRARY_OUTPUT_INTDIR}
OPT_FLAGS ${ARG_OPT_FLAGS}
DEPENDENCIES ${builtins_link_lib_tgt})

# Add dependency to top-level pseudo target to ease making other
# targets dependent on libclc.
set( obj_suffix ${arch_suffix}.bc )
add_dependencies(${ARG_PARENT_TARGET} prepare-${obj_suffix})
set( builtins_lib $<TARGET_PROPERTY:prepare-${obj_suffix},TARGET_FILE> )

Expand Down
2 changes: 2 additions & 0 deletions libdevice/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
# Utility project providing various functionalities for SPIR-V devices
# without native support of these functionalities.

include(${CMAKE_CURRENT_SOURCE_DIR}/../libclc/cmake/modules/AddLibclc.cmake)

set(CMAKE_MODULE_PATH
${CMAKE_MODULE_PATH}
"${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules"
Expand Down
Loading