diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index f8f2ff94e2a2c..7c31a980d4a5b 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5848,10 +5848,9 @@ class OffloadingActionBuilder final { ++NumOfDeviceLibLinked; Arg *InputArg = MakeInputArg(Args, C.getDriver().getOpts(), Args.MakeArgString(LibName)); - if (TC->getTriple().isNVPTX() || - (TC->getTriple().isSPIR() && - TC->getTriple().getSubArch() == - llvm::Triple::SPIRSubArch_fpga)) { + if (TC->getTriple().isSPIR() && + TC->getTriple().getSubArch() == + llvm::Triple::SPIRSubArch_fpga) { auto *SYCLDeviceLibsInputAction = C.MakeAction(*InputArg, types::TY_Object); auto *SYCLDeviceLibsUnbundleAction = diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 37589f00c4140..6e9d7fee5b961 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -212,18 +212,26 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, SmallVector LibraryList; const llvm::opt::ArgList &Args = C.getArgs(); + // For NVPTX we only use one single bitcode library and ignore + // manually specified SYCL device libraries. + bool IgnoreSingleLibs = TargetTriple.isNVPTX(); + struct DeviceLibOptInfo { StringRef DeviceLibName; StringRef DeviceLibOption; }; - bool NoDeviceLibs = false; - // Currently, all SYCL device libraries will be linked by default. Linkage - // of "internal" libraries cannot be affected via -fno-sycl-device-lib. + // Currently, all SYCL device libraries will be linked by default. llvm::StringMap DeviceLibLinkInfo = { {"libc", true}, {"libm-fp32", true}, {"libm-fp64", true}, {"libimf-fp32", true}, {"libimf-fp64", true}, {"libimf-bf16", true}, {"libm-bfloat16", true}, {"internal", true}}; + + // If -fno-sycl-device-lib is specified, its values will be used to exclude + // linkage of libraries specified by DeviceLibLinkInfo. 
Linkage of "internal" + // libraries cannot be affected via -fno-sycl-device-lib. + bool ExcludeDeviceLibs = false; + if (Arg *A = Args.getLastArg(options::OPT_fsycl_device_lib_EQ, options::OPT_fno_sycl_device_lib_EQ)) { if (A->getValues().size() == 0) @@ -231,12 +239,24 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, << A->getAsString(Args); else { if (A->getOption().matches(options::OPT_fno_sycl_device_lib_EQ)) - NoDeviceLibs = true; + ExcludeDeviceLibs = true; + + // When single libraries are ignored and a subset of library names + // not containing the value "all" is specified by -fno-sycl-device-lib, + // print an unused argument warning. + bool PrintUnusedExcludeWarning = false; for (StringRef Val : A->getValues()) { if (Val == "all") { + PrintUnusedExcludeWarning = false; + + // Make sure that internal libraries are still linked against + // when -fno-sycl-device-lib contains "all" and single libraries + // should be ignored. + IgnoreSingleLibs = IgnoreSingleLibs && !ExcludeDeviceLibs; + for (const auto &K : DeviceLibLinkInfo.keys()) - DeviceLibLinkInfo[K] = true && (!NoDeviceLibs || K == "internal"); + DeviceLibLinkInfo[K] = (K == "internal") || !ExcludeDeviceLibs; break; } auto LinkInfoIter = DeviceLibLinkInfo.find(Val); @@ -247,10 +267,20 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, C.getDriver().Diag(diag::err_drv_unsupported_option_argument) << A->getSpelling() << Val; } - DeviceLibLinkInfo[Val] = true && !NoDeviceLibs; + DeviceLibLinkInfo[Val] = !ExcludeDeviceLibs; + PrintUnusedExcludeWarning = IgnoreSingleLibs && ExcludeDeviceLibs; } + if (PrintUnusedExcludeWarning) + C.getDriver().Diag(diag::warn_drv_unused_argument) << A->getSpelling(); } } + + if (TargetTriple.isNVPTX() && IgnoreSingleLibs) + LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc")); + + if (IgnoreSingleLibs) + return LibraryList; + using SYCLDeviceLibsList = SmallVector; const SYCLDeviceLibsList 
SYCLDeviceWrapperLibs = { @@ -304,10 +334,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, C.getDefaultToolChain().getTriple().isWindowsMSVCEnvironment(); bool IsNewOffload = C.getDriver().getUseNewOffloadingDriver(); StringRef LibSuffix = ".bc"; - if (TargetTriple.isNVPTX() || - (TargetTriple.isSPIR() && - TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga)) - // For NVidia or FPGA, we are unbundling objects. + if (TargetTriple.isSPIR() && + TargetTriple.getSubArch() == llvm::Triple::SPIRSubArch_fpga) + // For FPGA, we are unbundling objects. LibSuffix = IsWindowsMSVCEnv ? ".obj" : ".o"; if (IsNewOffload) // For new offload model, we use packaged .bc files. @@ -323,7 +352,7 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, }; addLibraries(SYCLDeviceWrapperLibs); - if (IsSpirvAOT || TargetTriple.isNVPTX()) + if (IsSpirvAOT) addLibraries(SYCLDeviceFallbackLibs); bool NativeBfloatLibs; @@ -551,7 +580,7 @@ const char *SYCL::Linker::constructLLVMLinkCommand( this->getToolChain().getTriple().getSubArch() == llvm::Triple::SPIRSubArch_fpga; StringRef LibPostfix = ".bc"; - if (IsNVPTX || IsFPGA) { + if (IsFPGA) { LibPostfix = ".o"; if (HostTC->getTriple().isWindowsMSVCEnvironment() && C.getDriver().IsCLMode()) diff --git a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp index af94dada263d1..2a2043ac5dc55 100644 --- a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp +++ b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp @@ -5,7 +5,7 @@ // intrinsics. This allows the driver to link in the libdevice definitions for // cosf etc. later in the driver flow. 
-// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s #include "Inputs/sycl.hpp" diff --git a/clang/test/Driver/sycl-device-lib-nvptx.cpp b/clang/test/Driver/sycl-device-lib-nvptx.cpp new file mode 100644 index 0000000000000..2525db0b8c44f --- /dev/null +++ b/clang/test/Driver/sycl-device-lib-nvptx.cpp @@ -0,0 +1,44 @@ +// Tests specific to `-fsycl-targets=nvptx64-nvidia-cuda` +// Verify that the correct devicelib linking actions are spawned by the driver. +// Check also if the correct warnings are generated. + +// UNSUPPORTED: system-windows + +// Check if internal libraries are still linked against when linkage of all +// device libs is manually excluded. +// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s + +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50) +// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, sm_50) + +// Check that the -fsycl-device-lib flag has no effect when "all" is specified. 
+// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-ALL %s + +// Check that the -fsycl-device-lib flag has no effect when subsets of libs +// are specified. +// RUN: %clangxx -ccc-print-phases -std=c++11 \ +// RUN: -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-ALL %s + +// Check that -fno-sycl-device-lib is ignored when it does not contain "all". +// A warning should be printed that the flag got ignored. +// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl \ +// RUN: -fno-sycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \ +// RUN: -fsycl-targets=nvptx64-nvidia-cuda %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s + +// CHK-UNUSED-WARN: warning: argument unused during compilation: '-fno-sycl-device-lib=' +// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--cuda.bc", ir, (device-sycl, sm_50) +// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, sm_50) + diff --git a/clang/test/Driver/sycl-offload-nvptx.cpp b/clang/test/Driver/sycl-offload-nvptx.cpp index db5e024363b02..324c5aa9cdd0e 100644 --- a/clang/test/Driver/sycl-offload-nvptx.cpp +++ b/clang/test/Driver/sycl-offload-nvptx.cpp @@ -53,28 +53,22 @@ // CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.o{{.*}}", object -// CHK-PHASES-NO-CC: 11: clang-offload-unbundler, {10}, object -// CHK-PHASES-NO-CC: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object -// CHK-PHASES-NO-CC: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o{{.*}}", object -// CHK-PHASES-NO-CC: 14: 
clang-offload-unbundler, {13}, object -// CHK-PHASES-NO-CC: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object -// CHK-PHASES-NO-CC: 16: input, "{{.*}}libsycl-itt-stubs.o{{.*}}", object -// CHK-PHASES-NO-CC: 17: clang-offload-unbundler, {16}, object -// CHK-PHASES-NO-CC: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object -// CHK-PHASES-NO-CC: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 22: sycl-post-link, {21}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 23: file-table-tform, {22}, ir, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 24: backend, {23}, assembler, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 25: assembler, {24}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_50) -// CHK-PHASES-NO-CC: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {29}, object -// CHK-PHASES-NO-CC: 31: linker, {8, 30}, image, (host-sycl) +// CHK-PHASES-NO-CC: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 16: sycl-post-link, {15}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 17: file-table-tform, 
{16}, ir, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 18: backend, {17}, assembler, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 19: assembler, {18}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_50) +// CHK-PHASES-NO-CC: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_50)" {23}, object +// CHK-PHASES-NO-CC: 25: linker, {8, 24}, image, (host-sycl) // /// Check phases specifying a compute capability. // RUN: %clangxx -ccc-print-phases --sysroot=%S/Inputs/SYCL -std=c++11 \ @@ -97,28 +91,22 @@ // CHK-PHASES: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES: 9: linker, {4}, ir, (device-sycl, sm_35) -// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.o", object -// CHK-PHASES: 11: clang-offload-unbundler, {10}, object -// CHK-PHASES: 12: offload, " (nvptx64-nvidia-cuda)" {11}, object -// CHK-PHASES: 13: input, "{{.*}}libsycl-itt-compiler-wrappers.o", object -// CHK-PHASES: 14: clang-offload-unbundler, {13}, object -// CHK-PHASES: 15: offload, " (nvptx64-nvidia-cuda)" {14}, object -// CHK-PHASES: 16: input, "{{.*}}libsycl-itt-stubs.o", object -// CHK-PHASES: 17: clang-offload-unbundler, {16}, object -// CHK-PHASES: 18: offload, " (nvptx64-nvidia-cuda)" {17}, object -// CHK-PHASES: 19: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35) -// CHK-PHASES: 20: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35) -// CHK-PHASES: 21: linker, {9, 12, 15, 18, 19, 20}, ir, (device-sycl, sm_35) - // CHK-PHASES: 22: sycl-post-link, {21}, ir, (device-sycl, sm_35) -// CHK-PHASES: 23: file-table-tform, {22}, ir, (device-sycl, sm_35) -// CHK-PHASES: 24: backend, {23}, assembler, (device-sycl, sm_35) -// 
CHK-PHASES: 25: assembler, {24}, object, (device-sycl, sm_35) -// CHK-PHASES: 26: linker, {24, 25}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 27: foreach, {23, 26}, cuda-fatbin, (device-sycl, sm_35) -// CHK-PHASES: 28: file-table-tform, {22, 27}, tempfiletable, (device-sycl, sm_35) -// CHK-PHASES: 29: clang-offload-wrapper, {28}, object, (device-sycl, sm_35) -// CHK-PHASES: 30: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {29}, object -// CHK-PHASES: 31: linker, {8, 30}, image, (host-sycl) +// CHK-PHASES: 10: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 11: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 12: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, sm_35) +// CHK-PHASES: 13: input, "{{.*}}nvidiacl{{.*}}", ir, (device-sycl, sm_35) +// CHK-PHASES: 14: input, "{{.*}}libdevice{{.*}}", ir, (device-sycl, sm_35) +// CHK-PHASES: 15: linker, {9, 10, 11, 12, 13, 14}, ir, (device-sycl, sm_35) +// CHK-PHASES: 16: sycl-post-link, {15}, ir, (device-sycl, sm_35) +// CHK-PHASES: 17: file-table-tform, {16}, ir, (device-sycl, sm_35) +// CHK-PHASES: 18: backend, {17}, assembler, (device-sycl, sm_35) +// CHK-PHASES: 19: assembler, {18}, object, (device-sycl, sm_35) +// CHK-PHASES: 20: linker, {18, 19}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 21: foreach, {17, 20}, cuda-fatbin, (device-sycl, sm_35) +// CHK-PHASES: 22: file-table-tform, {16, 21}, tempfiletable, (device-sycl, sm_35) +// CHK-PHASES: 23: clang-offload-wrapper, {22}, object, (device-sycl, sm_35) +// CHK-PHASES: 24: offload, "device-sycl (nvptx64-nvidia-cuda:sm_35)" {23}, object +// CHK-PHASES: 25: linker, {8, 24}, image, (host-sycl) /// Check calling preprocessor only // RUN: %clangxx -E -fsycl -fsycl-targets=nvptx64-nvidia-cuda -ccc-print-phases %s 2>&1 \ diff --git a/libclc/CMakeLists.txt b/libclc/CMakeLists.txt index f05492d777977..dff0f66ba25b9 100644 --- a/libclc/CMakeLists.txt +++ 
b/libclc/CMakeLists.txt @@ -233,6 +233,7 @@ if( ENABLE_RUNTIME_SUBNORMAL ) foreach( file subnormal_use_default subnormal_disable ) link_bc( TARGET ${file} + RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR} INPUTS ${CMAKE_CURRENT_SOURCE_DIR}/generic/lib/${file}.ll ) install( FILES $ ARCHIVE @@ -405,7 +406,6 @@ foreach( t ${LIBCLC_TARGETS_TO_BUILD} ) # Enable SPIR-V builtin function declarations, so they don't # have to be explicity declared in the soruce. list( APPEND flags -Xclang -fdeclare-spirv-builtins) - set( LIBCLC_ARCH_OBJFILE_DIR "${LIBCLC_OBJFILE_DIR}/${arch_suffix}" ) file( MAKE_DIRECTORY ${LIBCLC_ARCH_OBJFILE_DIR} ) diff --git a/libclc/cmake/modules/AddLibclc.cmake b/libclc/cmake/modules/AddLibclc.cmake index 2a843dd67fb8c..4711b9eb3e3b8 100644 --- a/libclc/cmake/modules/AddLibclc.cmake +++ b/libclc/cmake/modules/AddLibclc.cmake @@ -86,12 +86,15 @@ endfunction() # Custom target to create # * INPUT ... # List of bytecode files to link together +# * RSP_DIR +# Directory where a response file should be placed +# (Only needed for WIN32 or CYGWIN) # * DEPENDENCIES ... # List of extra dependencies to inject function(link_bc) cmake_parse_arguments(ARG "" - "TARGET" + "TARGET;RSP_DIR" "INPUTS;DEPENDENCIES" ${ARGN} ) @@ -100,7 +103,7 @@ function(link_bc) if( WIN32 OR CYGWIN ) # Create a response file in case the number of inputs exceeds command-line # character limits on certain platforms. 
- file( TO_CMAKE_PATH ${LIBCLC_ARCH_OBJFILE_DIR}/${ARG_TARGET}.rsp RSP_FILE ) + file( TO_CMAKE_PATH ${ARG_RSP_DIR}/${ARG_TARGET}.rsp RSP_FILE ) # Turn it into a space-separate list of input files list( JOIN ARG_INPUTS " " RSP_INPUT ) file( WRITE ${RSP_FILE} ${RSP_INPUT} ) @@ -216,6 +219,50 @@ function(add_libclc_alias alias target) endfunction(add_libclc_alias alias target) +# Runs opt and prepare-builtins on a bitcode file specified by lib_tgt +# +# ARGUMENTS: +# * LIB_TGT string +# Target name that becomes dependent on the out file named LIB_TGT.bc +# * IN_FILE string +# Target name of the input bytecode file +# * OUT_DIR string +# Name of the directory where the output should be placed +# * DEPENDENCIES ... +# List of extra dependencies to inject +function(process_bc out_file) + cmake_parse_arguments(ARG + "" + "LIB_TGT;IN_FILE;OUT_DIR" + "OPT_FLAGS;DEPENDENCIES" + ${ARGN}) + add_custom_command( OUTPUT ${ARG_LIB_TGT}.bc + COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${ARG_LIB_TGT}.bc + ${ARG_IN_FILE} + DEPENDS ${opt_target} ${ARG_IN_FILE} ${ARG_DEPENDENCIES} + ) + add_custom_target( ${ARG_LIB_TGT} + ALL DEPENDS ${ARG_LIB_TGT}.bc + ) + set_target_properties( ${ARG_LIB_TGT} + PROPERTIES TARGET_FILE ${ARG_LIB_TGT}.bc + ) + + set( builtins_opt_lib $ ) + + # Add prepare target + add_custom_command( OUTPUT ${ARG_OUT_DIR}/${out_file} + COMMAND ${prepare_builtins_exe} -o ${ARG_OUT_DIR}/${out_file} + ${builtins_opt_lib} + DEPENDS ${builtins_opt_lib} ${ARG_LIB_TGT} ${prepare_builtins_target} ) + add_custom_target( prepare-${out_file} ALL + DEPENDS ${ARG_OUT_DIR}/${out_file} + ) + set_target_properties( prepare-${out_file} + PROPERTIES TARGET_FILE ${ARG_OUT_DIR}/${out_file} + ) +endfunction() + # add_libclc_builtin_set(arch_suffix # TRIPLE string # Triple used to compile @@ -291,44 +338,28 @@ macro(add_libclc_builtin_set arch_suffix) link_bc( TARGET ${builtins_link_lib_tgt} INPUTS ${bytecode_files} + RSP_DIR ${LIBCLC_ARCH_OBJFILE_DIR} DEPENDENCIES ${builtins_comp_lib_tgt} ) set( 
builtins_link_lib $ ) - set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) + add_custom_command( OUTPUT ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + DEPENDS ${builtins_link_lib} prepare_builtins ) - # Add opt target - add_custom_command( OUTPUT ${builtins_opt_lib_tgt}.bc - COMMAND ${opt_exe} ${ARG_OPT_FLAGS} -o ${builtins_opt_lib_tgt}.bc - ${builtins_link_lib} - DEPENDS ${opt_target} ${builtins_link_lib} ${builtins_link_lib_tgt} - ) - add_custom_target( ${builtins_opt_lib_tgt} - ALL DEPENDS ${builtins_opt_lib_tgt}.bc - ) - set_target_properties( ${builtins_opt_lib_tgt} - PROPERTIES TARGET_FILE ${builtins_opt_lib_tgt}.bc - ) + set( builtins_opt_lib_tgt builtins.opt.${arch_suffix} ) - set( builtins_opt_lib $ ) - - # Add prepare target - set( obj_suffix ${arch_suffix}.bc ) - add_custom_command( OUTPUT ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - COMMAND ${CMAKE_COMMAND} -E make_directory ${LIBCLC_LIBRARY_OUTPUT_INTDIR} - COMMAND ${prepare_builtins_exe} -o ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ${builtins_opt_lib} - DEPENDS ${builtins_opt_lib} ${builtins_opt_lib_tgt} ${prepare_builtins_target} ) - add_custom_target( prepare-${obj_suffix} ALL - DEPENDS ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ) - set_target_properties( prepare-${obj_suffix} - PROPERTIES TARGET_FILE ${LIBCLC_LIBRARY_OUTPUT_INTDIR}/${obj_suffix} - ) + process_bc(${arch_suffix}.bc + LIB_TGT ${builtins_opt_lib_tgt} + IN_FILE ${builtins_link_lib} + OUT_DIR ${LIBCLC_LIBRARY_OUTPUT_INTDIR} + OPT_FLAGS ${ARG_OPT_FLAGS} + DEPENDENCIES ${builtins_link_lib_tgt}) # Add dependency to top-level pseudo target to ease making other # targets dependent on libclc. 
+ set( obj_suffix ${arch_suffix}.bc ) add_dependencies(${ARG_PARENT_TARGET} prepare-${obj_suffix}) set( builtins_lib $ ) diff --git a/libdevice/CMakeLists.txt b/libdevice/CMakeLists.txt index b6078f9a44cf8..564391547677f 100644 --- a/libdevice/CMakeLists.txt +++ b/libdevice/CMakeLists.txt @@ -1,6 +1,8 @@ # Utility project providing various functionalities for SPIR-V devices # without native support of these functionalities. +include(${CMAKE_CURRENT_SOURCE_DIR}/../libclc/cmake/modules/AddLibclc.cmake) + set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules" diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index c1aac6d017eff..f43c668ad9188 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -1,26 +1,31 @@ set(obj_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") -set(obj_new_offload_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") +set(obj-new-offload_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") if (MSVC) - set(lib-suffix obj) - set(new-offload-lib-suffix new.obj) + set(obj-suffix obj) + set(obj-new-offload-suffix new.obj) set(spv_binary_dir "${CMAKE_RUNTIME_OUTPUT_DIRECTORY}") set(install_dest_spv bin) - set(devicelib_host_static sycl-devicelib-host.lib) - set(devicelib_host_static_new_offload sycl-devicelib-host.new.lib) + set(devicelib_host_static_obj sycl-devicelib-host.lib) + set(devicelib_host_static_obj-new-offload sycl-devicelib-host.new.lib) else() - set(lib-suffix o) - set(new-offload-lib-suffix new.o) + set(obj-suffix o) + set(obj-new-offload-suffix new.o) set(spv_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") set(install_dest_spv lib${LLVM_LIBDIR_SUFFIX}) - set(devicelib_host_static libsycl-devicelib-host.a) - set(devicelib_host_static_new_offload libsycl-devicelib-host.new.a) + set(devicelib_host_static_obj libsycl-devicelib-host.a) + set(devicelib_host_static_obj-new-offload libsycl-devicelib-host.new.a) 
endif() +set(spv-suffix spv) +set(bc-suffix bc) set(bc_binary_dir "${CMAKE_LIBRARY_OUTPUT_DIRECTORY}") -set(install_dest_lib lib${LLVM_LIBDIR_SUFFIX}) +set(install_dest_obj lib${LLVM_LIBDIR_SUFFIX}) +set(install_dest_obj-new-offload lib${LLVM_LIBDIR_SUFFIX}) set(install_dest_bc lib${LLVM_LIBDIR_SUFFIX}) set(clang $) set(llvm-ar $) +set(llvm-link $) +set(llvm-opt $) string(CONCAT sycl_targets_opt "-fsycl-targets=" @@ -46,112 +51,142 @@ if (NOT SYCL_LIBDEVICE_GCC_TOOLCHAIN STREQUAL "") list(APPEND compile_opts "--gcc-toolchain=${SYCL_LIBDEVICE_GCC_TOOLCHAIN}") endif() -if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) - string(APPEND sycl_targets_opt ",nvptx64-nvidia-cuda") - list(APPEND compile_opts - "-fno-sycl-libspirv" - "-fno-bundle-offload-arch" - "-nocudalib" - "--cuda-gpu-arch=sm_50") -endif() - if (WIN32) list(APPEND compile_opts -D_ALLOW_RUNTIME_LIBRARY_MISMATCH) list(APPEND compile_opts -D_ALLOW_ITERATOR_DEBUG_LEVEL_MISMATCH) endif() -add_custom_target(libsycldevice-obj) -add_custom_target(libsycldevice-obj-new-offload) -add_custom_target(libsycldevice-spv) -add_custom_target(libsycldevice-bc) - -add_custom_target(libsycldevice DEPENDS - libsycldevice-obj - libsycldevice-bc - libsycldevice-obj-new-offload - libsycldevice-spv) - -function(add_devicelib_obj obj_filename) - cmake_parse_arguments(OBJ "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-obj-file ${obj_binary_dir}/${obj_filename}.${lib-suffix}) - add_custom_command(OUTPUT ${devicelib-obj-file} - COMMAND ${clang} -fsycl -c - ${compile_opts} ${sycl_targets_opt} ${OBJ_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${OBJ_SRC} - -o ${devicelib-obj-file} - MAIN_DEPENDENCY ${OBJ_SRC} - DEPENDS ${OBJ_DEP} - VERBATIM) - set(devicelib-obj-target ${obj_filename}-obj) - add_custom_target(${devicelib-obj-target} DEPENDS ${devicelib-obj-file}) - add_dependencies(libsycldevice-obj ${devicelib-obj-target}) - install(FILES ${devicelib-obj-file} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) - - 
set(devicelib-obj-file-new-offload ${obj_new_offload_binary_dir}/${obj_filename}.${new-offload-lib-suffix}) - add_custom_command(OUTPUT ${devicelib-obj-file-new-offload} - COMMAND ${clang} -fsycl -c --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} ${OBJ_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${OBJ_SRC} - -o ${devicelib-obj-file-new-offload} - MAIN_DEPENDENCY ${OBJ_SRC} - DEPENDS ${OBJ_DEP} - VERBATIM) - set(devicelib-obj-target-new-offload ${obj_filename}-new-offload-obj) - add_custom_target(${devicelib-obj-target-new-offload} DEPENDS ${devicelib-obj-file-new-offload}) - add_dependencies(libsycldevice-obj ${devicelib-obj-target-new-offload}) - install(FILES ${devicelib-obj-file-new-offload} - DESTINATION ${install_dest_lib} - COMPONENT libsycldevice) -endfunction() +add_custom_target(libsycldevice) + +set(filetypes obj obj-new-offload spv bc) + +foreach(filetype IN LISTS filetypes) + add_custom_target(libsycldevice-${filetype}) + add_dependencies(libsycldevice libsycldevice-${filetype}) +endforeach() -function(add_devicelib_spv spv_filename) - cmake_parse_arguments(SPV "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-spv-file ${spv_binary_dir}/${spv_filename}.spv) - add_custom_command(OUTPUT ${devicelib-spv-file} - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} ${SPV_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${SPV_SRC} - -o ${devicelib-spv-file} - MAIN_DEPENDENCY ${SPV_SRC} - DEPENDS ${SPV_DEP} - VERBATIM) - set(devicelib-spv-target ${spv_filename}-spv) - add_custom_target(${devicelib-spv-target} DEPENDS ${devicelib-spv-file}) - add_dependencies(libsycldevice-spv ${devicelib-spv-target}) - install(FILES ${devicelib-spv-file} - DESTINATION ${install_dest_spv} - COMPONENT libsycldevice) +# For NVPTX each device library is compiled into a single bitcode +# file and all files created this way are linked into one large bitcode +# library. 
+# Additional compilation options are needed for compiling each device library. +set(devicelib_arch) +if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) + list(APPEND devicelib_arch cuda) + set(compile_opts_cuda "-fsycl-targets=nvptx64-nvidia-cuda" + "-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib") + set(opt_flags_cuda "-O3" "--nvvm-reflect-enable=false") +endif() + +set(spv_device_compile_opts -fsycl-device-only -fsycl-device-obj=spirv) +set(bc_device_compile_opts -fsycl-device-only -fsycl-device-obj=llvmir) +set(obj-new-offload_device_compile_opts -fsycl -c --offload-new-driver + -foffload-lto=thin ${sycl_targets_opt}) +set(obj_device_compile_opts -fsycl -c ${sycl_targets_opt}) + +# Compiles and installs a single device library. +# +# Arguments: +# * FILETYPE +# Specifies the output file type of the compilation and its respective +# installation directory. +# Adds a new target that the libsycldevice-FILETYPE target will depend on. +# * SRC ... +# Source code files needed for the compilation. +# * EXTRA_OPTS ... +# List of extra compiler options to use. +# Note that the ones specified by the compile_opts var are always used. +# * DEPENDENCIES ... +# List of extra dependencies to inject +# +# Depends on the clang target for compiling. 
+function(compile_lib filename) + cmake_parse_arguments(ARG + "" + "FILETYPE" + "SRC;EXTRA_OPTS;DEPENDENCIES" + ${ARGN}) + + set(devicelib-file + ${${ARG_FILETYPE}_binary_dir}/${filename}.${${ARG_FILETYPE}-suffix}) + + add_custom_command( + OUTPUT ${devicelib-file} + COMMAND ${clang} ${compile_opts} ${ARG_EXTRA_OPTS} + ${CMAKE_CURRENT_SOURCE_DIR}/${ARG_SRC} -o ${devicelib-file} + MAIN_DEPENDENCY ${ARG_SRC} + DEPENDS ${ARG_DEPENDENCIES} + VERBATIM + ) + set(devicelib-${ARG_FILETYPE}-target ${filename}-${ARG_FILETYPE}) + add_custom_target(${devicelib-${ARG_FILETYPE}-target} + DEPENDS ${devicelib-file}) + add_dependencies(libsycldevice-${ARG_FILETYPE} + ${devicelib-${ARG_FILETYPE}-target}) + + install( FILES ${devicelib-file} + DESTINATION ${install_dest_${ARG_FILETYPE}} + COMPONENT libsycldevice) endfunction() -function(add_devicelib_bc bc_filename) - cmake_parse_arguments(BC "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - set(devicelib-bc-file ${bc_binary_dir}/${bc_filename}.bc) - add_custom_command(OUTPUT ${devicelib-bc-file} - COMMAND ${clang} -fsycl-device-only - -fsycl-device-obj=llvmir ${compile_opts} - ${BC_EXTRA_ARGS} - ${CMAKE_CURRENT_SOURCE_DIR}/${BC_SRC} - -o ${devicelib-bc-file} - MAIN_DEPENDENCY ${BC_SRC} - DEPENDS ${BC_DEP} - VERBATIM) - set(devicelib-bc-target ${bc_filename}-bc) - add_custom_target(${devicelib-bc-target} DEPENDS ${devicelib-bc-file}) - add_dependencies(libsycldevice-bc ${devicelib-bc-target}) - install(FILES ${devicelib-bc-file} - DESTINATION ${install_dest_bc} - COMPONENT libsycldevice) +# Appends a list to a global property. +# +# Arguments: +# * PROPERTY_NAME +# The name of the property to append to. 
+function(append_to_property list) + cmake_parse_arguments(ARG + "" + "PROPERTY_NAME" + "" + ${ARGN}) + get_property(new_property GLOBAL PROPERTY ${ARG_PROPERTY_NAME}) + list(APPEND new_property ${list}) + set_property(GLOBAL PROPERTY ${ARG_PROPERTY_NAME} ${new_property}) endfunction() -function(add_devicelib filename) - cmake_parse_arguments(DL "" "" "SRC;DEP;EXTRA_ARGS" ${ARGN}) - add_devicelib_spv(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) - add_devicelib_bc(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) - add_devicelib_obj(${filename} SRC ${DL_SRC} DEP ${DL_DEP} EXTRA_ARGS ${DL_EXTRA_ARGS}) +# Creates device libraries for all filetypes. +# Adds bitcode library files additionally for each devicelib_arch target and +# adds the created file to an arch specific global property. +# +# Arguments: +# * SRC ... +# Source code files needed for the compilation. +# * EXTRA_OPTS ... +# List of extra compiler options to use. +# Note that the ones specified by the compile_opts var are always used. +# * DEPENDENCIES ... +# List of extra dependencies to inject +# +# Depends on the clang target for compiling. 
+function(add_devicelibs filename) + cmake_parse_arguments(ARG + "" + "" + "SRC;EXTRA_OPTS;DEPENDENCIES" + ${ARGN}) + + foreach(filetype IN LISTS filetypes) + compile_lib(${filename} + FILETYPE ${filetype} + SRC ${ARG_SRC} + DEPENDENCIES ${ARG_DEPENDENCIES} + EXTRA_OPTS ${ARG_EXTRA_OPTS} ${${filetype}_device_compile_opts}) + endforeach() + + foreach(arch IN LISTS devicelib_arch) + compile_lib(${filename}--${arch} + FILETYPE bc + SRC ${ARG_SRC} + DEPENDENCIES ${ARG_DEPENDENCIES} + EXTRA_OPTS ${ARG_EXTRA_OPTS} ${bc_device_compile_opts} + ${compile_opts_${arch}}) + + append_to_property(${bc_binary_dir}/${filename}--${arch}.bc + PROPERTY_NAME BC_DEVICE_LIBS_${arch}) + endforeach() endfunction() +# Set up the dependency lists for the libdevice libraries set(crt_obj_deps wrapper.h device.h spirv_vars.h sycl-compiler) set(complex_obj_deps device_complex.h device.h sycl-compiler) set(cmath_obj_deps device_math.h device.h sycl-compiler) @@ -174,37 +209,91 @@ if("native_cpu" IN_LIST SYCL_ENABLE_PLUGINS) # Include NativeCPU UR adapter path to enable finding header file with state struct. # libsycl-nativecpu_utils is only needed as BC file by NativeCPU. 
# Todo: add versions for other targets (for cross-compilation) - add_devicelib_bc(libsycl-nativecpu_utils SRC nativecpu_utils.cpp DEP ${itt_obj_deps} EXTRA_ARGS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu) + compile_lib(libsycl-nativecpu_utils + FILETYPE bc + SRC nativecpu_utils.cpp + DEPENDENCIES ${itt_obj_deps} + EXTRA_OPTS -I ${NATIVE_CPU_DIR} -fsycl-targets=native_cpu -fsycl-device-only + -fsycl-device-obj=llvmir) endif() -add_devicelib(libsycl-itt-stubs SRC itt_stubs.cpp DEP ${itt_obj_deps}) -add_devicelib(libsycl-itt-compiler-wrappers SRC itt_compiler_wrappers.cpp DEP ${itt_obj_deps}) -add_devicelib(libsycl-itt-user-wrappers SRC itt_user_wrappers.cpp DEP ${itt_obj_deps}) - -add_devicelib(libsycl-crt SRC crt_wrapper.cpp DEP ${crt_obj_deps}) -add_devicelib(libsycl-complex SRC complex_wrapper.cpp DEP ${complex_obj_deps}) -add_devicelib(libsycl-complex-fp64 SRC complex_wrapper_fp64.cpp DEP ${complex_obj_deps} ) -add_devicelib(libsycl-cmath SRC cmath_wrapper.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-cmath-fp64 SRC cmath_wrapper_fp64.cpp DEP ${cmath_obj_deps} ) -add_devicelib(libsycl-imf SRC imf_wrapper.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-imf-fp64 SRC imf_wrapper_fp64.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-imf-bf16 SRC imf_wrapper_bf16.cpp DEP ${imf_obj_deps}) -add_devicelib(libsycl-bfloat16 SRC bfloat16_wrapper.cpp DEP ${cmath_obj_deps} ) +# Add all device libraries for each filetype except for the Intel math function +# ones. 
+add_devicelibs(libsycl-itt-stubs + SRC itt_stubs.cpp + DEPENDENCIES ${itt_obj_deps}) +add_devicelibs(libsycl-itt-compiler-wrappers + SRC itt_compiler_wrappers.cpp + DEPENDENCIES ${itt_obj_deps}) +add_devicelibs(libsycl-itt-user-wrappers + SRC itt_user_wrappers.cpp + DEPENDENCIES ${itt_obj_deps}) + +add_devicelibs(libsycl-crt + SRC crt_wrapper.cpp + DEPENDENCIES ${crt_obj_deps}) +add_devicelibs(libsycl-complex + SRC complex_wrapper.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-complex-fp64 + SRC complex_wrapper_fp64.cpp + DEPENDENCIES ${complex_obj_deps} ) +add_devicelibs(libsycl-cmath + SRC cmath_wrapper.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-cmath-fp64 + SRC cmath_wrapper_fp64.cpp + DEPENDENCIES ${cmath_obj_deps} ) +add_devicelibs(libsycl-imf + SRC imf_wrapper.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-imf-fp64 + SRC imf_wrapper_fp64.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-imf-bf16 + SRC imf_wrapper_bf16.cpp + DEPENDENCIES ${imf_obj_deps}) +add_devicelibs(libsycl-bfloat16 + SRC bfloat16_wrapper.cpp + DEPENDENCIES ${cmath_obj_deps}) if(MSVC) - add_devicelib(libsycl-msvc-math SRC msvc_math.cpp DEP ${cmath_obj_deps}) + add_devicelibs(libsycl-msvc-math + SRC msvc_math.cpp + DEPENDENCIES ${cmath_obj_deps}) else() - add_devicelib(libsycl-sanitizer SRC sanitizer_utils.cpp DEP ${sanitizer_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code) + add_devicelibs(libsycl-sanitizer + SRC sanitizer_utils.cpp + DEPENDENCIES ${sanitizer_obj_deps} + EXTRA_OPTS -fno-sycl-instrument-device-code) endif() -add_devicelib(libsycl-fallback-cassert SRC fallback-cassert.cpp DEP ${crt_obj_deps} EXTRA_ARGS -fno-sycl-instrument-device-code) -add_devicelib(libsycl-fallback-cstring SRC fallback-cstring.cpp DEP ${crt_obj_deps}) -add_devicelib(libsycl-fallback-complex SRC fallback-complex.cpp DEP ${complex_obj_deps}) -add_devicelib(libsycl-fallback-complex-fp64 SRC fallback-complex-fp64.cpp DEP ${complex_obj_deps} ) 
-add_devicelib(libsycl-fallback-cmath SRC fallback-cmath.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-fallback-cmath-fp64 SRC fallback-cmath-fp64.cpp DEP ${cmath_obj_deps}) -add_devicelib(libsycl-fallback-bfloat16 SRC fallback-bfloat16.cpp DEP ${bfloat16_obj_deps}) -add_devicelib(libsycl-native-bfloat16 SRC bfloat16_wrapper.cpp DEP ${bfloat16_obj_deps}) - +add_devicelibs(libsycl-fallback-cassert + SRC fallback-cassert.cpp + DEPENDENCIES ${crt_obj_deps} + EXTRA_OPTS -fno-sycl-instrument-device-code) +add_devicelibs(libsycl-fallback-cstring + SRC fallback-cstring.cpp + DEPENDENCIES ${crt_obj_deps}) +add_devicelibs(libsycl-fallback-complex + SRC fallback-complex.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-fallback-complex-fp64 + SRC fallback-complex-fp64.cpp + DEPENDENCIES ${complex_obj_deps}) +add_devicelibs(libsycl-fallback-cmath + SRC fallback-cmath.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-fallback-cmath-fp64 + SRC fallback-cmath-fp64.cpp + DEPENDENCIES ${cmath_obj_deps}) +add_devicelibs(libsycl-fallback-bfloat16 + SRC fallback-bfloat16.cpp + DEPENDENCIES ${bfloat16_obj_deps}) +add_devicelibs(libsycl-native-bfloat16 + SRC bfloat16_wrapper.cpp + DEPENDENCIES ${bfloat16_obj_deps}) + +# Create dependency and source lists for Intel math function libraries. file(MAKE_DIRECTORY ${obj_binary_dir}/libdevice) set(imf_fallback_src_dir ${obj_binary_dir}/libdevice) set(imf_src_dir ${CMAKE_CURRENT_SOURCE_DIR}) @@ -217,8 +306,7 @@ set(imf_fallback_fp32_deps device.h device_imf.hpp imf_half.hpp imf_rounding_op. 
imf/imf_inline_fp32.cpp imf/imf_fp32_dl.cpp) set(imf_fallback_fp64_deps device.h device_imf.hpp imf_half.hpp imf_rounding_op.hpp imf_impl_utils.hpp - imf_utils/double_convert.cpp - imf_utils/fp64_round.cpp + imf_utils/double_convert.cpp imf_utils/fp64_round.cpp imf/imf_inline_fp64.cpp imf/imf_fp64_dl.cpp) set(imf_fallback_bf16_deps device.h device_imf.hpp imf_bf16.hpp @@ -275,320 +363,197 @@ if (NOT WIN32) add_imf_host_cxx_flags_compile_flags_if_supported("-fcf-protection=full") endif() -add_custom_command(OUTPUT ${imf_fp32_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=FP32 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_fp32_deps}) - -add_custom_command(OUTPUT ${imf_fp64_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=FP64 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_fp64_deps}) - -add_custom_command(OUTPUT ${imf_bf16_fallback_src} - COMMAND ${CMAKE_COMMAND} -D SRC_DIR=${imf_src_dir} - -D DEST_DIR=${imf_fallback_src_dir} - -D IMF_TARGET=BF16 - -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake - DEPENDS ${imf_fallback_bf16_deps}) - -add_custom_target(get_imf_fallback_fp32 DEPENDS ${imf_fp32_fallback_src}) -add_custom_command(OUTPUT ${spv_binary_dir}/libsycl-fallback-imf.spv - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf.spv - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf.bc - DEPENDS 
${imf_fallback_fp32_deps} get_imf_fallback_fp32 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} - COMMAND ${clang} -fsycl -c - ${compile_opts} ${sycl_targets_opt} - ${imf_fp32_fallback_src} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - -o ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_fp32_fallback_src} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - -o ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp32_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp32_deps} get_imf_fallback_fp32 sycl-compiler - VERBATIM) - -add_custom_target(get_imf_fallback_fp64 DEPENDS ${imf_fp64_fallback_src}) -add_custom_command(OUTPUT ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - 
VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${compile_opts} ${sycl_targets_opt} - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_fp64_fallback_src} - -o ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_fp64_deps} get_imf_fallback_fp64 sycl-compiler - VERBATIM) - -add_custom_target(get_imf_fallback_bf16 DEPENDS ${imf_bf16_fallback_src}) -add_custom_command(OUTPUT 
${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=spirv - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - COMMAND ${clang} -fsycl-device-only -fsycl-device-obj=llvmir - ${compile_opts} -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 - sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${compile_opts} ${sycl_targets_opt} - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix} - COMMAND ${clang} -fsycl -c -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - --offload-new-driver -foffload-lto=thin - ${compile_opts} ${sycl_targets_opt} - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - -I ${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - -I 
${CMAKE_CURRENT_SOURCE_DIR}/imf - ${imf_bf16_fallback_src} - -o ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - DEPENDS ${imf_fallback_bf16_deps} get_imf_fallback_bf16 sycl-compiler - VERBATIM) - -add_custom_target(imf_fallback_fp32_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf.spv) -add_custom_target(imf_fallback_fp32_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf.bc) -add_custom_target(imf_fallback_fp32_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix}) -add_custom_target(imf_fallback_fp32_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix}) -add_custom_target(imf_fallback_fp32_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_fp32_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_fp32_spv) -add_dependencies(libsycldevice-bc imf_fallback_fp32_bc) -add_dependencies(libsycldevice-obj imf_fallback_fp32_obj) -add_dependencies(libsycldevice-obj imf_fallback_fp32_new_offload_obj) - -add_custom_target(imf_fallback_fp64_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv) -add_custom_target(imf_fallback_fp64_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc) -add_custom_target(imf_fallback_fp64_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix}) -add_custom_target(imf_fallback_fp64_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix}) -add_custom_target(imf_fallback_fp64_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_fp64_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_fp64_spv) -add_dependencies(libsycldevice-bc imf_fallback_fp64_bc) -add_dependencies(libsycldevice-obj imf_fallback_fp64_obj) 
-add_dependencies(libsycldevice-obj imf_fallback_fp64_new_offload_obj) - -add_custom_target(imf_fallback_bf16_spv DEPENDS ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv) -add_custom_target(imf_fallback_bf16_bc DEPENDS ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc) -add_custom_target(imf_fallback_bf16_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix}) -add_custom_target(imf_fallback_bf16_host_obj DEPENDS ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix}) -add_custom_target(imf_fallback_bf16_new_offload_obj DEPENDS ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix}) -add_custom_target(imf_fallback_bf16_host_new_offload_obj DEPENDS ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix}) -add_dependencies(libsycldevice-spv imf_fallback_bf16_spv) -add_dependencies(libsycldevice-bc imf_fallback_bf16_bc) -add_dependencies(libsycldevice-obj imf_fallback_bf16_obj) -add_dependencies(libsycldevice-obj imf_fallback_bf16_new_offload_obj) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp32-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - -o ${obj_binary_dir}/imf-fp32-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - -o ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - -o ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - DEPENDS ${imf_obj_deps} - 
VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - -o ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_fp64.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - -o ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_command(OUTPUT ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - COMMAND ${clang} ${imf_host_cxx_flags} --offload-new-driver -foffload-lto=thin - ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - -o ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/imf_wrapper_bf16.cpp - DEPENDS ${imf_obj_deps} - VERBATIM) - -add_custom_target(imf_fp32_host_obj DEPENDS ${obj_binary_dir}/imf-fp32-host.${lib-suffix}) -add_custom_target(imf_fp64_host_obj DEPENDS ${obj_binary_dir}/imf-fp64-host.${lib-suffix}) -add_custom_target(imf_bf16_host_obj DEPENDS ${obj_binary_dir}/imf-bf16-host.${lib-suffix}) - -add_custom_target(imf_fp32_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix}) -add_custom_target(imf_fp64_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix}) -add_custom_target(imf_bf16_host_new_offload_obj DEPENDS ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix}) - -add_custom_target(imf_host_obj DEPENDS ${obj_binary_dir}/${devicelib_host_static}) -add_custom_command(OUTPUT ${obj_binary_dir}/${devicelib_host_static} - COMMAND ${llvm-ar} rcs ${obj_binary_dir}/${devicelib_host_static} - 
${obj_binary_dir}/imf-fp32-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-fp32-host.${lib-suffix} - ${obj_binary_dir}/imf-fp64-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-fp64-host.${lib-suffix} - ${obj_binary_dir}/imf-bf16-host.${lib-suffix} - ${obj_binary_dir}/fallback-imf-bf16-host.${lib-suffix} - DEPENDS imf_fp32_host_obj imf_fallback_fp32_host_obj - DEPENDS imf_fp64_host_obj imf_fallback_fp64_host_obj - DEPENDS imf_bf16_host_obj imf_fallback_bf16_host_obj - DEPENDS sycl-compiler - VERBATIM) -add_custom_target(imf_host_new_offload_obj DEPENDS ${obj_binary_dir}/${devicelib_host_static_new_offload}) -add_custom_command(OUTPUT ${obj_binary_dir}/${devicelib_host_static_new_offload} - COMMAND ${llvm-ar} rcs ${obj_binary_dir}/${devicelib_host_static_new_offload} - ${obj_binary_dir}/imf-fp32-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-fp32-host.${new-offload-lib-suffix} - ${obj_binary_dir}/imf-fp64-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-fp64-host.${new-offload-lib-suffix} - ${obj_binary_dir}/imf-bf16-host.${new-offload-lib-suffix} - ${obj_binary_dir}/fallback-imf-bf16-host.${new-offload-lib-suffix} - DEPENDS imf_fp32_host_new_offload_obj imf_fallback_fp32_host_new_offload_obj - DEPENDS imf_fp64_host_new_offload_obj imf_fallback_fp64_host_new_offload_obj - DEPENDS imf_bf16_host_new_offload_obj imf_fallback_bf16_host_new_offload_obj - DEPENDS sycl-compiler - VERBATIM) -add_dependencies(libsycldevice-obj imf_host_obj) -add_dependencies(libsycldevice-obj imf_host_new_offload_obj) -install(FILES ${spv_binary_dir}/libsycl-fallback-imf.spv - ${spv_binary_dir}/libsycl-fallback-imf-fp64.spv - ${spv_binary_dir}/libsycl-fallback-imf-bf16.spv - DESTINATION ${install_dest_spv} - COMPONENT libsycldevice) - -install(FILES ${bc_binary_dir}/libsycl-fallback-imf.bc - ${bc_binary_dir}/libsycl-fallback-imf-fp64.bc - ${bc_binary_dir}/libsycl-fallback-imf-bf16.bc - DESTINATION ${install_dest_bc} - COMPONENT libsycldevice) - 
-install(FILES ${obj_binary_dir}/libsycl-fallback-imf.${lib-suffix}
-              ${obj_binary_dir}/libsycl-fallback-imf-fp64.${lib-suffix}
-              ${obj_binary_dir}/libsycl-fallback-imf-bf16.${lib-suffix}
-              ${obj_binary_dir}/${devicelib_host_static}
-        DESTINATION ${install_dest_lib}
-        COMPONENT libsycldevice)
-
-install(FILES ${obj_binary_dir}/libsycl-fallback-imf.${new-offload-lib-suffix}
-              ${obj_binary_dir}/libsycl-fallback-imf-fp64.${new-offload-lib-suffix}
-              ${obj_binary_dir}/libsycl-fallback-imf-bf16.${new-offload-lib-suffix}
-              ${obj_binary_dir}/${devicelib_host_static_new_offload}
-        DESTINATION ${install_dest_lib}
-        COMPONENT libsycldevice)
+# Host compile options per object flavour. The variable names follow the
+# <ftype>_host_compile_opts pattern so that they can be looked up by file type.
+set(obj-new-offload_host_compile_opts ${imf_host_cxx_flags} --offload-new-driver
+  -foffload-lto=thin)
+set(obj_host_compile_opts ${imf_host_cxx_flags})
+
+# For every datatype, generate ${imf_<datatype>_fallback_src} by running the
+# ImfSrcConcate.cmake script with IMF_TARGET set to the upper-cased datatype,
+# and expose the generated file through the get_imf_fallback_<datatype> target.
+foreach(datatype IN ITEMS fp32 fp64 bf16)
+  string(TOUPPER ${datatype} upper_datatype)
+
+  add_custom_command(
+    OUTPUT ${imf_${datatype}_fallback_src}
+    COMMAND ${CMAKE_COMMAND}
+      -D SRC_DIR=${imf_src_dir}
+      -D DEST_DIR=${imf_fallback_src_dir}
+      -D IMF_TARGET=${upper_datatype}
+      -P ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules/ImfSrcConcate.cmake
+    DEPENDS ${imf_fallback_${datatype}_deps})
+
+  add_custom_target(get_imf_fallback_${datatype}
+    DEPENDS ${imf_${datatype}_fallback_src})
+endforeach()
+
+# Adds an Intel math functions (imf) fallback library.
+#
+# Arguments:
+# * <name> (positional)
+#     Base name of the output file; the suffix is taken from ${<type>-suffix}.
+# * DIR <dir>
+#     The directory where the output file should be located in.
+# * FTYPE <type>
+#     Filetype of the output library file (e.g. 'bc').
+# * DTYPE <type>
+#     The datatype of the library, which determines the input source
+#     (${imf_<type>_fallback_src}) and the dependencies
+#     (${imf_fallback_<type>_deps}) of the compilation command.
+# * TGT_NAME <name>
+#     Name of the new target that depends on the compilation of the library.
+#     The target is also added as a dependency of libsycldevice-<FTYPE>.
+# * EXTRA_OPTS <opt1> ... <optN>
+#     List of extra compiler options to use.
+#     Note that the ones specified by the compile_opts var are always used.
+#
+# The compilation is run with ${clang} and depends on the sycl-compiler target.
+function(add_lib_imf name)
+  # ARCH is declared as a keyword because the per-architecture callers pass
+  # it; it is not used inside this function, but declaring it keeps the value
+  # out of ARG_UNPARSED_ARGUMENTS.
+  cmake_parse_arguments(ARG
+    ""
+    "ARCH;DIR;FTYPE;DTYPE;TGT_NAME"
+    "EXTRA_OPTS"
+    ${ARGN})
+
+  # Compile the generated fallback source of the requested datatype. The
+  # get_imf_fallback_<dtype> target guarantees the source has been generated.
+  add_custom_command(
+    OUTPUT ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix}
+    COMMAND ${clang} ${compile_opts} ${ARG_EXTRA_OPTS}
+      -I ${CMAKE_CURRENT_SOURCE_DIR}/imf
+      ${imf_${ARG_DTYPE}_fallback_src}
+      -o
+      ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix}
+    DEPENDS ${imf_fallback_${ARG_DTYPE}_deps}
+      get_imf_fallback_${ARG_DTYPE} sycl-compiler
+    VERBATIM)
+
+  add_custom_target(${ARG_TGT_NAME}
+    DEPENDS ${ARG_DIR}/${name}.${${ARG_FTYPE}-suffix})
+
+  # NOTE(review): this requires a libsycldevice-<FTYPE> target for every file
+  # type passed in (including obj-new-offload) - confirm they are all created.
+  add_dependencies(libsycldevice-${ARG_FTYPE} ${ARG_TGT_NAME})
+endfunction()
+
+# Add device fallback imf libraries for the SPIRV targets and all filetypes.
+# The fp32 library carries no datatype suffix in its file name.
+foreach(dtype IN ITEMS bf16 fp32 fp64)
+  foreach(ftype IN LISTS filetypes)
+    set(libsycl_name libsycl-fallback-imf)
+    if (NOT (dtype STREQUAL "fp32"))
+      set(libsycl_name libsycl-fallback-imf-${dtype})
+    endif()
+    set(tgt_name imf_fallback_${dtype}_${ftype})
+
+    add_lib_imf(${libsycl_name}
+      DIR ${${ftype}_binary_dir}
+      FTYPE ${ftype}
+      DTYPE ${dtype}
+      EXTRA_OPTS ${${ftype}_device_compile_opts}
+      TGT_NAME ${tgt_name})
+  endforeach()
+endforeach()
+
+# Add device fallback imf libraries for the CUDA target.
+# The output files are bitcode.
+foreach(arch IN LISTS devicelib_arch)
+  foreach(dtype IN ITEMS bf16 fp32 fp64)
+    set(tgt_name imf_fallback_${dtype}_bc_${arch})
+
+    add_lib_imf(libsycl-fallback-imf-${arch}-${dtype}
+      ARCH ${arch}
+      DIR ${bc_binary_dir}
+      FTYPE bc
+      DTYPE ${dtype}
+      EXTRA_OPTS ${bc_device_compile_opts} ${compile_opts_${arch}}
+      TGT_NAME ${tgt_name})
+
+    # Register the file under the same BC_DEVICE_LIBS_<arch> name that
+    # add_devicelibs uses and that the link step below reads back; with a bare
+    # ${arch} as the name the imf bitcode would never reach the linked library.
+    append_to_property(
+      ${bc_binary_dir}/libsycl-fallback-imf-${arch}-${dtype}.${bc-suffix}
+      PROPERTY_NAME BC_DEVICE_LIBS_${arch})
+  endforeach()
+endforeach()
+
+# Create one large bitcode file for the CUDA targets.
+# Use all the files collected in the respective global properties.
+foreach(arch IN LISTS devicelib_arch)
+  # All per-arch bitcode files collected so far for this architecture.
+  get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch})
+  # Link the bitcode files together.
+  link_bc(TARGET device_lib_device_${arch}
+    RSP_DIR ${CMAKE_CURRENT_BINARY_DIR}
+    INPUTS ${BC_DEVICE_LIBS_${arch}})
+  # Path of the linked bitcode file, resolved at generate time.
+  # NOTE(review): the generator expression was reconstructed; it assumes
+  # link_bc stores the output path in the TARGET_FILE property of the target
+  # it creates - confirm against the link_bc definition.
+  set( builtins_link_lib_${arch}
+    $<TARGET_PROPERTY:device_lib_device_${arch},TARGET_FILE>)
+  add_dependencies(libsycldevice-bc device_lib_device_${arch})
+  set( builtins_opt_lib_tgt_${arch} builtins_${arch}.opt)
+
+  # Run the optimizer on the resulting bitcode file and call prepare_builtins
+  # on it, which strips away debug and arch information.
+  process_bc(devicelib--${arch}.bc
+    LIB_TGT ${builtins_opt_lib_tgt_${arch}}
+    IN_FILE ${builtins_link_lib_${arch}}
+    OUT_DIR ${bc_binary_dir}
+    OPT_FLAGS ${opt_flags_${arch}}
+    DEPENDENCIES device_lib_device_${arch})
+  add_dependencies(libsycldevice-bc prepare-devicelib--${arch}.bc)
+  # Path of the final, processed library that gets installed.
+  # NOTE(review): reconstructed like the expression above; assumes process_bc
+  # sets TARGET_FILE on the prepare-devicelib--<arch>.bc target - confirm.
+  set(complete_${arch}_libdev
+    $<TARGET_PROPERTY:prepare-devicelib--${arch}.bc,TARGET_FILE>)
+  install( FILES ${complete_${arch}_libdev}
+           DESTINATION ${install_dest_bc}
+           COMPONENT libsycldevice)
+endforeach()
+
+# Add host device imf libraries for obj and new offload objects.
+foreach(dtype IN ITEMS bf16 fp32 fp64)
+  foreach(ftype IN ITEMS obj obj-new-offload)
+    set(tgt_name imf_fallback_${dtype}_host_${ftype})
+
+    # Host build of the generated fallback source.
+    add_lib_imf(fallback-imf-${dtype}-host
+      DIR ${${ftype}_binary_dir}
+      FTYPE ${ftype}
+      DTYPE ${dtype}
+      EXTRA_OPTS ${${ftype}_host_compile_opts}
+      TGT_NAME ${tgt_name})
+
+    # Host build of the wrapper; the fp32 wrapper has no datatype suffix.
+    set(wrapper_name imf_wrapper.cpp)
+    if (NOT ("${dtype}" STREQUAL "fp32"))
+      set(wrapper_name imf_wrapper_${dtype}.cpp)
+    endif()
+    add_custom_command(
+      OUTPUT ${${ftype}_binary_dir}/imf-${dtype}-host.${${ftype}-suffix}
+      COMMAND ${clang} ${${ftype}_host_compile_opts}
+        ${CMAKE_CURRENT_SOURCE_DIR}/${wrapper_name}
+        -o ${${ftype}_binary_dir}/imf-${dtype}-host.${${ftype}-suffix}
+      MAIN_DEPENDENCY ${CMAKE_CURRENT_SOURCE_DIR}/${wrapper_name}
+      DEPENDS ${imf_obj_deps}
+      VERBATIM)
+
+    # The target must depend on exactly the path the command above lists as
+    # OUTPUT, so use the per-filetype directory here as well.
+    add_custom_target(imf_${dtype}_host_${ftype} DEPENDS
+      ${${ftype}_binary_dir}/imf-${dtype}-host.${${ftype}-suffix})
+  endforeach()
+endforeach()
+
+# Archive the host wrapper and fallback objects of all datatypes into one
+# static library per object flavour.
+foreach(ftype IN ITEMS obj obj-new-offload)
+  add_custom_target(imf_host_${ftype}
+    DEPENDS ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}})
+  add_custom_command(
+    OUTPUT ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}}
+    COMMAND ${llvm-ar} rcs
+      ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}}
+      ${${ftype}_binary_dir}/imf-fp32-host.${${ftype}-suffix}
+      ${${ftype}_binary_dir}/fallback-imf-fp32-host.${${ftype}-suffix}
+      ${${ftype}_binary_dir}/imf-fp64-host.${${ftype}-suffix}
+      ${${ftype}_binary_dir}/fallback-imf-fp64-host.${${ftype}-suffix}
+      ${${ftype}_binary_dir}/imf-bf16-host.${${ftype}-suffix}
+      ${${ftype}_binary_dir}/fallback-imf-bf16-host.${${ftype}-suffix}
+    DEPENDS imf_fp32_host_${ftype} imf_fallback_fp32_host_${ftype}
+    DEPENDS imf_fp64_host_${ftype} imf_fallback_fp64_host_${ftype}
+    DEPENDS imf_bf16_host_${ftype} imf_fallback_bf16_host_${ftype}
+    DEPENDS sycl-compiler
+    VERBATIM)
+  add_dependencies(libsycldevice-obj imf_host_${ftype})
+
+  # Install the archive from the directory it is actually produced in.
+  # NOTE(review): both flavours are installed to ${install_dest_obj}; the loop
+  # below uses ${install_dest_<ftype>} instead - confirm this is intended.
+  install( FILES ${${ftype}_binary_dir}/${devicelib_host_static_${ftype}}
+           DESTINATION ${install_dest_obj}
+           COMPONENT libsycldevice)
+endforeach()
+
+# Install the device fallback imf libraries of every filetype.
+foreach(ftype IN LISTS filetypes)
+  install(
+    FILES ${${ftype}_binary_dir}/libsycl-fallback-imf.${${ftype}-suffix}
+          ${${ftype}_binary_dir}/libsycl-fallback-imf-fp64.${${ftype}-suffix}
+          ${${ftype}_binary_dir}/libsycl-fallback-imf-bf16.${${ftype}-suffix}
+    DESTINATION ${install_dest_${ftype}}
+    COMPONENT libsycldevice)
+endforeach()
+