diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 0ed2f337477b1..367f6b7f9c895 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -2738,7 +2738,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt; } if (GenerateIntrinsics && - !(getLangOpts().SYCLIsDevice && getTarget().getTriple().isNVPTX())) { + !(getLangOpts().SYCLIsDevice && (getTarget().getTriple().isNVPTX() || + getTarget().getTriple().isAMDGCN()))) { switch (BuiltinIDIfNoAsmLabel) { case Builtin::BIacos: case Builtin::BIacosf: diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index d69f54c563ae6..fc6cf6290f0f3 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -5555,7 +5555,7 @@ class OffloadingActionBuilder final { // AOT compilation. bool SYCLDeviceLibLinked = false; Action *NativeCPULib = nullptr; - if (IsSPIR || IsNVPTX || IsSYCLNativeCPU) { + if (IsSPIR || IsNVPTX || IsAMDGCN || IsSYCLNativeCPU) { bool UseJitLink = IsSPIR && Args.hasFlag(options::OPT_fsycl_device_lib_jit_link, diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index 1f3cad761e4b3..9c48051dd29f4 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -165,9 +165,9 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C, // spir64 target is actually JIT compilation, so we defer selection of // bfloat16 libraries to runtime. For AOT we need libraries, but skip - // for Nvidia. - NeedLibs = - Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX(); + // for Nvidia and AMD. 
+ NeedLibs = Triple.getSubArch() != llvm::Triple::NoSubArch && + !Triple.isNVPTX() && !Triple.isAMDGCN(); UseNative = false; if (NeedLibs && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen && C.hasOffloadToolChain()) { @@ -212,9 +212,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, SmallVector LibraryList; const llvm::opt::ArgList &Args = C.getArgs(); - // For NVPTX we only use one single bitcode library and ignore + // For NVPTX and AMDGCN we only use one single bitcode library and ignore // manually specified SYCL device libraries. - bool IgnoreSingleLibs = TargetTriple.isNVPTX(); + bool IgnoreSingleLibs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN(); struct DeviceLibOptInfo { StringRef DeviceLibName; @@ -278,6 +278,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple, if (TargetTriple.isNVPTX() && IgnoreSingleLibs) LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc")); + if (TargetTriple.isAMDGCN() && IgnoreSingleLibs) + LibraryList.push_back(Args.MakeArgString("devicelib--amd.bc")); + if (IgnoreSingleLibs) return LibraryList; diff --git a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp index 2a2043ac5dc55..5c282449dc851 100644 --- a/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp +++ b/clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp @@ -7,6 +7,8 @@ // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s // RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s #include "Inputs/sycl.hpp" diff --git a/clang/test/Driver/Inputs/SYCL/lib/devicelib--amd.bc b/clang/test/Driver/Inputs/SYCL/lib/devicelib--amd.bc new file mode 100644 index 
0000000000000..e69de29bb2d1d diff --git a/clang/test/Driver/sycl-device-lib-amdgcn.cpp b/clang/test/Driver/sycl-device-lib-amdgcn.cpp new file mode 100644 index 0000000000000..134e7835d11d7 --- /dev/null +++ b/clang/test/Driver/sycl-device-lib-amdgcn.cpp @@ -0,0 +1,44 @@ +// Tests specific to `-fsycl-targets=amdgcn-amd-amdhsa` +// Verify that the correct devicelib linking actions are spawned by the driver. +// Check also if the correct warnings are generated. + +// UNSUPPORTED: system-windows + +// Check if internal libraries are still linked against when linkage of all +// device libs is manually excluded. +// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all --sysroot=%S/Inputs/SYCL \ +// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s + +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906) +// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, gfx906) +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906) +// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, gfx906) +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906) +// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, gfx906) +// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906) +// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, gfx906) + +// Check that the -fsycl-device-lib flag has no effect when "all" is specified. 
+// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all --sysroot=%S/Inputs/SYCL \ +// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-ALL %s + +// Check that the -fsycl-device-lib flag has no effect when subsets of libs +// are specified. +// RUN: %clangxx -ccc-print-phases -std=c++11 --sysroot=%S/Inputs/SYCL \ +// RUN: -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \ +// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-ALL %s + +// Check that -fno-sycl-device-lib is ignored when it does not contain "all". +// A warning should be printed that the flag got ignored. +// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl --sysroot=%S/Inputs/SYCL \ +// RUN: -fno-sycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \ +// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s + +// CHK-UNUSED-WARN: warning: argument unused during compilation: '-fno-sycl-device-lib=' +// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906) +// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, gfx906) + diff --git a/clang/test/Driver/sycl-offload-amdgcn.cpp b/clang/test/Driver/sycl-offload-amdgcn.cpp index 389cdc641119c..dbbddf1670dba 100644 --- a/clang/test/Driver/sycl-offload-amdgcn.cpp +++ b/clang/test/Driver/sycl-offload-amdgcn.cpp @@ -25,7 +25,7 @@ /// Check phases w/out specifying a compute capability. 
// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \ -// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \ +// RUN: -fsycl-targets=amdgcn-amd-amdhsa -fsycl-device-lib=all -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s // CHK-PHASES-NO-CC: 0: input, "{{.*}}", c++, (host-sycl) // CHK-PHASES-NO-CC: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) @@ -37,17 +37,19 @@ // CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl) // CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl) // CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 10: sycl-post-link, {9}, ir, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 11: file-table-tform, {10}, ir, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 12: backend, {11}, assembler, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 13: assembler, {12}, object, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 14: linker, {13}, image, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 15: linker, {14}, hip-fatbin, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 16: foreach, {11, 15}, hip-fatbin, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 17: file-table-tform, {10, 16}, tempfiletable, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 18: clang-offload-wrapper, {17}, object, (device-sycl, gfx906) -// CHK-PHASES-NO-CC: 19: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {18}, object -// CHK-PHASES-NO-CC: 20: linker, {8, 19}, image, (host-sycl) +// CHK-PHASES-NO-CC: 10: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 11: linker, {9, 10}, ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, gfx906) +// 
CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 18: foreach, {13, 17}, hip-fatbin, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl, gfx906) +// CHK-PHASES-NO-CC: 21: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {20}, object +// CHK-PHASES-NO-CC: 22: linker, {8, 21}, image, (host-sycl) /// Check that we only unbundle an archive once. // RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -nogpulib \ diff --git a/libdevice/cmake/modules/SYCLLibdevice.cmake b/libdevice/cmake/modules/SYCLLibdevice.cmake index b502512fdacb4..4574b98ab5f99 100644 --- a/libdevice/cmake/modules/SYCLLibdevice.cmake +++ b/libdevice/cmake/modules/SYCLLibdevice.cmake @@ -65,7 +65,7 @@ foreach(filetype IN LISTS filetypes) add_dependencies(libsycldevice libsycldevice-${filetype}) endforeach() -# For NVPTX each device libary is compiled into a single bitcode +# For NVPTX and AMDGCN each device library is compiled into a single bitcode # file and all files created this way are linked into one large bitcode # library. # Additional compilation options are needed for compiling each device library. 
@@ -76,6 +76,13 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD) "-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib") set(opt_flags_cuda "-O3" "--nvvm-reflect-enable=false") endif() +if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD) + list(APPEND devicelib_arch amd) + set(compile_opts_amd "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa" + "-Xsycl-target-backend" "--offload-arch=gfx940") + set(opt_flags_amd "-O3" "--amdgpu-oclc-reflect-enable=false") +endif() + set(spv_device_compile_opts -fsycl-device-only -fsycl-device-obj=spirv) set(bc_device_compile_opts -fsycl-device-only -fsycl-device-obj=llvmir) @@ -444,7 +451,7 @@ foreach(dtype IN ITEMS bf16 fp32 fp64) endforeach() endforeach() -# Add device fallback imf libraries for the CUDA target. +# Add device fallback imf libraries for the NVPTX and AMD targets. # The output files are bitcode. foreach(arch IN LISTS devicelib_arch) foreach(dtype IN ITEMS bf16 fp32 fp64) @@ -464,7 +471,7 @@ foreach(arch IN LISTS devicelib_arch) endforeach() endforeach() -# Create one large bitcode file for the CUDA targets. +# Create one large bitcode file for the CUDA and AMD targets. # Use all the files collected in the respective global properties. 
foreach(arch IN LISTS devicelib_arch) get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch}) diff --git a/libdevice/cmath_wrapper.cpp b/libdevice/cmath_wrapper.cpp index 64b4c7e4ecc54..27d705d5406b2 100644 --- a/libdevice/cmath_wrapper.cpp +++ b/libdevice/cmath_wrapper.cpp @@ -8,7 +8,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) DEVICE_EXTERN_C_INLINE int abs(int x) { return __devicelib_abs(x); } @@ -199,4 +200,4 @@ DEVICE_EXTERN_C_INLINE float rintf(float x) { return __nv_rintf(x); } #endif // __NVPTX__ -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/cmath_wrapper_fp64.cpp b/libdevice/cmath_wrapper_fp64.cpp index b99a0c6fcef50..1dc9c0283dd3b 100644 --- a/libdevice/cmath_wrapper_fp64.cpp +++ b/libdevice/cmath_wrapper_fp64.cpp @@ -9,7 +9,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) // All exported functions in math and complex device libraries are weak // reference. 
If users provide their own math or complex functions(with @@ -496,4 +497,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1 } } #endif // defined(_WIN32) -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/crt_wrapper.cpp b/libdevice/crt_wrapper.cpp index e8160013a66df..8978c32d2d5e4 100644 --- a/libdevice/crt_wrapper.cpp +++ b/libdevice/crt_wrapper.cpp @@ -17,7 +17,8 @@ DeviceGlobal RandNext; #endif -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) DEVICE_EXTERN_C_INLINE void *memcpy(void *dest, const void *src, size_t n) { return __devicelib_memcpy(dest, src, n); @@ -126,4 +127,4 @@ void __assert_fail(const char *expr, const char *file, unsigned int line, __spirv_LocalInvocationId_z()); } #endif -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/device.h b/libdevice/device.h index 360af54f9b4c4..2ab565f5a939c 100644 --- a/libdevice/device.h +++ b/libdevice/device.h @@ -15,7 +15,8 @@ #define EXTERN_C #endif // __cplusplus -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #ifdef __SYCL_DEVICE_ONLY__ #define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((weak)) #else // __SYCL_DEVICE_ONLY__ @@ -27,7 +28,7 @@ DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline)) #define DEVICE_EXTERN_C_NOINLINE \ DEVICE_EXTERNAL EXTERN_C __attribute__((noinline)) -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #if defined(__SPIR__) || defined(__SPIRV__) || defined(__LIBDEVICE_HOST_IMPL__) #define __LIBDEVICE_IMF_ENABLED__ diff --git a/libdevice/device_math.h b/libdevice/device_math.h index faed0a2687dea..f4ee1711060c6 100644 --- 
a/libdevice/device_math.h +++ b/libdevice/device_math.h @@ -10,7 +10,8 @@ #define __LIBDEVICE_DEVICE_MATH_H__ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #include typedef struct { @@ -367,5 +368,5 @@ float __devicelib_scalbnf(float x, int n); DEVICE_EXTERN_C double __devicelib_scalbn(double x, int exp); -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #endif // __LIBDEVICE_DEVICE_MATH_H__ diff --git a/libdevice/fallback-cassert.cpp b/libdevice/fallback-cassert.cpp index 5d3c99d63c556..5f7bcafa6ecc0 100644 --- a/libdevice/fallback-cassert.cpp +++ b/libdevice/fallback-cassert.cpp @@ -100,7 +100,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file, } #endif // __SPIR__ || __SPIRV__ -#ifdef __NVPTX__ +#if defined(__NVPTX__) || defined(__AMDGCN__) DEVICE_EXTERN_C void __assertfail(const char *__message, const char *__file, unsigned __line, const char *__function, @@ -119,4 +119,4 @@ DEVICE_EXTERN_C void _wassert(const char *_Message, const char *_File, __assertfail(_Message, _File, _Line, 0, 1); } -#endif +#endif // __NVPTX__ || __AMDGCN__ diff --git a/libdevice/fallback-cmath-fp64.cpp b/libdevice/fallback-cmath-fp64.cpp index 8affe7aa86cf6..d7e4364e2595d 100644 --- a/libdevice/fallback-cmath-fp64.cpp +++ b/libdevice/fallback-cmath-fp64.cpp @@ -9,7 +9,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -193,4 +194,4 @@ DEVICE_EXTERN_C_INLINE double __devicelib_scalbn(double x, int exp) { return __spirv_ocl_ldexp(x, exp); } -#endif // __SPIR__ || __SPIRV__ 
|| __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/fallback-cmath.cpp b/libdevice/fallback-cmath.cpp index 39ff2e2252f05..37b2250b4b6aa 100644 --- a/libdevice/fallback-cmath.cpp +++ b/libdevice/fallback-cmath.cpp @@ -8,7 +8,8 @@ #include "device_math.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) // To support fallback device libraries on-demand loading, please update the // DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add @@ -209,4 +210,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); } DEVICE_EXTERN_C_INLINE float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); } -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/fallback-cstring.cpp b/libdevice/fallback-cstring.cpp index 5d384f00a78cb..e63e83052dfeb 100644 --- a/libdevice/fallback-cstring.cpp +++ b/libdevice/fallback-cstring.cpp @@ -9,7 +9,8 @@ #include "wrapper.h" #include -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) static void *__devicelib_memcpy_uint8_aligned(void *dest, const void *src, size_t n) { @@ -202,4 +203,4 @@ int __devicelib_memcmp(const void *s1, const void *s2, size_t n) { return head_cmp; } -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ diff --git a/libdevice/imf/imf_fp32_dl.cpp b/libdevice/imf/imf_fp32_dl.cpp index f08ee1d305999..eff8c2ac7472d 100644 --- a/libdevice/imf/imf_fp32_dl.cpp +++ b/libdevice/imf/imf_fp32_dl.cpp @@ -11,9 +11,12 @@ /// overhead in these deep learning frameworks. 
//===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE int32_t __devicelib_imf_abs(int32_t x) { return (x >= 0) ? x : -x; } diff --git a/libdevice/imf/imf_fp64_dl.cpp b/libdevice/imf/imf_fp64_dl.cpp index 37fbd906f71eb..d9382bc2ddc21 100644 --- a/libdevice/imf/imf_fp64_dl.cpp +++ b/libdevice/imf/imf_fp64_dl.cpp @@ -11,9 +11,12 @@ /// overhead in these deep learning frameworks. //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE double __devicelib_imf_fabs(double x) { return __fabs(x); } diff --git a/libdevice/imf/imf_inline_bf16.cpp b/libdevice/imf/imf_inline_bf16.cpp index c7165a1ee0183..96335de774fd0 100644 --- a/libdevice/imf/imf_inline_bf16.cpp +++ b/libdevice/imf/imf_inline_bf16.cpp @@ -5,9 +5,12 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ + +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE _iml_bf16_internal __devicelib_imf_fmabf16(_iml_bf16_internal a, _iml_bf16_internal b, diff --git a/libdevice/imf/imf_inline_fp32.cpp b/libdevice/imf/imf_inline_fp32.cpp index e71499f8fe057..44061ec40ab45 100644 --- a/libdevice/imf/imf_inline_fp32.cpp +++ b/libdevice/imf/imf_inline_fp32.cpp @@ -5,9 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" + +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE _iml_half_internal __devicelib_imf_fmaf16( 
_iml_half_internal a, _iml_half_internal b, _iml_half_internal c) { _iml_half ha(a), hb(b), hc(c); diff --git a/libdevice/imf/imf_inline_fp64.cpp b/libdevice/imf/imf_inline_fp64.cpp index f8d5418513f11..24c016c49344c 100644 --- a/libdevice/imf/imf_inline_fp64.cpp +++ b/libdevice/imf/imf_inline_fp64.cpp @@ -5,9 +5,13 @@ // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" + +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE double __devicelib_imf_fma(double a, double b, double c) { return __fma(a, b, c); diff --git a/libdevice/imf_utils/bfloat16_convert.cpp b/libdevice/imf_utils/bfloat16_convert.cpp index 750cb4e5877b4..1591c90768939 100644 --- a/libdevice/imf_utils/bfloat16_convert.cpp +++ b/libdevice/imf_utils/bfloat16_convert.cpp @@ -7,9 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ + +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_bfloat162float(_iml_bf16_internal b) { return __bfloat162float(b); diff --git a/libdevice/imf_utils/double_convert.cpp b/libdevice/imf_utils/double_convert.cpp index c4cd6dea07bf1..3c80dfe3ae769 100644 --- a/libdevice/imf_utils/double_convert.cpp +++ b/libdevice/imf_utils/double_convert.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + static inline float __double2float_rd(double x) { #if defined(__LIBDEVICE_HOST_IMPL__) return __double2Tp_host(x, FE_DOWNWARD); diff --git a/libdevice/imf_utils/float_convert.cpp b/libdevice/imf_utils/float_convert.cpp index 85299c0f33823..299ea5c25f96f 100644 --- 
a/libdevice/imf_utils/float_convert.cpp +++ b/libdevice/imf_utils/float_convert.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + static inline int __float2int_rd(float x) { #if defined(__LIBDEVICE_HOST_IMPL__) return __float2Tp_host(x, FE_DOWNWARD); diff --git a/libdevice/imf_utils/fp32_round.cpp b/libdevice/imf_utils/fp32_round.cpp index 32548b1ccf912..973371feca0d9 100644 --- a/libdevice/imf_utils/fp32_round.cpp +++ b/libdevice/imf_utils/fp32_round.cpp @@ -6,10 +6,13 @@ // //===----------------------------------------------------------------------===// +#include "../device.h" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + #include "../device_imf.hpp" #include "../imf_rounding_op.hpp" -#ifdef __LIBDEVICE_IMF_ENABLED__ DEVICE_EXTERN_C_INLINE float __devicelib_imf_fadd_rd(float x, float y) { return __fp_add_sub_entry(x, y, __IML_RTN); diff --git a/libdevice/imf_utils/fp64_round.cpp b/libdevice/imf_utils/fp64_round.cpp index aa4de27a669e1..2f88265a1103a 100644 --- a/libdevice/imf_utils/fp64_round.cpp +++ b/libdevice/imf_utils/fp64_round.cpp @@ -6,10 +6,13 @@ // //===----------------------------------------------------------------------===// +#include "../device.h" + +#ifdef __LIBDEVICE_IMF_ENABLED__ + #include "../device_imf.hpp" #include "../imf_rounding_op.hpp" -#ifdef __LIBDEVICE_IMF_ENABLED__ DEVICE_EXTERN_C_INLINE double __devicelib_imf_dadd_rd(double x, double y) { return __fp_add_sub_entry(x, y, __IML_RTN); diff --git a/libdevice/imf_utils/half_convert.cpp b/libdevice/imf_utils/half_convert.cpp index 3e23d3a46f01e..e16b9ec699f65 100644 --- a/libdevice/imf_utils/half_convert.cpp +++ b/libdevice/imf_utils/half_convert.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" #ifdef 
__LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_half2float(_iml_half_internal x) { return __half2float(x); diff --git a/libdevice/imf_utils/integer_misc.cpp b/libdevice/imf_utils/integer_misc.cpp index fdc850ee42281..06642eec7d267 100644 --- a/libdevice/imf_utils/integer_misc.cpp +++ b/libdevice/imf_utils/integer_misc.cpp @@ -7,9 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + DEVICE_EXTERN_C_INLINE unsigned int __devicelib_imf_brev(unsigned int x) { unsigned int res = 0; diff --git a/libdevice/imf_utils/simd_emulate.cpp b/libdevice/imf_utils/simd_emulate.cpp index 7369a1598aacb..a8ac73f42ab8b 100644 --- a/libdevice/imf_utils/simd_emulate.cpp +++ b/libdevice/imf_utils/simd_emulate.cpp @@ -7,9 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "../device_imf.hpp" +#include "../device.h" + #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "../device_imf.hpp" + template struct __twice_size; template using __twice_size_t = typename __twice_size::type; template struct __twice_size_tag { diff --git a/libdevice/imf_wrapper.cpp b/libdevice/imf_wrapper.cpp index 336725cad5f63..be630bccbf579 100644 --- a/libdevice/imf_wrapper.cpp +++ b/libdevice/imf_wrapper.cpp @@ -6,10 +6,12 @@ // //===----------------------------------------------------------------------===// -#include "device_imf.hpp" +#include "device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_saturatef(float); diff --git a/libdevice/imf_wrapper_bf16.cpp b/libdevice/imf_wrapper_bf16.cpp index d02903b0a720f..0c72d95bccc63 100644 --- a/libdevice/imf_wrapper_bf16.cpp +++ b/libdevice/imf_wrapper_bf16.cpp @@ -7,10 +7,12 @@ // 
//===----------------------------------------------------------------------===// -#include "imf_bf16.hpp" +#include "device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "imf_bf16.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_bfloat162float(_iml_bf16_internal); diff --git a/libdevice/imf_wrapper_fp64.cpp b/libdevice/imf_wrapper_fp64.cpp index 10cf98e844774..e90979d2bb724 100644 --- a/libdevice/imf_wrapper_fp64.cpp +++ b/libdevice/imf_wrapper_fp64.cpp @@ -7,10 +7,12 @@ // //===----------------------------------------------------------------------===// -#include "device_imf.hpp" +#include "device.h" #ifdef __LIBDEVICE_IMF_ENABLED__ +#include "device_imf.hpp" + DEVICE_EXTERN_C_INLINE float __devicelib_imf_double2float_rd(double); diff --git a/libdevice/spirv_vars.h b/libdevice/spirv_vars.h index 4445520bff741..869c343206994 100644 --- a/libdevice/spirv_vars.h +++ b/libdevice/spirv_vars.h @@ -11,7 +11,8 @@ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #include #include @@ -58,5 +59,5 @@ const size_t_vec __spirv_BuiltInGlobalInvocationId{}; const size_t_vec __spirv_BuiltInLocalInvocationId{}; #endif // !__SPIR__ && !__SPIRV__ -#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #endif // __LIBDEVICE_SPIRV_VARS_H diff --git a/libdevice/wrapper.h b/libdevice/wrapper.h index bbc0cd4f1ca87..45555785fd2fb 100644 --- a/libdevice/wrapper.h +++ b/libdevice/wrapper.h @@ -11,7 +11,8 @@ #include "device.h" -#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) +#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \ + defined(__AMDGCN__) #include #include @@ -29,5 +30,5 @@ void __devicelib_assert_fail(const char *expr, const char *file, int32_t line, const char *func, uint64_t gid0, uint64_t gid1, uint64_t gid2, uint64_t lid0, uint64_t lid1, uint64_t lid2); 
-#endif // __SPIR__ || __SPIRV__ || __NVPTX__ +#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__ #endif // __LIBDEVICE_WRAPPER_H__ diff --git a/sycl/test-e2e/DeviceLib/assert.cpp b/sycl/test-e2e/DeviceLib/assert.cpp index 6edabb07a34b3..ad5a015020f9b 100644 --- a/sycl/test-e2e/DeviceLib/assert.cpp +++ b/sycl/test-e2e/DeviceLib/assert.cpp @@ -1,4 +1,4 @@ -// REQUIRES: (cpu || cuda ) && linux +// REQUIRES: (cpu || cuda || hip ) && linux // RUN: %{build} -DSYCL_FALLBACK_ASSERT=1 -o %t.out // (see the other RUN lines below; it is a bit complicated) // diff --git a/sycl/test-e2e/DeviceLib/cmath_test.cpp b/sycl/test-e2e/DeviceLib/cmath_test.cpp index 3d5786e97fe2d..2fe760fff2a34 100644 --- a/sycl/test-e2e/DeviceLib/cmath_test.cpp +++ b/sycl/test-e2e/DeviceLib/cmath_test.cpp @@ -1,6 +1,5 @@ // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} -// UNSUPPORTED: hip // RUN: %{build} -fno-builtin %{mathflags} -o %t.out // RUN: %{run} %t.out diff --git a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp index 94b91255a5f1b..235029d766228 100644 --- a/sycl/test-e2e/DeviceLib/math_fp64_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_fp64_test.cpp @@ -1,12 +1,11 @@ // REQUIRES: aspect-fp64 -// UNSUPPORTED: hip // DEFINE: %{mathflags} = %if cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} // RUN: %{build} %{mathflags} -o %t.out // RUN: %{run} %t.out -// RUN: %clangxx -fsycl -fsycl-device-lib-jit-link %{mathflags} %s -o %t.out +// RUN: %clangxx -Wno-error=unused-command-line-argument -fsycl -fsycl-device-lib-jit-link %{mathflags} %s -o %t.out // RUN: %if !gpu %{ %{run} %t.out %} #include "math_utils.hpp" diff --git a/sycl/test-e2e/DeviceLib/math_test.cpp b/sycl/test-e2e/DeviceLib/math_test.cpp index 0380234575061..aeda8550294da 100644 --- a/sycl/test-e2e/DeviceLib/math_test.cpp +++ b/sycl/test-e2e/DeviceLib/math_test.cpp @@ -1,5 +1,3 @@ -// UNSUPPORTED: hip - // DEFINE: %{mathflags} = %if 
cl_options %{/clang:-fno-fast-math%} %else %{-fno-fast-math%} // RUN: %{build} %{mathflags} -o %t.out diff --git a/sycl/test-e2e/DeviceLib/string_test.cpp b/sycl/test-e2e/DeviceLib/string_test.cpp index 0d71417b54bce..6b61e0a51a043 100644 --- a/sycl/test-e2e/DeviceLib/string_test.cpp +++ b/sycl/test-e2e/DeviceLib/string_test.cpp @@ -1,4 +1,3 @@ -// UNSUPPORTED: hip // RUN: %{build} -Wno-error=deprecated-declarations -Wno-error=pointer-to-int-cast -fno-builtin -o %t.out // RUN: %{run} %t.out //