Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion clang/lib/CodeGen/CGBuiltin.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2738,7 +2738,8 @@ RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
ConstWithoutErrnoOrExceptions && ErrnoOverridenToFalseWithOpt;
}
if (GenerateIntrinsics &&
!(getLangOpts().SYCLIsDevice && getTarget().getTriple().isNVPTX())) {
!(getLangOpts().SYCLIsDevice && (getTarget().getTriple().isNVPTX() ||
getTarget().getTriple().isAMDGCN()))) {
switch (BuiltinIDIfNoAsmLabel) {
case Builtin::BIacos:
case Builtin::BIacosf:
Expand Down
2 changes: 1 addition & 1 deletion clang/lib/Driver/Driver.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5555,7 +5555,7 @@ class OffloadingActionBuilder final {
// AOT compilation.
bool SYCLDeviceLibLinked = false;
Action *NativeCPULib = nullptr;
if (IsSPIR || IsNVPTX || IsSYCLNativeCPU) {
if (IsSPIR || IsNVPTX || IsAMDGCN || IsSYCLNativeCPU) {
bool UseJitLink =
IsSPIR &&
Args.hasFlag(options::OPT_fsycl_device_lib_jit_link,
Expand Down
13 changes: 8 additions & 5 deletions clang/lib/Driver/ToolChains/SYCL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -165,9 +165,9 @@ static bool selectBfloatLibs(const llvm::Triple &Triple, const Compilation &C,

// spir64 target is actually JIT compilation, so we defer selection of
// bfloat16 libraries to runtime. For AOT we need libraries, but skip
// for Nvidia.
NeedLibs =
Triple.getSubArch() != llvm::Triple::NoSubArch && !Triple.isNVPTX();
// for Nvidia and AMD.
NeedLibs = Triple.getSubArch() != llvm::Triple::NoSubArch &&
!Triple.isNVPTX() && !Triple.isAMDGCN();
UseNative = false;
if (NeedLibs && Triple.getSubArch() == llvm::Triple::SPIRSubArch_gen &&
C.hasOffloadToolChain<Action::OFK_SYCL>()) {
Expand Down Expand Up @@ -212,9 +212,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
SmallVector<std::string, 8> LibraryList;
const llvm::opt::ArgList &Args = C.getArgs();

// For NVPTX we only use one single bitcode library and ignore
// For NVPTX and AMDGCN we only use one single bitcode library and ignore
// manually specified SYCL device libraries.
bool IgnoreSingleLibs = TargetTriple.isNVPTX();
bool IgnoreSingleLibs = TargetTriple.isNVPTX() || TargetTriple.isAMDGCN();

struct DeviceLibOptInfo {
StringRef DeviceLibName;
Expand Down Expand Up @@ -278,6 +278,9 @@ SYCL::getDeviceLibraries(const Compilation &C, const llvm::Triple &TargetTriple,
if (TargetTriple.isNVPTX() && IgnoreSingleLibs)
LibraryList.push_back(Args.MakeArgString("devicelib--cuda.bc"));

if (TargetTriple.isAMDGCN() && IgnoreSingleLibs)
LibraryList.push_back(Args.MakeArgString("devicelib--amd.bc"));

if (IgnoreSingleLibs)
return LibraryList;

Expand Down
2 changes: 2 additions & 0 deletions clang/test/CodeGenSYCL/sycl-libdevice-cmath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-cuda -ffast-math -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -emit-llvm -o - | FileCheck %s
// RUN: %clang_cc1 %s -fsycl-is-device -triple amdgcn-amd-amdhsa -ffast-math -emit-llvm -o - | FileCheck %s

#include "Inputs/sycl.hpp"

Expand Down
45 changes: 45 additions & 0 deletions clang/test/Driver/sycl-device-lib-amdgcn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Tests specific to `-fsycl-targets=amdgcn-amd-amdhsa`
// Verify that the correct devicelib linking actions are spawned by the driver.
// Check also if the correct warnings are generated.

// UNSUPPORTED: system-windows
// REQUIRES: amdgpu-registered-target

// Check if internal libraries are still linked against when linkage of all
// device libs is manually excluded.
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fno-sycl-device-lib=all --sysroot=%S/Inputs/SYCL \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-NO-DEVLIB %s

// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB: [[LIB1:[0-9]+]]: input, "{{.*}}libsycl-itt-user-wrappers.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB: [[LIB2:[0-9]+]]: input, "{{.*}}libsycl-itt-compiler-wrappers.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB: [[LIB3:[0-9]+]]: input, "{{.*}}libsycl-itt-stubs.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB-NOT: {{[0-9]+}}: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-NO-DEVLIB: {{[0-9]+}}: linker, {{{.*}}[[LIB1]], [[LIB2]], [[LIB3]]{{.*}}}, ir, (device-sycl, gfx906)

// Check that the -fsycl-device-lib flag has no effect when "all" is specified.
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl -fsycl-device-lib=all --sysroot=%S/Inputs/SYCL \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-ALL %s

// Check that the -fsycl-device-lib flag has no effect when subsets of libs
// are specified.
// RUN: %clangxx -ccc-print-phases -std=c++11 --sysroot=%S/Inputs/SYCL \
// RUN: -fsycl -fsycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-ALL %s

// Check that -fno-sycl-device-lib is ignored when it does not contain "all".
// A warning should be printed that the flag got ignored.
// RUN: %clangxx -ccc-print-phases -std=c++11 -fsycl --sysroot=%S/Inputs/SYCL \
// RUN: -fno-sycl-device-lib=libc,libm-fp32,libm-fp64,libimf-fp32,libimf-fp64,libimf-bf16,libm-bfloat16 \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: | FileCheck -check-prefixes=CHK-UNUSED-WARN,CHK-ALL %s

// CHK-UNUSED-WARN: warning: argument unused during compilation: '-fno-sycl-device-lib='
// CHK-ALL: [[DEVLIB:[0-9]+]]: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-ALL: {{[0-9]+}}: linker, {{{.*}}[[DEVLIB]]{{.*}}}, ir, (device-sycl, gfx906)

26 changes: 14 additions & 12 deletions clang/test/Driver/sycl-offload-amdgcn.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

/// Check phases w/out specifying a compute capability.
// RUN: %clangxx -ccc-print-phases -std=c++11 -target x86_64-unknown-linux-gnu -fsycl \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: -fsycl-targets=amdgcn-amd-amdhsa -fsycl-device-lib=all -Xsycl-target-backend --offload-arch=gfx906 %s 2>&1 \
// RUN: | FileCheck -check-prefix=CHK-PHASES-NO-CC %s
// CHK-PHASES-NO-CC: 0: input, "{{.*}}", c++, (host-sycl)
// CHK-PHASES-NO-CC: 1: preprocessor, {0}, c++-cpp-output, (host-sycl)
Expand All @@ -37,17 +37,19 @@
// CHK-PHASES-NO-CC: 7: backend, {6}, assembler, (host-sycl)
// CHK-PHASES-NO-CC: 8: assembler, {7}, object, (host-sycl)
// CHK-PHASES-NO-CC: 9: linker, {4}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 10: sycl-post-link, {9}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 11: file-table-tform, {10}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 12: backend, {11}, assembler, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 13: assembler, {12}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 14: linker, {13}, image, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 15: linker, {14}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 16: foreach, {11, 15}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 17: file-table-tform, {10, 16}, tempfiletable, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 18: clang-offload-wrapper, {17}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 19: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {18}, object
// CHK-PHASES-NO-CC: 20: linker, {8, 19}, image, (host-sycl)
// CHK-PHASES-NO-CC: 10: input, "{{.*}}devicelib--amd.bc", ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 11: linker, {9, 10}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 12: sycl-post-link, {11}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 13: file-table-tform, {12}, ir, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 14: backend, {13}, assembler, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 15: assembler, {14}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 16: linker, {15}, image, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 17: linker, {16}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 18: foreach, {13, 17}, hip-fatbin, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 19: file-table-tform, {12, 18}, tempfiletable, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 20: clang-offload-wrapper, {19}, object, (device-sycl, gfx906)
// CHK-PHASES-NO-CC: 21: offload, "device-sycl (amdgcn-amd-amdhsa:gfx906)" {20}, object
// CHK-PHASES-NO-CC: 22: linker, {8, 21}, image, (host-sycl)

/// Check that we only unbundle an archive once.
// RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -nogpulib \
Expand Down
13 changes: 10 additions & 3 deletions libdevice/cmake/modules/SYCLLibdevice.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,7 @@ foreach(filetype IN LISTS filetypes)
add_dependencies(libsycldevice libsycldevice-${filetype})
endforeach()

# For NVPTX each device libary is compiled into a single bitcode
# For NVPTX and AMDGCN each device library is compiled into a single bitcode
# file and all files created this way are linked into one large bitcode
# library.
# Additional compilation options are needed for compiling each device library.
Expand All @@ -76,6 +76,13 @@ if ("NVPTX" IN_LIST LLVM_TARGETS_TO_BUILD)
"-Xsycl-target-backend" "--cuda-gpu-arch=sm_50" "-nocudalib")
set(opt_flags_cuda "-O3" "--nvvm-reflect-enable=false")
endif()
if("AMDGPU" IN_LIST LLVM_TARGETS_TO_BUILD)
list(APPEND devicelib_arch amd)
set(compile_opts_amd "-nogpulib" "-fsycl-targets=amdgcn-amd-amdhsa"
"-Xsycl-target-backend" "--offload-arch=gfx940")
set(opt_flags_amd "-O3" "--amdgpu-oclc-reflect-enable=false")
endif()


set(spv_device_compile_opts -fsycl-device-only -fsycl-device-obj=spirv)
set(bc_device_compile_opts -fsycl-device-only -fsycl-device-obj=llvmir)
Expand Down Expand Up @@ -444,7 +451,7 @@ foreach(dtype IN ITEMS bf16 fp32 fp64)
endforeach()
endforeach()

# Add device fallback imf libraries for the CUDA target.
# Add device fallback imf libraries for the NVPTX and AMD targets.
# The output files are bitcode.
foreach(arch IN LISTS devicelib_arch)
foreach(dtype IN ITEMS bf16 fp32 fp64)
Expand All @@ -464,7 +471,7 @@ foreach(arch IN LISTS devicelib_arch)
endforeach()
endforeach()

# Create one large bitcode file for the CUDA targets.
# Create one large bitcode file for the CUDA and AMD targets.
# Use all the files collected in the respective global properties.
foreach(arch IN LISTS devicelib_arch)
get_property(BC_DEVICE_LIBS_${arch} GLOBAL PROPERTY BC_DEVICE_LIBS_${arch})
Expand Down
5 changes: 3 additions & 2 deletions libdevice/cmath_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

#include "device_math.h"

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)

// abs() wrapper: passes x straight through to __devicelib_abs and returns
// whatever that call yields.
DEVICE_EXTERN_C_INLINE
int abs(int x) { return __devicelib_abs(x); }
Expand Down Expand Up @@ -199,4 +200,4 @@ DEVICE_EXTERN_C_INLINE
float rintf(float x) { return __nv_rintf(x); }
#endif // __NVPTX__

#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/cmath_wrapper_fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

#include "device_math.h"

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)

// All exported functions in math and complex device libraries are weak
// reference. If users provide their own math or complex functions(with
Expand Down Expand Up @@ -496,4 +497,4 @@ double _Sinh(double x, double y) { // compute y * sinh(x), |y| <= 1
}
}
#endif // defined(_WIN32)
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/crt_wrapper.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,8 @@
DeviceGlobal<uint64_t[RAND_NEXT_LEN]> RandNext;
#endif

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)
DEVICE_EXTERN_C_INLINE
void *memcpy(void *dest, const void *src, size_t n) {
return __devicelib_memcpy(dest, src, n);
Expand Down Expand Up @@ -126,4 +127,4 @@ void __assert_fail(const char *expr, const char *file, unsigned int line,
__spirv_LocalInvocationId_z());
}
#endif
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/device.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,8 @@
#define EXTERN_C
#endif // __cplusplus

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)
#ifdef __SYCL_DEVICE_ONLY__
#define DEVICE_EXTERNAL SYCL_EXTERNAL __attribute__((weak))
#else // __SYCL_DEVICE_ONLY__
Expand All @@ -27,7 +28,7 @@
DEVICE_EXTERNAL EXTERN_C __attribute__((always_inline))
#define DEVICE_EXTERN_C_NOINLINE \
DEVICE_EXTERNAL EXTERN_C __attribute__((noinline))
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__LIBDEVICE_HOST_IMPL__)
#define __LIBDEVICE_IMF_ENABLED__
Expand Down
5 changes: 3 additions & 2 deletions libdevice/device_math.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
#define __LIBDEVICE_DEVICE_MATH_H__

#include "device.h"
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)
#include <cstdint>

typedef struct {
Expand Down Expand Up @@ -367,5 +368,5 @@ float __devicelib_scalbnf(float x, int n);
DEVICE_EXTERN_C
double __devicelib_scalbn(double x, int exp);

#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
#endif // __LIBDEVICE_DEVICE_MATH_H__
4 changes: 2 additions & 2 deletions libdevice/fallback-cassert.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ DEVICE_EXTERN_C void __devicelib_assert_fail(const char *expr, const char *file,
}
#endif // __SPIR__ || __SPIRV__

#ifdef __NVPTX__
#if defined(__NVPTX__) || defined(__AMDGCN__)

DEVICE_EXTERN_C void __assertfail(const char *__message, const char *__file,
unsigned __line, const char *__function,
Expand All @@ -119,4 +119,4 @@ DEVICE_EXTERN_C void _wassert(const char *_Message, const char *_File,
__assertfail(_Message, _File, _Line, 0, 1);
}

#endif
#endif // __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/fallback-cmath-fp64.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@

#include "device_math.h"

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)

// To support fallback device libraries on-demand loading, please update the
// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add
Expand Down Expand Up @@ -193,4 +194,4 @@ DEVICE_EXTERN_C_INLINE
double __devicelib_scalbn(double x, int exp) {
return __spirv_ocl_ldexp(x, exp);
}
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/fallback-cmath.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@

#include "device_math.h"

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)

// To support fallback device libraries on-demand loading, please update the
// DeviceLibFuncMap in llvm/tools/sycl-post-link/sycl-post-link.cpp if you add
Expand Down Expand Up @@ -209,4 +210,4 @@ float __devicelib_asinhf(float x) { return __spirv_ocl_asinh(x); }
// Single-precision atanh fallback: forwards x to __spirv_ocl_atanh and
// returns its result unchanged.
DEVICE_EXTERN_C_INLINE
float __devicelib_atanhf(float x) { return __spirv_ocl_atanh(x); }

#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 3 additions & 2 deletions libdevice/fallback-cstring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,8 @@
#include "wrapper.h"
#include <cstdint>

#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__)
#if defined(__SPIR__) || defined(__SPIRV__) || defined(__NVPTX__) || \
defined(__AMDGCN__)

static void *__devicelib_memcpy_uint8_aligned(void *dest, const void *src,
size_t n) {
Expand Down Expand Up @@ -202,4 +203,4 @@ int __devicelib_memcmp(const void *s1, const void *s2, size_t n) {

return head_cmp;
}
#endif // __SPIR__ || __SPIRV__ || __NVPTX__
#endif // __SPIR__ || __SPIRV__ || __NVPTX__ || __AMDGCN__
5 changes: 4 additions & 1 deletion libdevice/imf/imf_fp32_dl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@
/// overhead in these deep learning frameworks.
//===----------------------------------------------------------------------===//

#include "../device_imf.hpp"
#include "../device.h"

#ifdef __LIBDEVICE_IMF_ENABLED__

#include "../device_imf.hpp"

// Absolute value of a 32-bit signed integer: non-negative inputs are returned
// as-is, negative inputs are returned negated.
// NOTE(review): INT32_MIN has no representable negation in int32_t; callers
// presumably never pass it -- confirm.
DEVICE_EXTERN_C_INLINE int32_t __devicelib_imf_abs(int32_t x) {
  if (x < 0)
    return -x;
  return x;
}
Expand Down
5 changes: 4 additions & 1 deletion libdevice/imf/imf_fp64_dl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@
/// overhead in these deep learning frameworks.
//===----------------------------------------------------------------------===//

#include "../device_imf.hpp"
#include "../device.h"

#ifdef __LIBDEVICE_IMF_ENABLED__

#include "../device_imf.hpp"

// Double-precision fabs entry point: forwards x to the __fabs helper and
// returns its result unchanged.
DEVICE_EXTERN_C_INLINE double __devicelib_imf_fabs(double x) {
return __fabs(x);
}
Expand Down
5 changes: 4 additions & 1 deletion libdevice/imf/imf_inline_bf16.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,12 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "../device_imf.hpp"
#include "../device.h"

#ifdef __LIBDEVICE_IMF_ENABLED__

#include "../device_imf.hpp"

DEVICE_EXTERN_C_INLINE
_iml_bf16_internal __devicelib_imf_fmabf16(_iml_bf16_internal a,
_iml_bf16_internal b,
Expand Down
6 changes: 5 additions & 1 deletion libdevice/imf/imf_inline_fp32.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5,9 +5,13 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "../device_imf.hpp"

#include "../device.h"

#ifdef __LIBDEVICE_IMF_ENABLED__

#include "../device_imf.hpp"

DEVICE_EXTERN_C_INLINE _iml_half_internal __devicelib_imf_fmaf16(
_iml_half_internal a, _iml_half_internal b, _iml_half_internal c) {
_iml_half ha(a), hb(b), hc(c);
Expand Down
Loading
Loading