[Offload][Conformance] Add support for CUDA Math and HIP Math providers #152362


Merged: 4 commits into llvm:main on Aug 7, 2025

Conversation

leandrolcampos (Contributor):

This patch extends the conformance testing infrastructure to support two new providers of math function implementations for GPUs: CUDA Math (cuda-math) and HIP Math (hip-math).
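
For illustration, every provider translation unit in this patch follows the same wrapper pattern: declare the vendor entry point, then expose it through a uniformly named `__gpu_kernel` that the conformance harness launches. A minimal sketch assembled from the patch below (showing only the CUDA Math `sinf` wrapper; in the real file this sits under `#ifdef CUDA_MATH_FOUND`):

```cpp
#include "Conformance/device_code/DeviceAPIs.hpp"
#include "Conformance/device_code/KernelRunner.hpp"

#include <gpuintrin.h>
#include <stddef.h>

using namespace kernels;

extern "C" {
// __nv_sinf is the CUDA Math (libdevice) implementation declared in
// DeviceAPIs.hpp; the harness identifies the kernel by its name, sinfKernel.
__gpu_kernel void sinfKernel(const float *X, float *Out,
                             size_t NumElements) noexcept {
  runKernelBody<__nv_sinf>(NumElements, Out, X);
}
} // extern "C"
```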

llvmbot added the offload label on Aug 6, 2025.

llvmbot (Member) commented Aug 6, 2025:

@llvm/pr-subscribers-offload

Author: Leandro Lacerda (leandrolcampos)

Changes

This patch extends the conformance testing infrastructure to support two new providers of math function implementations for GPUs: CUDA Math (cuda-math) and HIP Math (hip-math).


Patch is 21.17 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/152362.diff

7 Files Affected:

  • (modified) offload/unittests/Conformance/device_code/CMakeLists.txt (+27-1)
  • (added) offload/unittests/Conformance/device_code/CUDAMath.cpp (+178)
  • (added) offload/unittests/Conformance/device_code/DeviceAPIs.hpp (+113)
  • (added) offload/unittests/Conformance/device_code/HIPMath.cpp (+178)
  • (renamed) offload/unittests/Conformance/device_code/KernelRunner.hpp (+7-9)
  • (modified) offload/unittests/Conformance/device_code/LLVMLibm.cpp (+3-2)
  • (modified) offload/unittests/Conformance/include/mathtest/TestRunner.hpp (+2-2)
diff --git a/offload/unittests/Conformance/device_code/CMakeLists.txt b/offload/unittests/Conformance/device_code/CMakeLists.txt
index 789dd167bb9ff..992f54c0c2376 100644
--- a/offload/unittests/Conformance/device_code/CMakeLists.txt
+++ b/offload/unittests/Conformance/device_code/CMakeLists.txt
@@ -1,4 +1,30 @@
+set(cuda_math_flags "")
+find_package(CUDAToolkit QUIET)
+if(CUDAToolkit_FOUND)
+  file(GLOB libdevice_paths "${CUDAToolkit_LIBRARY_ROOT}/nvvm/libdevice/libdevice.*.bc")
+  list(GET libdevice_paths 0 libdevice_path)
+
+  if (EXISTS ${libdevice_path})
+      list(APPEND cuda_math_flags "-Xclang" "-mlink-builtin-bitcode" "-Xclang" "${libdevice_path}")
+      list(APPEND cuda_math_flags "-DCUDA_MATH_FOUND=1")
+    endif()
+endif()
+
+set(hip_math_flags "")
+find_package(AMDDeviceLibs QUIET HINTS ${CMAKE_INSTALL_PREFIX} PATHS /opt/rocm)
+if(AMDDeviceLibs_FOUND)
+  get_target_property(ocml_path ocml IMPORTED_LOCATION)
+  list(APPEND hip_math_flags "-Xclang" "-mlink-builtin-bitcode" "-Xclang" "${ocml_path}")
+  list(APPEND hip_math_flags "-DHIP_MATH_FOUND=1")
+endif()
+
+add_offload_test_device_code(CUDAMath.cpp cuda-math -O3 -stdlib -fno-builtin ${cuda_math_flags})
+add_offload_test_device_code(HIPMath.cpp hip-math -O3 -stdlib -fno-builtin ${hip_math_flags})
 add_offload_test_device_code(LLVMLibm.cpp llvm-libm -O3 -stdlib -fno-builtin)
 
-add_custom_target(conformance_device_binaries DEPENDS llvm-libm.bin)
+add_custom_target(conformance_device_binaries DEPENDS
+  cuda-math.bin
+  hip-math.bin
+  llvm-libm.bin
+)
 set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
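
In effect, the flags assembled above make Clang link the vendor's device bitcode directly into the test translation unit: `-mlink-builtin-bitcode` is a CC1 (frontend) option, which is why each use is prefixed with `-Xclang`. A hypothetical direct invocation for the CUDA case (the GPU target, `sm_89` architecture, libdevice path, and omitted include paths are illustrative assumptions, not part of the patch):

```sh
clang++ --target=nvptx64-nvidia-cuda -march=sm_89 -O3 -fno-builtin \
    -DCUDA_MATH_FOUND=1 \
    -Xclang -mlink-builtin-bitcode \
    -Xclang /usr/local/cuda/nvvm/libdevice/libdevice.10.bc \
    -c CUDAMath.cpp -o cuda-math.o
```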
diff --git a/offload/unittests/Conformance/device_code/CUDAMath.cpp b/offload/unittests/Conformance/device_code/CUDAMath.cpp
new file mode 100644
index 0000000000000..a351e924b8f89
--- /dev/null
+++ b/offload/unittests/Conformance/device_code/CUDAMath.cpp
@@ -0,0 +1,178 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of the device kernels that wrap the
+/// math functions from the cuda-math provider.
+///
+//===----------------------------------------------------------------------===//
+
+#ifdef CUDA_MATH_FOUND
+
+#include "Conformance/device_code/DeviceAPIs.hpp"
+#include "Conformance/device_code/KernelRunner.hpp"
+
+#include <gpuintrin.h>
+#include <stddef.h>
+
+using namespace kernels;
+
+//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+
+static inline float sincosfSin(float X) {
+  float SinX, CosX;
+  __nv_sincosf(X, &SinX, &CosX);
+  return SinX;
+}
+
+static inline float sincosfCos(float X) {
+  float SinX, CosX;
+  __nv_sincosf(X, &SinX, &CosX);
+  return CosX;
+}
+
+//===----------------------------------------------------------------------===//
+// Kernels
+//===----------------------------------------------------------------------===//
+
+extern "C" {
+
+__gpu_kernel void acosfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__nv_acosf>(NumElements, Out, X);
+}
+
+__gpu_kernel void acoshfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__nv_acoshf>(NumElements, Out, X);
+}
+
+__gpu_kernel void asinfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__nv_asinf>(NumElements, Out, X);
+}
+
+__gpu_kernel void asinhfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__nv_asinhf>(NumElements, Out, X);
+}
+
+__gpu_kernel void atanfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__nv_atanf>(NumElements, Out, X);
+}
+
+__gpu_kernel void atanhfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__nv_atanhf>(NumElements, Out, X);
+}
+
+__gpu_kernel void cbrtfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__nv_cbrtf>(NumElements, Out, X);
+}
+
+__gpu_kernel void cosfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__nv_cosf>(NumElements, Out, X);
+}
+
+__gpu_kernel void coshfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__nv_coshf>(NumElements, Out, X);
+}
+
+__gpu_kernel void cospifKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__nv_cospif>(NumElements, Out, X);
+}
+
+__gpu_kernel void erffKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__nv_erff>(NumElements, Out, X);
+}
+
+__gpu_kernel void expfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__nv_expf>(NumElements, Out, X);
+}
+
+__gpu_kernel void exp10fKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__nv_exp10f>(NumElements, Out, X);
+}
+
+__gpu_kernel void exp2fKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__nv_exp2f>(NumElements, Out, X);
+}
+
+__gpu_kernel void expm1fKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__nv_expm1f>(NumElements, Out, X);
+}
+
+__gpu_kernel void logfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__nv_logf>(NumElements, Out, X);
+}
+
+__gpu_kernel void log10fKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__nv_log10f>(NumElements, Out, X);
+}
+
+__gpu_kernel void log1pfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__nv_log1pf>(NumElements, Out, X);
+}
+
+__gpu_kernel void log2fKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__nv_log2f>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__nv_sinf>(NumElements, Out, X);
+}
+
+__gpu_kernel void sincosfSinKernel(const float *X, float *Out,
+                                   size_t NumElements) noexcept {
+  runKernelBody<sincosfSin>(NumElements, Out, X);
+}
+
+__gpu_kernel void sincosfCosKernel(const float *X, float *Out,
+                                   size_t NumElements) noexcept {
+  runKernelBody<sincosfCos>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinhfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__nv_sinhf>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinpifKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__nv_sinpif>(NumElements, Out, X);
+}
+
+__gpu_kernel void tanfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__nv_tanf>(NumElements, Out, X);
+}
+
+__gpu_kernel void tanhfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__nv_tanhf>(NumElements, Out, X);
+}
+} // extern "C"
+
+#endif // CUDA_MATH_FOUND
diff --git a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
new file mode 100644
index 0000000000000..8476dcbeff0c9
--- /dev/null
+++ b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp
@@ -0,0 +1,113 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains platform-specific definitions and forward declarations
+/// for device-side APIs used by the kernels.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef CONFORMANCE_DEVICE_CODE_DEVICEAPIS_HPP
+#define CONFORMANCE_DEVICE_CODE_DEVICEAPIS_HPP
+
+#include <stdint.h>
+
+typedef _Float16 float16;
+
+#ifdef __AMDGPU__
+
+// The ROCm device library uses control globals to alter codegen for the
+// different targets. To avoid needing to link them in manually, we simply
+// define them here.
+extern "C" {
+extern const inline uint8_t __oclc_unsafe_math_opt = 0;
+extern const inline uint8_t __oclc_daz_opt = 0;
+extern const inline uint8_t __oclc_correctly_rounded_sqrt32 = 1;
+extern const inline uint8_t __oclc_finite_only_opt = 0;
+extern const inline uint32_t __oclc_ISA_version = 9000;
+}
+
+// These aliases cause Clang to emit the control constants with ODR linkage.
+// This allows us to link against the symbols without preventing them from being
+// optimized out or causing symbol collisions.
+[[gnu::alias("__oclc_unsafe_math_opt")]] const uint8_t __oclc_unsafe_math_opt__;
+[[gnu::alias("__oclc_daz_opt")]] const uint8_t __oclc_daz_opt__;
+[[gnu::alias("__oclc_correctly_rounded_sqrt32")]] const uint8_t
+    __oclc_correctly_rounded_sqrt32__;
+[[gnu::alias("__oclc_finite_only_opt")]] const uint8_t __oclc_finite_only_opt__;
+[[gnu::alias("__oclc_ISA_version")]] const uint32_t __oclc_ISA_version__;
+
+#endif // __AMDGPU__
+
+#ifdef CUDA_MATH_FOUND
+
+extern "C" {
+
+float __nv_acosf(float);
+float __nv_acoshf(float);
+float __nv_asinf(float);
+float __nv_asinhf(float);
+float __nv_atanf(float);
+float __nv_atanhf(float);
+float __nv_cbrtf(float);
+float __nv_cosf(float);
+float __nv_coshf(float);
+float __nv_cospif(float);
+float __nv_erff(float);
+float __nv_expf(float);
+float __nv_exp10f(float);
+float __nv_exp2f(float);
+float __nv_expm1f(float);
+float __nv_logf(float);
+float __nv_log10f(float);
+float __nv_log1pf(float);
+float __nv_log2f(float);
+float __nv_sinf(float);
+void __nv_sincosf(float, float *, float *);
+float __nv_sinhf(float);
+float __nv_sinpif(float);
+float __nv_tanf(float);
+float __nv_tanhf(float);
+} // extern "C"
+
+#endif // CUDA_MATH_FOUND
+
+#ifdef HIP_MATH_FOUND
+
+extern "C" {
+
+float __ocml_acos_f32(float);
+float __ocml_acosh_f32(float);
+float __ocml_asin_f32(float);
+float __ocml_asinh_f32(float);
+float __ocml_atan_f32(float);
+float __ocml_atanh_f32(float);
+float __ocml_cbrt_f32(float);
+float __ocml_cos_f32(float);
+float __ocml_cosh_f32(float);
+float __ocml_cospi_f32(float);
+float __ocml_erf_f32(float);
+float __ocml_exp_f32(float);
+float __ocml_exp10_f32(float);
+float __ocml_exp2_f32(float);
+float __ocml_expm1_f32(float);
+float __ocml_log_f32(float);
+float __ocml_log10_f32(float);
+float __ocml_log1p_f32(float);
+float __ocml_log2_f32(float);
+float __ocml_sin_f32(float);
+float __ocml_sincos_f32(float, float *);
+float __ocml_sinh_f32(float);
+float __ocml_sinpi_f32(float);
+float __ocml_tan_f32(float);
+float __ocml_tanh_f32(float);
+} // extern "C"
+
+#endif // HIP_MATH_FOUND
+
+#endif // CONFORMANCE_DEVICE_CODE_DEVICEAPIS_HPP
diff --git a/offload/unittests/Conformance/device_code/HIPMath.cpp b/offload/unittests/Conformance/device_code/HIPMath.cpp
new file mode 100644
index 0000000000000..36efe6b2696ab
--- /dev/null
+++ b/offload/unittests/Conformance/device_code/HIPMath.cpp
@@ -0,0 +1,178 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the implementation of the device kernels that wrap the
+/// math functions from the hip-math provider.
+///
+//===----------------------------------------------------------------------===//
+
+#ifdef HIP_MATH_FOUND
+
+#include "Conformance/device_code/DeviceAPIs.hpp"
+#include "Conformance/device_code/KernelRunner.hpp"
+
+#include <gpuintrin.h>
+#include <stddef.h>
+
+using namespace kernels;
+
+//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+
+static inline float sincosfSin(float X) {
+  float CosX;
+  float SinX = __ocml_sincos_f32(X, &CosX);
+  return SinX;
+}
+
+static inline float sincosfCos(float X) {
+  float CosX;
+  float SinX = __ocml_sincos_f32(X, &CosX);
+  return CosX;
+}
+
+//===----------------------------------------------------------------------===//
+// Kernels
+//===----------------------------------------------------------------------===//
+
+extern "C" {
+
+__gpu_kernel void acosfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__ocml_acos_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void acoshfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__ocml_acosh_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void asinfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__ocml_asin_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void asinhfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__ocml_asinh_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void atanfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__ocml_atan_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void atanhfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__ocml_atanh_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void cbrtfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__ocml_cbrt_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void cosfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__ocml_cos_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void coshfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__ocml_cosh_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void cospifKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__ocml_cospi_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void erffKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__ocml_erf_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void expfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__ocml_exp_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void exp10fKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__ocml_exp10_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void exp2fKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__ocml_exp2_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void expm1fKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__ocml_expm1_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void logfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__ocml_log_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void log10fKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__ocml_log10_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void log1pfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__ocml_log1p_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void log2fKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__ocml_log2_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__ocml_sin_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void sincosfSinKernel(const float *X, float *Out,
+                                   size_t NumElements) noexcept {
+  runKernelBody<sincosfSin>(NumElements, Out, X);
+}
+
+__gpu_kernel void sincosfCosKernel(const float *X, float *Out,
+                                   size_t NumElements) noexcept {
+  runKernelBody<sincosfCos>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinhfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__ocml_sinh_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void sinpifKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  runKernelBody<__ocml_sinpi_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void tanfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  runKernelBody<__ocml_tan_f32>(NumElements, Out, X);
+}
+
+__gpu_kernel void tanhfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  runKernelBody<__ocml_tanh_f32>(NumElements, Out, X);
+}
+} // extern "C"
+
+#endif // HIP_MATH_FOUND
diff --git a/offload/unittests/Conformance/device_code/Common.hpp b/offload/unittests/Conformance/device_code/KernelRunner.hpp
similarity index 70%
rename from offload/unittests/Conformance/device_code/Common.hpp
rename to offload/unittests/Conformance/device_code/KernelRunner.hpp
index bcf3ac617b54c..e64a62fbdf018 100644
--- a/offload/unittests/Conformance/device_code/Common.hpp
+++ b/offload/unittests/Conformance/device_code/KernelRunner.hpp
@@ -7,21 +7,19 @@
 //===----------------------------------------------------------------------===//
 ///
 /// \file
-/// This file contains common utilities for defining device kernel wrappers to
-/// math functions.
+/// This file contains the definition of the runKernelBody, a template helper
+/// that executes the per-thread logic of a math function's kernel wrapper.
 ///
 //===----------------------------------------------------------------------===//
 
-#ifndef CONFORMANCE_DEVICE_CODE_COMMON_HPP
-#define CONFORMANCE_DEVICE_CODE_COMMON_HPP
+#ifndef CONFORMANCE_DEVICE_CODE_KERNELRUNNER_HPP
+#define CONFORMANCE_DEVICE_CODE_KERNELRUNNER_HPP
 
 #include <gpuintrin.h>
 #include <stddef.h>
 #include <stdint.h>
 
-namespace common {
-
-typedef _Float16 float16;
+namespace kernels {
 
 template <auto Func, typename OutType, typename... InTypes>
 void runKernelBody(size_t NumElements, OutType *Out, const InTypes *...Ins) {
@@ -32,6 +30,6 @@ void runKernelBody(size_t NumElements, OutType *Out, const InTypes *...Ins) {
     Out[Index] = Func(Ins[Index]...);
   }
 }
-} // namespace common
+} // namespace kernels
 
-#endif // CONFORMANCE_DEVICE_CODE_COMMON_HPP
+#endif // CONFORMANCE_DEVICE_CODE_KERNELRUNNER_HPP
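
The hunk above elides the index computation inside `runKernelBody`. A plausible reconstruction of the full helper using the `<gpuintrin.h>` portability layer (the guard shape matches the visible diff context, but the exact intrinsics used upstream are an assumption):

```cpp
#include <gpuintrin.h>
#include <stddef.h>
#include <stdint.h>

namespace kernels {

// Assumes the harness launches at least one thread per element: each thread
// derives its global index, guards against overrun, and computes one result.
template <auto Func, typename OutType, typename... InTypes>
void runKernelBody(size_t NumElements, OutType *Out, const InTypes *...Ins) {
  uint32_t Index =
      __gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
  if (Index < NumElements) {
    Out[Index] = Func(Ins[Index]...);
  }
}

} // namespace kernels
```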
diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.cpp b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
index f137ba3d23752..8869d87017486 100644
--- a/offload/unittests/Conformance/device_code/LLVMLibm.cpp
+++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
@@ -12,13 +12,14 @@
 ///
 //===----------------------------------------------------------------------===//
 
-#include "Conformance/device_code/Common.h...
[truncated]

leandrolcampos (Contributor, Author) commented Aug 6, 2025:

Test Results for Single-Precision Math Functions

Max ULP distance observed per provider:

| Function | ULP Tolerance | llvm-libm (AMDGPU) | llvm-libm (CUDA) | cuda-math (CUDA) | hip-math (AMDGPU) |
|----------|---------------|--------------------|------------------|------------------|-------------------|
| acosf | 4 | 1 | 1 | 1 | 1 |
| acoshf | 4 | 1 | 1 | 2 | 1 |
| asinf | 4 | 1 | 1 | 1 | 3 |
| asinhf | 4 | 1 | 1 | 2 | 1 |
| atanf | 5 | 0 | 0 | 1 | 2 |
| atanhf | 5 | 0 | 0 | 3 | 1 |
| cbrtf | 2 | 0 | 0 | 1 | 1 |
| cosf | 4 | 1 | 1 | 2 | 2 |
| coshf | 4 | 0 | 0 | 2 | 1 |
| cospif | 4 | 0 | 0 | 1 | 1 |
| erff | 16 | 0 | 0 | 1 | 2 |
| expf | 3 | 0 | 0 | 2 | 1 |
| exp10f | 3 | 0 | 0 | 2 | 1 |
| exp2f | 3 | 1 | 1 | 2 | 1 |
| expm1f | 3 | 1 | 1 | 1 | 1 |
| logf | 3 | 1 | 1 | 1 | 2 |
| log10f | 3 | 1 | 1 | 2 | 2 |
| log1pf | 2 | 1 | 1 | 1 | 1 |
| log2f | 3 | 0 | 0 | 1 | 1 |
| sincosf (sin part) | 4 | 1 | 1 | 1 | 2 |
| sincosf (cos part) | 4 | 1 | 1 | 2 | 2 |
| sinhf | 4 | 1 | 1 | 3 | 1 |
| sinpif | 4 | 0 | 0 | 1 | 1 |
| tanf | 5 | 0 | 0 | 3 | 2 |
| tanhf | 5 | 0 | 0 | 2 | 1 |

Notes:

  • ULP (units in the last place) tolerances are taken from The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4, Table 65, Khronos Registry (accessed July 10, 2025).
  • The AMD GPU used for testing is a gfx1030.
  • The NVIDIA GPU used for testing is a GeForce RTX 4070 Laptop GPU.
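
For readers unfamiliar with the metric: the max ULP distance counts how many representable floats lie between the computed result and the correctly rounded reference. A self-contained sketch of one common way to compute it (the harness's actual implementation, including its NaN/infinity policy, may differ):

```cpp
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>

// Map a float's bit pattern onto a scale where consecutive integers
// correspond to adjacent representable floats (negatives mirrored below 0).
static int64_t orderedRepr(float X) {
  int32_t Bits;
  std::memcpy(&Bits, &X, sizeof(X));
  return Bits >= 0 ? Bits : -static_cast<int64_t>(Bits & 0x7fffffff);
}

// Number of representable floats between the computed value and the
// correctly rounded reference; 0 means they are identical.
static uint64_t ulpDistance(float Computed, float Reference) {
  if (Computed == Reference)
    return 0; // Also treats +0.0f and -0.0f as equal.
  return static_cast<uint64_t>(
      std::llabs(orderedRepr(Computed) - orderedRepr(Reference)));
}

int main() {
  float Reference = std::sin(0.5f); // Stand-in for a correctly rounded value.
  float Computed = std::nextafter(Reference, 2.0f); // One float away.
  std::printf("ulp distance: %llu\n",
              static_cast<unsigned long long>(
                  ulpDistance(Computed, Reference)));
  return 0;
}
```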

jhuber6 merged commit 27ed1f9 into llvm:main on Aug 7, 2025. All 9 checks passed.