llvm
diff --git a/offload/unittests/CMakeLists.txt b/offload/unittests/CMakeLists.txt
Lines changed: 4 additions & 0 deletions
diff --git a/offload/unittests/Conformance/device_code/CMakeLists.txt b/offload/unittests/Conformance/device_code/CMakeLists.txt
Lines changed: 1 addition & 1 deletion
diff --git a/offload/unittests/Conformance/device_code/Common.hpp b/offload/unittests/Conformance/device_code/Common.hpp
Lines changed: 37 additions & 0 deletions
diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.cpp b/offload/unittests/Conformance/device_code/LLVMLibm.cpp
Lines changed: 156 additions & 12 deletions
diff --git a/offload/unittests/Conformance/tests/AcosfTest.cpp b/offload/unittests/Conformance/tests/AcosfTest.cpp
Lines changed: 53 additions & 0 deletions
diff --git a/offload/unittests/Conformance/tests/AcoshfTest.cpp b/offload/unittests/Conformance/tests/AcoshfTest.cpp
Lines changed: 57 additions & 0 deletions
@@ -15,6 +15,8 @@ if (NOT TARGET llvm_gtest)
   return ()
 endif ()
 
+# Directory of this CMakeLists.txt; handed to the device compiler as an include
+# path (-I) so device sources can include headers relative to it.
+set(OFFLOAD_UNITTESTS_DIR "${CMAKE_CURRENT_SOURCE_DIR}")
+
 function(add_offload_test_device_code test_filename test_name)
   set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
   set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
@@ -39,6 +41,7 @@ function(add_offload_test_device_code test_filename test_name)
       add_custom_command(
         OUTPUT ${output_file}
         COMMAND ${CMAKE_CXX_COMPILER}
+        -I${OFFLOAD_UNITTESTS_DIR}
         --target=nvptx64-nvidia-cuda -march=${nvptx_arch}
         -nogpulib --cuda-path=${cuda_path} -flto ${ARGN}
         ${SRC_PATH} -o ${output_file}
@@ -63,6 +66,7 @@ function(add_offload_test_device_code test_filename test_name)
       add_custom_command(
         OUTPUT ${output_file}
         COMMAND ${CMAKE_CXX_COMPILER}
+        -I${OFFLOAD_UNITTESTS_DIR}
         --target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
         -nogpulib -flto ${ARGN} ${SRC_PATH} -o ${output_file}
         DEPENDS ${SRC_PATH}
 
@@ -1,4 +1,4 @@
+# Compile LLVMLibm.cpp into the llvm-libm device binary. Arguments after the
+# test name are forwarded to the device compiler command line.
-add_offload_test_device_code(LLVMLibm.cpp llvm-libm -stdlib -fno-builtin)
+add_offload_test_device_code(LLVMLibm.cpp llvm-libm -O3 -stdlib -fno-builtin)
 
+# Aggregate target that depends on the generated device binary.
 add_custom_target(conformance_device_binaries DEPENDS llvm-libm.bin)
+# Publish this directory's binary dir to the parent scope.
 set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
@@ -0,0 +1,37 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains common utilities for defining device kernel wrappers to
+/// math functions.
+///
+//===----------------------------------------------------------------------===//
+
+#ifndef CONFORMANCE_DEVICE_CODE_COMMON_HPP
+#define CONFORMANCE_DEVICE_CODE_COMMON_HPP
+
+#include <gpuintrin.h>
+#include <stddef.h>
+#include <stdint.h>
+
+namespace common {
+
+/// Alias for the compiler's _Float16 type, used by the f16 kernels.
+typedef _Float16 float16;
+
+/// Shared body for element-wise math kernels.
+///
+/// Computes the calling thread's global index along the x dimension and, when
+/// that index is in range, stores `Func(Ins[Index]...)` into `Out[Index]`.
+///
+/// \tparam Func     Callable applied to one element taken from each input.
+/// \tparam OutType  Element type of the output buffer.
+/// \tparam InTypes  Element types of the input buffers, in argument order.
+/// \param NumElements Number of elements to process; threads whose index is
+///                    not below this value do nothing.
+/// \param Out Output buffer, written at the global thread index.
+/// \param Ins Input buffers, each read at the global thread index.
+template <auto Func, typename OutType, typename... InTypes>
+void runKernelBody(size_t NumElements, OutType *Out, const InTypes *...Ins) {
+  // Widen before multiplying so the index arithmetic is carried out in size_t
+  // and cannot wrap in 32 bits before it is compared against NumElements.
+  const size_t Index =
+      static_cast<size_t>(__gpu_num_threads_x()) * __gpu_block_id_x() +
+      __gpu_thread_id_x();
+
+  if (Index < NumElements) {
+    Out[Index] = Func(Ins[Index]...);
+  }
+}
+} // namespace common
+
+#endif // CONFORMANCE_DEVICE_CODE_COMMON_HPP
@@ -12,29 +12,173 @@
 ///
 //===----------------------------------------------------------------------===//
 
+#include "Conformance/device_code/Common.hpp"
+
 #include <gpuintrin.h>
 #include <math.h>
 #include <stddef.h>
-#include <stdint.h>
 
-typedef _Float16 float16;
+using namespace common;
+
+//===----------------------------------------------------------------------===//
+// Helpers
+//===----------------------------------------------------------------------===//
+
+/// Returns the sine result that sincosf produces for \p X; the cosine result
+/// is computed alongside it and discarded.
+static inline float sincosfSin(float X) {
+  float Sine, Cosine;
+  sincosf(X, &Sine, &Cosine);
+  return Sine;
+}
+
+/// Returns the cosine result that sincosf produces for \p X; the sine result
+/// is computed alongside it and discarded.
+static inline float sincosfCos(float X) {
+  float Sine, Cosine;
+  sincosf(X, &Sine, &Cosine);
+  return Cosine;
+}
+
+//===----------------------------------------------------------------------===//
+// Kernels
+//===----------------------------------------------------------------------===//
 
 extern "C" {
 
+/// Device kernel for acosf: each thread whose index i is below NumElements
+/// stores acosf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void acosfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  common::runKernelBody<acosf>(NumElements, Out, X);
+}
+
+/// Device kernel for acoshf: each thread whose index i is below NumElements
+/// stores acoshf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void acoshfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  common::runKernelBody<acoshf>(NumElements, Out, X);
+}
+
+/// Device kernel for asinf: each thread whose index i is below NumElements
+/// stores asinf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void asinfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  common::runKernelBody<asinf>(NumElements, Out, X);
+}
+
+/// Device kernel for asinhf: each thread whose index i is below NumElements
+/// stores asinhf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void asinhfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  common::runKernelBody<asinhf>(NumElements, Out, X);
+}
+
+/// Device kernel for atanf: each thread whose index i is below NumElements
+/// stores atanf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void atanfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  common::runKernelBody<atanf>(NumElements, Out, X);
+}
+
+/// Device kernel for atanhf: each thread whose index i is below NumElements
+/// stores atanhf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void atanhfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  common::runKernelBody<atanhf>(NumElements, Out, X);
+}
+
+/// Device kernel for cbrtf: each thread whose index i is below NumElements
+/// stores cbrtf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void cbrtfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  common::runKernelBody<cbrtf>(NumElements, Out, X);
+}
+
+/// Device kernel for cosf: each thread whose index i is below NumElements
+/// stores cosf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void cosfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  common::runKernelBody<cosf>(NumElements, Out, X);
+}
+
+/// Device kernel for coshf: each thread whose index i is below NumElements
+/// stores coshf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void coshfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  common::runKernelBody<coshf>(NumElements, Out, X);
+}
+
+/// Device kernel for cospif: each thread whose index i is below NumElements
+/// stores cospif(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void cospifKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  common::runKernelBody<cospif>(NumElements, Out, X);
+}
+
+/// Device kernel for erff: each thread whose index i is below NumElements
+/// stores erff(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void erffKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  common::runKernelBody<erff>(NumElements, Out, X);
+}
+
+/// Device kernel for expf: each thread whose index i is below NumElements
+/// stores expf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void expfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  common::runKernelBody<expf>(NumElements, Out, X);
+}
+
+/// Device kernel for exp10f: each thread whose index i is below NumElements
+/// stores exp10f(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void exp10fKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  common::runKernelBody<exp10f>(NumElements, Out, X);
+}
+
+/// Device kernel for exp2f: each thread whose index i is below NumElements
+/// stores exp2f(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void exp2fKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  common::runKernelBody<exp2f>(NumElements, Out, X);
+}
+
+/// Device kernel for expm1f: each thread whose index i is below NumElements
+/// stores expm1f(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void expm1fKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  common::runKernelBody<expm1f>(NumElements, Out, X);
+}
+
+/// Device kernel for hypotf16: each thread whose index i is below NumElements
+/// stores hypotf16(X[i], Y[i]) into Out[i]; the work is done by runKernelBody.
 __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
-                                 size_t NumElements) {
-  uint32_t Index =
-      __gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
+                                 size_t NumElements) noexcept {
+  runKernelBody<hypotf16>(NumElements, Out, X, Y);
+}
+
+/// Device kernel for logf: each thread whose index i is below NumElements
+/// stores logf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void logfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  common::runKernelBody<logf>(NumElements, Out, X);
+}
+
+/// Device kernel for log10f: each thread whose index i is below NumElements
+/// stores log10f(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void log10fKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  common::runKernelBody<log10f>(NumElements, Out, X);
+}
 
-  if (Index < NumElements)
-    Out[Index] = hypotf16(X[Index], Y[Index]);
+/// Device kernel for log1pf: each thread whose index i is below NumElements
+/// stores log1pf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void log1pfKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  common::runKernelBody<log1pf>(NumElements, Out, X);
 }
 
-__gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) {
-  uint32_t Index =
-      __gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
+/// Device kernel for log2f: each thread whose index i is below NumElements
+/// stores log2f(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void log2fKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  common::runKernelBody<log2f>(NumElements, Out, X);
+}
+
+/// Device kernel for sinf: each thread whose index i is below NumElements
+/// stores sinf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void sinfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  common::runKernelBody<sinf>(NumElements, Out, X);
+}
+
+/// Device kernel for the sine output of sincosf: each thread whose index i is
+/// below NumElements stores sincosfSin(X[i]) into Out[i].
+__gpu_kernel void sincosfSinKernel(const float *X, float *Out,
+                                   size_t NumElements) noexcept {
+  common::runKernelBody<sincosfSin>(NumElements, Out, X);
+}
+
+/// Device kernel for the cosine output of sincosf: each thread whose index i
+/// is below NumElements stores sincosfCos(X[i]) into Out[i].
+__gpu_kernel void sincosfCosKernel(const float *X, float *Out,
+                                   size_t NumElements) noexcept {
+  common::runKernelBody<sincosfCos>(NumElements, Out, X);
+}
+
+/// Device kernel for sinhf: each thread whose index i is below NumElements
+/// stores sinhf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void sinhfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  common::runKernelBody<sinhf>(NumElements, Out, X);
+}
+
+/// Device kernel for sinpif: each thread whose index i is below NumElements
+/// stores sinpif(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void sinpifKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  common::runKernelBody<sinpif>(NumElements, Out, X);
+}
+
+/// Device kernel for tanf: each thread whose index i is below NumElements
+/// stores tanf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void tanfKernel(const float *X, float *Out,
+                             size_t NumElements) noexcept {
+  common::runKernelBody<tanf>(NumElements, Out, X);
+}
+
+/// Device kernel for tanhf: each thread whose index i is below NumElements
+/// stores tanhf(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void tanhfKernel(const float *X, float *Out,
+                              size_t NumElements) noexcept {
+  common::runKernelBody<tanhf>(NumElements, Out, X);
+}
 
-  if (Index < NumElements)
-    Out[Index] = logf(X[Index]);
+/// Device kernel for tanpif: each thread whose index i is below NumElements
+/// stores tanpif(X[i]) into Out[i] (via common::runKernelBody).
+__gpu_kernel void tanpifKernel(const float *X, float *Out,
+                               size_t NumElements) noexcept {
+  common::runKernelBody<tanpif>(NumElements, Out, X);
 }
 } // extern "C"
@@ -0,0 +1,53 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the conformance test of the acosf function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "mathtest/CommandLineExtras.hpp"
+#include "mathtest/ExhaustiveGenerator.hpp"
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/TestConfig.hpp"
+#include "mathtest/TestRunner.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdlib>
+#include <math.h>
+
+namespace mathtest {
+
+/// Test configuration for acosf: the function's name, the name of its device
+/// kernel, and the ULP tolerance used for it.
+template <> struct FunctionConfig<acosf> {
+  static constexpr llvm::StringRef Name{"acosf"};
+  static constexpr llvm::StringRef KernelName{"acosfKernel"};
+
+  // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+  //         Table 65, Khronos Registry [July 10, 2025].
+  static constexpr uint64_t UlpTolerance{4};
+};
+} // namespace mathtest
+
+/// Entry point: parses the command line, runs the acosf conformance tests over
+/// an exhaustive generator, and reports the outcome through the exit code.
+int main(int argc, const char **argv) {
+  llvm::cl::ParseCommandLineOptions(argc, argv,
+                                    "Conformance test of the acosf function");
+
+  using namespace mathtest;
+
+  // Default-constructed range: no explicit bounds are supplied here.
+  IndexedRange<float> Inputs;
+  ExhaustiveGenerator<float> InputGenerator(Inputs);
+
+  const auto TestConfigs = cl::getTestConfigs();
+  const llvm::StringRef BinaryDir = DEVICE_BINARY_DIR;
+  const bool Verbose = cl::IsVerbose;
+
+  const bool AllPassed =
+      runTests<acosf>(InputGenerator, TestConfigs, BinaryDir, Verbose);
+  if (!AllPassed)
+    return EXIT_FAILURE;
+
+  return EXIT_SUCCESS;
+}
@@ -0,0 +1,57 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+///
+/// \file
+/// This file contains the conformance test of the acoshf function.
+///
+//===----------------------------------------------------------------------===//
+
+#include "mathtest/CommandLineExtras.hpp"
+#include "mathtest/ExhaustiveGenerator.hpp"
+#include "mathtest/IndexedRange.hpp"
+#include "mathtest/TestConfig.hpp"
+#include "mathtest/TestRunner.hpp"
+
+#include "llvm/ADT/StringRef.h"
+
+#include <cstdlib>
+#include <limits>
+#include <math.h>
+
+namespace mathtest {
+
+/// Test configuration for acoshf: the function's name, the name of its device
+/// kernel, and the ULP tolerance used for it.
+template <> struct FunctionConfig<acoshf> {
+  static constexpr llvm::StringRef Name{"acoshf"};
+  static constexpr llvm::StringRef KernelName{"acoshfKernel"};
+
+  // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
+  //         Table 65, Khronos Registry [July 10, 2025].
+  static constexpr uint64_t UlpTolerance{4};
+};
+} // namespace mathtest
+
+/// Entry point: parses the command line, runs the acoshf conformance tests
+/// over an exhaustive generator, and reports the outcome through the exit
+/// code.
+int main(int argc, const char **argv) {
+  llvm::cl::ParseCommandLineOptions(argc, argv,
+                                    "Conformance test of the acoshf function");
+
+  using namespace mathtest;
+
+  // Per the argument labels, inputs run from 1 up to and including +infinity.
+  IndexedRange<float> Inputs(/*Begin=*/1.0f,
+                             /*End=*/std::numeric_limits<float>::infinity(),
+                             /*Inclusive=*/true);
+  ExhaustiveGenerator<float> InputGenerator(Inputs);
+
+  const auto TestConfigs = cl::getTestConfigs();
+  const llvm::StringRef BinaryDir = DEVICE_BINARY_DIR;
+  const bool Verbose = cl::IsVerbose;
+
+  const bool AllPassed =
+      runTests<acoshf>(InputGenerator, TestConfigs, BinaryDir, Verbose);
+  if (!AllPassed)
+    return EXIT_FAILURE;
+
+  return EXIT_SUCCESS;
+}