Skip to content

Commit cd02680

Browse files
[Offload][Conformance] Add tests for single-precision math functions (#152013)
This patch adds a new set of conformance tests for single-precision math functions provided by the LLVM libm for GPUs. The functions included in this set were selected based on the following criteria:
- An implementation exists in `libc/src/math/generic` (i.e., it is not just a wrapper around a compiler built-in).
- The corresponding LLVM CPU libm implementation is correctly rounded.
- The function is listed in Table 65 of the OpenCL C Specification v3.0.19.
1 parent 448c884 commit cd02680

31 files changed

+1603
-17
lines changed

offload/unittests/CMakeLists.txt

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,8 @@ if (NOT TARGET llvm_gtest)
1515
return ()
1616
endif ()
1717

18+
set(OFFLOAD_UNITTESTS_DIR ${CMAKE_CURRENT_SOURCE_DIR})
19+
1820
function(add_offload_test_device_code test_filename test_name)
1921
set(SRC_PATH ${CMAKE_CURRENT_SOURCE_DIR}/${test_filename})
2022
set(CMAKE_TRY_COMPILE_TARGET_TYPE STATIC_LIBRARY)
@@ -39,6 +41,7 @@ function(add_offload_test_device_code test_filename test_name)
3941
add_custom_command(
4042
OUTPUT ${output_file}
4143
COMMAND ${CMAKE_CXX_COMPILER}
44+
-I${OFFLOAD_UNITTESTS_DIR}
4245
--target=nvptx64-nvidia-cuda -march=${nvptx_arch}
4346
-nogpulib --cuda-path=${cuda_path} -flto ${ARGN}
4447
${SRC_PATH} -o ${output_file}
@@ -63,6 +66,7 @@ function(add_offload_test_device_code test_filename test_name)
6366
add_custom_command(
6467
OUTPUT ${output_file}
6568
COMMAND ${CMAKE_CXX_COMPILER}
69+
-I${OFFLOAD_UNITTESTS_DIR}
6670
--target=amdgcn-amd-amdhsa -mcpu=${amdgpu_arch}
6771
-nogpulib -flto ${ARGN} ${SRC_PATH} -o ${output_file}
6872
DEPENDS ${SRC_PATH}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
add_offload_test_device_code(LLVMLibm.cpp llvm-libm -stdlib -fno-builtin)
1+
add_offload_test_device_code(LLVMLibm.cpp llvm-libm -O3 -stdlib -fno-builtin)
22

33
add_custom_target(conformance_device_binaries DEPENDS llvm-libm.bin)
44
set(OFFLOAD_CONFORMANCE_DEVICE_BINARY_DIR ${CMAKE_CURRENT_BINARY_DIR} PARENT_SCOPE)
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file contains common utilities for defining device kernel wrappers to
11+
/// math functions.
12+
///
13+
//===----------------------------------------------------------------------===//
14+
15+
#ifndef CONFORMANCE_DEVICE_CODE_COMMON_HPP
16+
#define CONFORMANCE_DEVICE_CODE_COMMON_HPP
17+
18+
#include <gpuintrin.h>
19+
#include <stddef.h>
20+
#include <stdint.h>
21+
22+
namespace common {
23+
24+
typedef _Float16 float16;
25+
26+
template <auto Func, typename OutType, typename... InTypes>
27+
void runKernelBody(size_t NumElements, OutType *Out, const InTypes *...Ins) {
28+
uint32_t Index =
29+
__gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
30+
31+
if (Index < NumElements) {
32+
Out[Index] = Func(Ins[Index]...);
33+
}
34+
}
35+
} // namespace common
36+
37+
#endif // CONFORMANCE_DEVICE_CODE_COMMON_HPP

offload/unittests/Conformance/device_code/LLVMLibm.cpp

Lines changed: 156 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -12,29 +12,173 @@
1212
///
1313
//===----------------------------------------------------------------------===//
1414

15+
#include "Conformance/device_code/Common.hpp"
16+
1517
#include <gpuintrin.h>
1618
#include <math.h>
1719
#include <stddef.h>
18-
#include <stdint.h>
1920

20-
typedef _Float16 float16;
21+
using namespace common;
22+
23+
//===----------------------------------------------------------------------===//
24+
// Helpers
25+
//===----------------------------------------------------------------------===//
26+
27+
/// Returns the sine component produced by `sincosf(X, ...)`.
///
/// Gives `sincosf` a one-argument, one-result shape; the cosine component is
/// computed by the call and then discarded.
static inline float sincosfSin(float X) {
  float Sine;
  float Cosine; // Required by the sincosf signature; not used afterwards.
  sincosf(X, &Sine, &Cosine);
  return Sine;
}
32+
33+
/// Returns the cosine component produced by `sincosf(X, ...)`.
///
/// Gives `sincosf` a one-argument, one-result shape; the sine component is
/// computed by the call and then discarded.
static inline float sincosfCos(float X) {
  float Sine; // Required by the sincosf signature; not used afterwards.
  float Cosine;
  sincosf(X, &Sine, &Cosine);
  return Cosine;
}
38+
39+
//===----------------------------------------------------------------------===//
40+
// Kernels
41+
//===----------------------------------------------------------------------===//
2142

2243
extern "C" {
2344

45+
__gpu_kernel void acosfKernel(const float *X, float *Out,
46+
size_t NumElements) noexcept {
47+
runKernelBody<acosf>(NumElements, Out, X);
48+
}
49+
50+
__gpu_kernel void acoshfKernel(const float *X, float *Out,
51+
size_t NumElements) noexcept {
52+
runKernelBody<acoshf>(NumElements, Out, X);
53+
}
54+
55+
__gpu_kernel void asinfKernel(const float *X, float *Out,
56+
size_t NumElements) noexcept {
57+
runKernelBody<asinf>(NumElements, Out, X);
58+
}
59+
60+
__gpu_kernel void asinhfKernel(const float *X, float *Out,
61+
size_t NumElements) noexcept {
62+
runKernelBody<asinhf>(NumElements, Out, X);
63+
}
64+
65+
__gpu_kernel void atanfKernel(const float *X, float *Out,
66+
size_t NumElements) noexcept {
67+
runKernelBody<atanf>(NumElements, Out, X);
68+
}
69+
70+
__gpu_kernel void atanhfKernel(const float *X, float *Out,
71+
size_t NumElements) noexcept {
72+
runKernelBody<atanhf>(NumElements, Out, X);
73+
}
74+
75+
__gpu_kernel void cbrtfKernel(const float *X, float *Out,
76+
size_t NumElements) noexcept {
77+
runKernelBody<cbrtf>(NumElements, Out, X);
78+
}
79+
80+
__gpu_kernel void cosfKernel(const float *X, float *Out,
81+
size_t NumElements) noexcept {
82+
runKernelBody<cosf>(NumElements, Out, X);
83+
}
84+
85+
__gpu_kernel void coshfKernel(const float *X, float *Out,
86+
size_t NumElements) noexcept {
87+
runKernelBody<coshf>(NumElements, Out, X);
88+
}
89+
90+
__gpu_kernel void cospifKernel(const float *X, float *Out,
91+
size_t NumElements) noexcept {
92+
runKernelBody<cospif>(NumElements, Out, X);
93+
}
94+
95+
__gpu_kernel void erffKernel(const float *X, float *Out,
96+
size_t NumElements) noexcept {
97+
runKernelBody<erff>(NumElements, Out, X);
98+
}
99+
100+
__gpu_kernel void expfKernel(const float *X, float *Out,
101+
size_t NumElements) noexcept {
102+
runKernelBody<expf>(NumElements, Out, X);
103+
}
104+
105+
__gpu_kernel void exp10fKernel(const float *X, float *Out,
106+
size_t NumElements) noexcept {
107+
runKernelBody<exp10f>(NumElements, Out, X);
108+
}
109+
110+
__gpu_kernel void exp2fKernel(const float *X, float *Out,
111+
size_t NumElements) noexcept {
112+
runKernelBody<exp2f>(NumElements, Out, X);
113+
}
114+
115+
__gpu_kernel void expm1fKernel(const float *X, float *Out,
116+
size_t NumElements) noexcept {
117+
runKernelBody<expm1f>(NumElements, Out, X);
118+
}
119+
24120
__gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out,
25-
size_t NumElements) {
26-
uint32_t Index =
27-
__gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
121+
size_t NumElements) noexcept {
122+
runKernelBody<hypotf16>(NumElements, Out, X, Y);
123+
}
124+
125+
__gpu_kernel void logfKernel(const float *X, float *Out,
126+
size_t NumElements) noexcept {
127+
runKernelBody<logf>(NumElements, Out, X);
128+
}
129+
130+
__gpu_kernel void log10fKernel(const float *X, float *Out,
131+
size_t NumElements) noexcept {
132+
runKernelBody<log10f>(NumElements, Out, X);
133+
}
28134

29-
if (Index < NumElements)
30-
Out[Index] = hypotf16(X[Index], Y[Index]);
135+
__gpu_kernel void log1pfKernel(const float *X, float *Out,
136+
size_t NumElements) noexcept {
137+
runKernelBody<log1pf>(NumElements, Out, X);
31138
}
32139

33-
__gpu_kernel void logfKernel(const float *X, float *Out, size_t NumElements) {
34-
uint32_t Index =
35-
__gpu_num_threads_x() * __gpu_block_id_x() + __gpu_thread_id_x();
140+
__gpu_kernel void log2fKernel(const float *X, float *Out,
141+
size_t NumElements) noexcept {
142+
runKernelBody<log2f>(NumElements, Out, X);
143+
}
144+
145+
__gpu_kernel void sinfKernel(const float *X, float *Out,
146+
size_t NumElements) noexcept {
147+
runKernelBody<sinf>(NumElements, Out, X);
148+
}
149+
150+
__gpu_kernel void sincosfSinKernel(const float *X, float *Out,
151+
size_t NumElements) noexcept {
152+
runKernelBody<sincosfSin>(NumElements, Out, X);
153+
}
154+
155+
__gpu_kernel void sincosfCosKernel(const float *X, float *Out,
156+
size_t NumElements) noexcept {
157+
runKernelBody<sincosfCos>(NumElements, Out, X);
158+
}
159+
160+
__gpu_kernel void sinhfKernel(const float *X, float *Out,
161+
size_t NumElements) noexcept {
162+
runKernelBody<sinhf>(NumElements, Out, X);
163+
}
164+
165+
__gpu_kernel void sinpifKernel(const float *X, float *Out,
166+
size_t NumElements) noexcept {
167+
runKernelBody<sinpif>(NumElements, Out, X);
168+
}
169+
170+
__gpu_kernel void tanfKernel(const float *X, float *Out,
171+
size_t NumElements) noexcept {
172+
runKernelBody<tanf>(NumElements, Out, X);
173+
}
174+
175+
__gpu_kernel void tanhfKernel(const float *X, float *Out,
176+
size_t NumElements) noexcept {
177+
runKernelBody<tanhf>(NumElements, Out, X);
178+
}
36179

37-
if (Index < NumElements)
38-
Out[Index] = logf(X[Index]);
180+
__gpu_kernel void tanpifKernel(const float *X, float *Out,
181+
size_t NumElements) noexcept {
182+
runKernelBody<tanpif>(NumElements, Out, X);
39183
}
40184
} // extern "C"
Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file contains the conformance test of the acosf function.
11+
///
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "mathtest/CommandLineExtras.hpp"
15+
#include "mathtest/ExhaustiveGenerator.hpp"
16+
#include "mathtest/IndexedRange.hpp"
17+
#include "mathtest/TestConfig.hpp"
18+
#include "mathtest/TestRunner.hpp"
19+
20+
#include "llvm/ADT/StringRef.h"
21+
22+
#include <cstdlib>
23+
#include <math.h>
24+
25+
namespace mathtest {
26+
27+
template <> struct FunctionConfig<acosf> {
28+
static constexpr llvm::StringRef Name = "acosf";
29+
static constexpr llvm::StringRef KernelName = "acosfKernel";
30+
31+
// Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
32+
// Table 65, Khronos Registry [July 10, 2025].
33+
static constexpr uint64_t UlpTolerance = 4;
34+
};
35+
} // namespace mathtest
36+
37+
int main(int argc, const char **argv) {
38+
llvm::cl::ParseCommandLineOptions(argc, argv,
39+
"Conformance test of the acosf function");
40+
41+
using namespace mathtest;
42+
43+
IndexedRange<float> Range;
44+
ExhaustiveGenerator<float> Generator(Range);
45+
46+
const auto Configs = cl::getTestConfigs();
47+
const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
48+
const bool IsVerbose = cl::IsVerbose;
49+
50+
bool Passed = runTests<acosf>(Generator, Configs, DeviceBinaryDir, IsVerbose);
51+
52+
return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
53+
}
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
//===----------------------------------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
///
9+
/// \file
10+
/// This file contains the conformance test of the acoshf function.
11+
///
12+
//===----------------------------------------------------------------------===//
13+
14+
#include "mathtest/CommandLineExtras.hpp"
15+
#include "mathtest/ExhaustiveGenerator.hpp"
16+
#include "mathtest/IndexedRange.hpp"
17+
#include "mathtest/TestConfig.hpp"
18+
#include "mathtest/TestRunner.hpp"
19+
20+
#include "llvm/ADT/StringRef.h"
21+
22+
#include <cstdlib>
23+
#include <limits>
24+
#include <math.h>
25+
26+
namespace mathtest {
27+
28+
template <> struct FunctionConfig<acoshf> {
29+
static constexpr llvm::StringRef Name = "acoshf";
30+
static constexpr llvm::StringRef KernelName = "acoshfKernel";
31+
32+
// Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4,
33+
// Table 65, Khronos Registry [July 10, 2025].
34+
static constexpr uint64_t UlpTolerance = 4;
35+
};
36+
} // namespace mathtest
37+
38+
int main(int argc, const char **argv) {
39+
llvm::cl::ParseCommandLineOptions(argc, argv,
40+
"Conformance test of the acoshf function");
41+
42+
using namespace mathtest;
43+
44+
IndexedRange<float> Range(/*Begin=*/1.0f,
45+
/*End=*/std::numeric_limits<float>::infinity(),
46+
/*Inclusive=*/true);
47+
ExhaustiveGenerator<float> Generator(Range);
48+
49+
const auto Configs = cl::getTestConfigs();
50+
const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR;
51+
const bool IsVerbose = cl::IsVerbose;
52+
53+
bool Passed =
54+
runTests<acoshf>(Generator, Configs, DeviceBinaryDir, IsVerbose);
55+
56+
return Passed ? EXIT_SUCCESS : EXIT_FAILURE;
57+
}

0 commit comments

Comments
 (0)