From 67e258ac8609f8d298fe1b9f6a4f8c1113d151d5 Mon Sep 17 00:00:00 2001 From: Leandro Augusto Lacerda Campos Date: Thu, 21 Aug 2025 00:21:06 -0300 Subject: [PATCH 1/3] Add tests for single-precision bivariate math functions --- .../Conformance/device_code/CUDAMath.cpp | 25 +++++++ .../Conformance/device_code/DeviceAPIs.hpp | 8 ++ .../Conformance/device_code/HIPMath.cpp | 25 +++++++ .../Conformance/device_code/LLVMLibm.cpp | 25 +++++++ .../Conformance/tests/Atan2fTest.cpp | 58 +++++++++++++++ .../Conformance/tests/CMakeLists.txt | 3 + .../Conformance/tests/HypotfTest.cpp | 58 +++++++++++++++ .../unittests/Conformance/tests/PowfTest.cpp | 74 +++++++++++++++++++ 8 files changed, 276 insertions(+) create mode 100644 offload/unittests/Conformance/tests/Atan2fTest.cpp create mode 100644 offload/unittests/Conformance/tests/HypotfTest.cpp create mode 100644 offload/unittests/Conformance/tests/PowfTest.cpp diff --git a/offload/unittests/Conformance/device_code/CUDAMath.cpp b/offload/unittests/Conformance/device_code/CUDAMath.cpp index 86c5d698d80af..d47607a7c862e 100644 --- a/offload/unittests/Conformance/device_code/CUDAMath.cpp +++ b/offload/unittests/Conformance/device_code/CUDAMath.cpp @@ -26,6 +26,10 @@ using namespace kernels; // Helpers //===----------------------------------------------------------------------===// +static inline float powfRoundedExponent(float Base, float Exponent) { + return __nv_powf(Base, __nv_roundf(Exponent)); +} + static inline float sincosfSin(float X) { float SinX, CosX; __nv_sincosf(X, &SinX, &CosX); @@ -69,6 +73,11 @@ __gpu_kernel void atanfKernel(const float *X, float *Out, runKernelBody<__nv_atanf>(NumElements, Out, X); } +__gpu_kernel void atan2fKernel(const float *X, const float *Y, float *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_atan2f>(NumElements, Out, X, Y); +} + __gpu_kernel void atanhfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_atanhf>(NumElements, Out, X); @@ -119,6 +128,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out, runKernelBody<__nv_expm1f>(NumElements, Out, X); } +__gpu_kernel void hypotfKernel(const float *X, float *Y, float *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_hypotf>(NumElements, Out, X, Y); +} + __gpu_kernel void logKernel(const double *X, double *Out, size_t NumElements) noexcept { runKernelBody<__nv_log>(NumElements, Out, X); @@ -144,6 +158,17 @@ __gpu_kernel void log2fKernel(const float *X, float *Out, runKernelBody<__nv_log2f>(NumElements, Out, X); } +__gpu_kernel void powfKernel(const float *X, float *Y, float *Out, + size_t NumElements) noexcept { + runKernelBody<__nv_powf>(NumElements, Out, X, Y); +} + +__gpu_kernel void powfRoundedExponentKernel(const float *X, float *Y, + float *Out, + size_t NumElements) noexcept { + runKernelBody(NumElements, Out, X, Y); +} + __gpu_kernel void sinfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__nv_sinf>(NumElements, Out, X); diff --git a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp index 7941a05010cc7..6504fff125640 100644 --- a/offload/unittests/Conformance/device_code/DeviceAPIs.hpp +++ b/offload/unittests/Conformance/device_code/DeviceAPIs.hpp @@ -53,6 +53,7 @@ float __nv_acoshf(float); float __nv_asinf(float); float __nv_asinhf(float); float __nv_atanf(float); +float __nv_atan2f(float, float); float __nv_atanhf(float); float __nv_cbrtf(float); float __nv_cosf(float); @@ -63,11 +64,14 @@ float __nv_expf(float); float __nv_exp10f(float); float __nv_exp2f(float); float __nv_expm1f(float); +float __nv_hypotf(float, float); double __nv_log(double); float __nv_logf(float); float __nv_log10f(float); float __nv_log1pf(float); float __nv_log2f(float); +float __nv_powf(float, float); +float __nv_roundf(float); float __nv_sinf(float); void __nv_sincosf(float, float *, float *); float __nv_sinhf(float); @@ -87,6 +91,7 @@ float __ocml_acosh_f32(float); float __ocml_asin_f32(float); float __ocml_asinh_f32(float); float __ocml_atan_f32(float); +float __ocml_atan2_f32(float, float); float __ocml_atanh_f32(float); float __ocml_cbrt_f32(float); float __ocml_cos_f32(float); @@ -97,11 +102,14 @@ float __ocml_exp_f32(float); float __ocml_exp10_f32(float); float __ocml_exp2_f32(float); float __ocml_expm1_f32(float); +float __ocml_hypot_f32(float, float); double __ocml_log_f64(double); float __ocml_log_f32(float); float __ocml_log10_f32(float); float __ocml_log1p_f32(float); float __ocml_log2_f32(float); +float __ocml_pow_f32(float, float); +float __ocml_round_f32(float); float __ocml_sin_f32(float); float __ocml_sincos_f32(float, float *); float __ocml_sinh_f32(float); diff --git a/offload/unittests/Conformance/device_code/HIPMath.cpp b/offload/unittests/Conformance/device_code/HIPMath.cpp index 55f67669872c5..e440be05a5aba 100644 --- a/offload/unittests/Conformance/device_code/HIPMath.cpp +++ b/offload/unittests/Conformance/device_code/HIPMath.cpp @@ -26,6 +26,10 @@ using namespace kernels; // Helpers //===----------------------------------------------------------------------===// +static inline float powfRoundedExponent(float Base, float Exponent) { + return __ocml_pow_f32(Base, __ocml_round_f32(Exponent)); +} + static inline float sincosfSin(float X) { float CosX; float SinX = __ocml_sincos_f32(X, &CosX); @@ -69,6 +73,11 @@ __gpu_kernel void atanfKernel(const float *X, float *Out, runKernelBody<__ocml_atan_f32>(NumElements, Out, X); } +__gpu_kernel void atan2fKernel(const float *X, const float *Y, float *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_atan2_f32>(NumElements, Out, X, Y); +} + __gpu_kernel void atanhfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_atanh_f32>(NumElements, Out, X); @@ -119,6 +128,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out, runKernelBody<__ocml_expm1_f32>(NumElements, Out, X); } +__gpu_kernel void hypotfKernel(const float *X, float *Y, float *Out, + size_t NumElements) noexcept { + runKernelBody__ocml_hypot_f32 > (NumElements, Out, X, Y); +} + __gpu_kernel void logKernel(const double *X, double *Out, size_t NumElements) noexcept { runKernelBody<__ocml_log_f64>(NumElements, Out, X); @@ -144,6 +158,17 @@ __gpu_kernel void log2fKernel(const float *X, float *Out, runKernelBody<__ocml_log2_f32>(NumElements, Out, X); } +__gpu_kernel void powfKernel(const float *X, float *Y, float *Out, + size_t NumElements) noexcept { + runKernelBody<__ocml_pow_f32>(NumElements, Out, X, Y); +} + +__gpu_kernel void powfRoundedExponentKernel(const float *X, float *Y, + float *Out, + size_t NumElements) noexcept { + runKernelBody(NumElements, Out, X, Y); +} + __gpu_kernel void sinfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody<__ocml_sin_f32>(NumElements, Out, X); diff --git a/offload/unittests/Conformance/device_code/LLVMLibm.cpp b/offload/unittests/Conformance/device_code/LLVMLibm.cpp index cf33e0a86e94c..20ad796c6d172 100644 --- a/offload/unittests/Conformance/device_code/LLVMLibm.cpp +++ b/offload/unittests/Conformance/device_code/LLVMLibm.cpp @@ -25,6 +25,10 @@ using namespace kernels; // Helpers //===----------------------------------------------------------------------===// +static inline float powfRoundedExponent(float Base, float Exponent) { + return powf(Base, roundf(Exponent)); +} + static inline float sincosfSin(float X) { float SinX, CosX; sincosf(X, &SinX, &CosX); @@ -68,6 +72,11 @@ __gpu_kernel void atanfKernel(const float *X, float *Out, runKernelBody(NumElements, Out, X); } +__gpu_kernel void atan2fKernel(const float *X, const float *Y, float *Out, + size_t NumElements) noexcept { + runKernelBody(NumElements, Out, X, Y); +} + __gpu_kernel void atanhfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody(NumElements, Out, X); @@ -118,6 +127,11 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out, runKernelBody(NumElements, Out, X); } +__gpu_kernel void hypotfKernel(const float *X, float *Y, float *Out, + size_t NumElements) noexcept { + runKernelBody(NumElements, Out, X, Y); +} + __gpu_kernel void hypotf16Kernel(const float16 *X, float16 *Y, float16 *Out, size_t NumElements) noexcept { runKernelBody(NumElements, Out, X, Y); @@ -148,6 +162,17 @@ __gpu_kernel void log2fKernel(const float *X, float *Out, runKernelBody(NumElements, Out, X); } +__gpu_kernel void powfKernel(const float *X, float *Y, float *Out, + size_t NumElements) noexcept { + runKernelBody(NumElements, Out, X, Y); +} + +__gpu_kernel void powfRoundedExponentKernel(const float *X, float *Y, + float *Out, + size_t NumElements) noexcept { + runKernelBody(NumElements, Out, X, Y); +} + __gpu_kernel void sinfKernel(const float *X, float *Out, size_t NumElements) noexcept { runKernelBody(NumElements, Out, X); diff --git a/offload/unittests/Conformance/tests/Atan2fTest.cpp b/offload/unittests/Conformance/tests/Atan2fTest.cpp new file mode 100644 index 0000000000000..4a46f9a61540a --- /dev/null +++ b/offload/unittests/Conformance/tests/Atan2fTest.cpp @@ -0,0 +1,58 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the conformance test of the atan2f function. +/// +//===----------------------------------------------------------------------===// + +#include "mathtest/CommandLineExtras.hpp" +#include "mathtest/IndexedRange.hpp" +#include "mathtest/RandomGenerator.hpp" +#include "mathtest/RandomState.hpp" +#include "mathtest/TestConfig.hpp" +#include "mathtest/TestRunner.hpp" + +#include "llvm/ADT/StringRef.h" + +#include +#include + +namespace mathtest { + +template <> struct FunctionConfig { + static constexpr llvm::StringRef Name = "atan2f"; + static constexpr llvm::StringRef KernelName = "atan2fKernel"; + + // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4, + // Table 65, Khronos Registry [July 10, 2025]. + static constexpr uint64_t UlpTolerance = 6; +}; +} // namespace mathtest + +int main(int argc, const char **argv) { + llvm::cl::ParseCommandLineOptions(argc, argv, + "Conformance test of the atan2f function"); + + using namespace mathtest; + + uint64_t Seed = 42; + uint64_t Size = 1ULL << 32; + IndexedRange RangeX; + IndexedRange RangeY; + RandomGenerator Generator(SeedTy{Seed}, Size, RangeX, RangeY); + + const auto Configs = cl::getTestConfigs(); + const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR; + const bool IsVerbose = cl::IsVerbose; + + bool Passed = + runTests(Generator, Configs, DeviceBinaryDir, IsVerbose); + + return Passed ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/offload/unittests/Conformance/tests/CMakeLists.txt b/offload/unittests/Conformance/tests/CMakeLists.txt index 7d45e7a8a5865..0bac08d6ed919 100644 --- a/offload/unittests/Conformance/tests/CMakeLists.txt +++ b/offload/unittests/Conformance/tests/CMakeLists.txt @@ -8,6 +8,7 @@ add_conformance_test(acoshf AcoshfTest.cpp) add_conformance_test(asinf AsinfTest.cpp) add_conformance_test(asinhf AsinhfTest.cpp) add_conformance_test(atanf AtanfTest.cpp) +add_conformance_test(atan2f Atan2fTest.cpp) add_conformance_test(atanhf AtanhfTest.cpp) add_conformance_test(cbrtf CbrtfTest.cpp) add_conformance_test(cosf CosfTest.cpp) @@ -18,12 +19,14 @@ add_conformance_test(expf ExpfTest.cpp) add_conformance_test(exp10f Exp10fTest.cpp) add_conformance_test(exp2f Exp2fTest.cpp) add_conformance_test(expm1f Expm1fTest.cpp) +add_conformance_test(hypotf HypotfTest.cpp) add_conformance_test(hypotf16 Hypotf16Test.cpp) add_conformance_test(log LogTest.cpp) add_conformance_test(logf LogfTest.cpp) add_conformance_test(log10f Log10fTest.cpp) add_conformance_test(log1pf Log1pfTest.cpp) add_conformance_test(log2f Log2fTest.cpp) +add_conformance_test(powf PowfTest.cpp) add_conformance_test(sinf SinfTest.cpp) add_conformance_test(sincosf SincosfTest.cpp) add_conformance_test(sinhf SinhfTest.cpp) diff --git a/offload/unittests/Conformance/tests/HypotfTest.cpp b/offload/unittests/Conformance/tests/HypotfTest.cpp new file mode 100644 index 0000000000000..98a4e906920d5 --- /dev/null +++ b/offload/unittests/Conformance/tests/HypotfTest.cpp @@ -0,0 +1,58 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the conformance test of the hypotf function. +/// +//===----------------------------------------------------------------------===// + +#include "mathtest/CommandLineExtras.hpp" +#include "mathtest/IndexedRange.hpp" +#include "mathtest/RandomGenerator.hpp" +#include "mathtest/RandomState.hpp" +#include "mathtest/TestConfig.hpp" +#include "mathtest/TestRunner.hpp" + +#include "llvm/ADT/StringRef.h" + +#include +#include + +namespace mathtest { + +template <> struct FunctionConfig { + static constexpr llvm::StringRef Name = "hypotf"; + static constexpr llvm::StringRef KernelName = "hypotfKernel"; + + // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4, + // Table 65, Khronos Registry [July 10, 2025]. + static constexpr uint64_t UlpTolerance = 4; +}; +} // namespace mathtest + +int main(int argc, const char **argv) { + llvm::cl::ParseCommandLineOptions(argc, argv, + "Conformance test of the hypotf function"); + + using namespace mathtest; + + uint64_t Seed = 42; + uint64_t Size = 1ULL << 32; + IndexedRange RangeX; + IndexedRange RangeY; + RandomGenerator Generator(SeedTy{Seed}, Size, RangeX, RangeY); + + const auto Configs = cl::getTestConfigs(); + const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR; + const bool IsVerbose = cl::IsVerbose; + + bool Passed = + runTests(Generator, Configs, DeviceBinaryDir, IsVerbose); + + return Passed ? EXIT_SUCCESS : EXIT_FAILURE; +} diff --git a/offload/unittests/Conformance/tests/PowfTest.cpp b/offload/unittests/Conformance/tests/PowfTest.cpp new file mode 100644 index 0000000000000..b07434c9dff39 --- /dev/null +++ b/offload/unittests/Conformance/tests/PowfTest.cpp @@ -0,0 +1,74 @@ +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// This file contains the conformance test of the powf function. +/// +//===----------------------------------------------------------------------===// + +#include "mathtest/CommandLineExtras.hpp" +#include "mathtest/IndexedRange.hpp" +#include "mathtest/RandomGenerator.hpp" +#include "mathtest/RandomState.hpp" +#include "mathtest/TestConfig.hpp" +#include "mathtest/TestRunner.hpp" + +#include "llvm/ADT/StringRef.h" + +#include +#include + +static inline float powfRoundedExponent(float Base, float Exponent) { + return powf(Base, roundf(Exponent)); +} + +namespace mathtest { + +template <> struct FunctionConfig { + static constexpr llvm::StringRef Name = "powf (real exponent)"; + static constexpr llvm::StringRef KernelName = "powfKernel"; + + // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4, + // Table 65, Khronos Registry [July 10, 2025]. + static constexpr uint64_t UlpTolerance = 16; +}; + +template <> struct FunctionConfig { + static constexpr llvm::StringRef Name = "powf (integer exponent)"; + static constexpr llvm::StringRef KernelName = "powfRoundedExponentKernel"; + + // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4, + // Table 65, Khronos Registry [July 10, 2025]. + static constexpr uint64_t UlpTolerance = 16; +}; +} // namespace mathtest + +int main(int argc, const char **argv) { + llvm::cl::ParseCommandLineOptions(argc, argv, + "Conformance test of the powf function"); + + using namespace mathtest; + + uint64_t Size = 1ULL << 32; + IndexedRange RangeX; + IndexedRange RangeY; + RandomGenerator Generator0(SeedTy{42}, Size, RangeX, RangeY); + RandomGenerator Generator1(SeedTy{51}, Size, RangeX, RangeY); + + const auto Configs = cl::getTestConfigs(); + const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR; + const bool IsVerbose = cl::IsVerbose; + + bool RealExponentPassed = + runTests(Generator0, Configs, DeviceBinaryDir, IsVerbose); + bool IntegerExponentPassed = runTests( + Generator1, Configs, DeviceBinaryDir, IsVerbose); + + return (RealExponentPassed && IntegerExponentPassed) ? EXIT_SUCCESS + : EXIT_FAILURE; +} From ded6916d3bd02e1fcde27e59b6f9c7b19dba9d08 Mon Sep 17 00:00:00 2001 From: Leandro Augusto Lacerda Campos Date: Thu, 21 Aug 2025 00:46:01 -0300 Subject: [PATCH 2/3] Fix pluralization of 'exponent' --- offload/unittests/Conformance/tests/PowfTest.cpp | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/offload/unittests/Conformance/tests/PowfTest.cpp b/offload/unittests/Conformance/tests/PowfTest.cpp index b07434c9dff39..246801e390aea 100644 --- a/offload/unittests/Conformance/tests/PowfTest.cpp +++ b/offload/unittests/Conformance/tests/PowfTest.cpp @@ -30,7 +30,7 @@ static inline float powfRoundedExponent(float Base, float Exponent) { namespace mathtest { template <> struct FunctionConfig { - static constexpr llvm::StringRef Name = "powf (real exponent)"; + static constexpr llvm::StringRef Name = "powf (real exponents)"; static constexpr llvm::StringRef KernelName = "powfKernel"; // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4, @@ -39,7 +39,7 @@ template <> struct FunctionConfig { }; template <> struct FunctionConfig { - static constexpr llvm::StringRef Name = "powf (integer exponent)"; + static constexpr llvm::StringRef Name = "powf (integer exponents)"; static constexpr llvm::StringRef KernelName = "powfRoundedExponentKernel"; // Source: The Khronos Group, The OpenCL C Specification v3.0.19, Sec. 7.4, @@ -64,11 +64,11 @@ int main(int argc, const char **argv) { const llvm::StringRef DeviceBinaryDir = DEVICE_BINARY_DIR; const bool IsVerbose = cl::IsVerbose; - bool RealExponentPassed = + bool RealExponentsPassed = runTests(Generator0, Configs, DeviceBinaryDir, IsVerbose); - bool IntegerExponentPassed = runTests( + bool IntegerExponentsPassed = runTests( Generator1, Configs, DeviceBinaryDir, IsVerbose); - return (RealExponentPassed && IntegerExponentPassed) ? EXIT_SUCCESS - : EXIT_FAILURE; + return (RealExponentsPassed && IntegerExponentsPassed) ? EXIT_SUCCESS + : EXIT_FAILURE; } From 5e387e2a766a645279750348651af6c50cc04230 Mon Sep 17 00:00:00 2001 From: Leandro Augusto Lacerda Campos Date: Thu, 21 Aug 2025 12:57:26 -0300 Subject: [PATCH 3/3] Correct template syntax in `hypotfKernel` --- offload/unittests/Conformance/device_code/HIPMath.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/offload/unittests/Conformance/device_code/HIPMath.cpp b/offload/unittests/Conformance/device_code/HIPMath.cpp index e440be05a5aba..74a7f5c3a9492 100644 --- a/offload/unittests/Conformance/device_code/HIPMath.cpp +++ b/offload/unittests/Conformance/device_code/HIPMath.cpp @@ -130,7 +130,7 @@ __gpu_kernel void expm1fKernel(const float *X, float *Out, __gpu_kernel void hypotfKernel(const float *X, float *Y, float *Out, size_t NumElements) noexcept { - runKernelBody__ocml_hypot_f32 > (NumElements, Out, X, Y); + runKernelBody<__ocml_hypot_f32>(NumElements, Out, X, Y); } __gpu_kernel void logKernel(const double *X, double *Out,