|
| 1 | +// Copyright 2020 Google LLC |
| 2 | +// SPDX-License-Identifier: Apache-2.0 |
| 3 | +// |
| 4 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 5 | +// you may not use this file except in compliance with the License. |
| 6 | +// You may obtain a copy of the License at |
| 7 | +// |
| 8 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 9 | +// |
| 10 | +// Unless required by applicable law or agreed to in writing, software |
| 11 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 12 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 13 | +// See the License for the specific language governing permissions and |
| 14 | +// limitations under the License. |
| 15 | + |
| 16 | +#include <stdint.h> |
| 17 | +#include <stdio.h> |
| 18 | + |
| 19 | +#include <cfloat> // FLT_MAX |
| 20 | +#include <cmath> // std::abs |
| 21 | + |
| 22 | +#include "hwy/base.h" |
| 23 | + |
| 24 | +// clang-format off |
| 25 | +#undef HWY_TARGET_INCLUDE |
| 26 | +#define HWY_TARGET_INCLUDE "hwy/contrib/math/math_hyper_test.cc" |
| 27 | +#include "hwy/foreach_target.h" // IWYU pragma: keep |
| 28 | +#include "hwy/highway.h" |
| 29 | +#include "hwy/contrib/math/math-inl.h" |
| 30 | +#include "hwy/tests/test_util-inl.h" |
| 31 | +// clang-format on |
| 32 | + |
| 33 | +HWY_BEFORE_NAMESPACE(); |
| 34 | +namespace hwy { |
| 35 | +namespace HWY_NAMESPACE { |
| 36 | +namespace { |
| 37 | + |
// We have had test failures caused by excess precision due to keeping
// intermediate results in 80-bit x87 registers. One such failure mode is that
// Log1p computes a 1.0 which is not exactly equal to 1.0f, causing is_pole to
// incorrectly evaluate to false.
//
// HWY_MATH_TEST_EXCESS_PRECISION is set to 1 (tests are skipped) only if ALL of
// the following hold, otherwise it is 0 and the tests run normally:
// - 32-bit x86, compiled by GCC, for the HWY_SCALAR or HWY_EMU128 target.
// - GCC older than 13. For GCC 13+: because CMAKE_CXX_EXTENSIONS is OFF, we
//   build with -std= and hence also -fexcess-precision=standard, so there is
//   no problem. See #1708 and
//   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323.
// - __SSE2__ is not defined. The build system must enable SSE2, e.g. via
//   HWY_CMAKE_SSE2 - see
//   https://stackoverflow.com/questions/20869904/c-handling-of-excess-precision .
#undef HWY_MATH_TEST_EXCESS_PRECISION
#if HWY_ARCH_X86_32 && HWY_COMPILER_GCC_ACTUAL &&             \
    (HWY_TARGET == HWY_SCALAR || HWY_TARGET == HWY_EMU128) && \
    (HWY_COMPILER_GCC_ACTUAL < 1300) && !defined(__SSE2__)
#define HWY_MATH_TEST_EXCESS_PRECISION 1
#pragma message( \
    "Skipping scalar math_test on 32-bit x86 GCC <13 without HWY_CMAKE_SSE2")
#else  // unaffected configuration: running math_test normally
#define HWY_MATH_TEST_EXCESS_PRECISION 0
#endif  // HWY_ARCH_X86_32 etc
| 68 | + |
| 69 | +template <class T, class D> |
| 70 | +HWY_NOINLINE void TestMath(const char* name, T (*fx1)(T), |
| 71 | + Vec<D> (*fxN)(D, VecArg<Vec<D>>), D d, T min, T max, |
| 72 | + uint64_t max_error_ulp) { |
| 73 | + if (HWY_MATH_TEST_EXCESS_PRECISION) { |
| 74 | + static bool once = true; |
| 75 | + if (once) { |
| 76 | + once = false; |
| 77 | + HWY_WARN("Skipping math_test due to GCC issue with excess precision.\n"); |
| 78 | + } |
| 79 | + return; |
| 80 | + } |
| 81 | + |
| 82 | + using UintT = MakeUnsigned<T>; |
| 83 | + |
| 84 | + const UintT min_bits = BitCastScalar<UintT>(min); |
| 85 | + const UintT max_bits = BitCastScalar<UintT>(max); |
| 86 | + |
| 87 | + // If min is negative and max is positive, the range needs to be broken into |
| 88 | + // two pieces, [+0, max] and [-0, min], otherwise [min, max]. |
| 89 | + int range_count = 1; |
| 90 | + UintT ranges[2][2] = {{min_bits, max_bits}, {0, 0}}; |
| 91 | + if ((min < 0.0) && (max > 0.0)) { |
| 92 | + ranges[0][0] = BitCastScalar<UintT>(ConvertScalarTo<T>(+0.0)); |
| 93 | + ranges[0][1] = max_bits; |
| 94 | + ranges[1][0] = BitCastScalar<UintT>(ConvertScalarTo<T>(-0.0)); |
| 95 | + ranges[1][1] = min_bits; |
| 96 | + range_count = 2; |
| 97 | + } |
| 98 | + |
| 99 | + uint64_t max_ulp = 0; |
| 100 | + // Emulation is slower, so cannot afford as many. |
| 101 | + constexpr UintT kSamplesPerRange = static_cast<UintT>(AdjustedReps(4000)); |
| 102 | + for (int range_index = 0; range_index < range_count; ++range_index) { |
| 103 | + const UintT start = ranges[range_index][0]; |
| 104 | + const UintT stop = ranges[range_index][1]; |
| 105 | + const UintT step = HWY_MAX(1, ((stop - start) / kSamplesPerRange)); |
| 106 | + for (UintT value_bits = start; value_bits <= stop; value_bits += step) { |
| 107 | + // For reasons unknown, the HWY_MAX is necessary on RVV, otherwise |
| 108 | + // value_bits can be less than start, and thus possibly NaN. |
| 109 | + const T value = |
| 110 | + BitCastScalar<T>(HWY_MIN(HWY_MAX(start, value_bits), stop)); |
| 111 | + const T actual = GetLane(fxN(d, Set(d, value))); |
| 112 | + const T expected = fx1(value); |
| 113 | + |
| 114 | + // Skip small inputs and outputs on armv7, it flushes subnormals to zero. |
| 115 | +#if HWY_TARGET <= HWY_NEON_WITHOUT_AES && HWY_ARCH_ARM_V7 |
| 116 | + if ((std::abs(value) < 1e-37f) || (std::abs(expected) < 1e-37f)) { |
| 117 | + continue; |
| 118 | + } |
| 119 | +#endif |
| 120 | + |
| 121 | + const auto ulp = hwy::detail::ComputeUlpDelta(actual, expected); |
| 122 | + max_ulp = HWY_MAX(max_ulp, ulp); |
| 123 | + if (ulp > max_error_ulp) { |
| 124 | + fprintf(stderr, "%s: %s(%f) expected %E actual %E ulp %g max ulp %u\n", |
| 125 | + hwy::TypeName(T(), Lanes(d)).c_str(), name, value, expected, |
| 126 | + actual, static_cast<double>(ulp), |
| 127 | + static_cast<uint32_t>(max_error_ulp)); |
| 128 | + } |
| 129 | + } |
| 130 | + } |
| 131 | + fprintf(stderr, "%s: %s max_ulp %g\n", hwy::TypeName(T(), Lanes(d)).c_str(), |
| 132 | + name, static_cast<double>(max_ulp)); |
| 133 | + HWY_ASSERT(max_ulp <= max_error_ulp); |
| 134 | +} |
| 135 | + |
// Defines TestAll<NAME>(), which hands the Test<NAME> functor to
// ForFloat3264Types(ForPartialVectors<...>()).
#define DEFINE_MATH_TEST_FUNC(NAME)                     \
  HWY_NOINLINE void TestAll##NAME() {                   \
    ForFloat3264Types(ForPartialVectors<Test##NAME>()); \
  }

// Defines the functor Test<NAME> and, via DEFINE_MATH_TEST_FUNC, the function
// TestAll<NAME>(). The functor forwards to TestMath with one of two argument
// sets, chosen by sizeof(T):
//   4 bytes:   F32x1 (scalar reference), F32xN (vector function),
//              [F32_MIN, F32_MAX] (input range), F32_ERROR (max ULP error).
//   otherwise: the corresponding F64* arguments, with the range bounds
//              converted to T via static_cast.
// The selection is an ordinary `if`, so both calls are compiled for every T.
#undef DEFINE_MATH_TEST
#define DEFINE_MATH_TEST(NAME, F32x1, F32xN, F32_MIN, F32_MAX, F32_ERROR, \
                         F64x1, F64xN, F64_MIN, F64_MAX, F64_ERROR)       \
  struct Test##NAME {                                                     \
    template <class T, class D>                                           \
    HWY_NOINLINE void operator()(T, D d) {                                \
      if (sizeof(T) != 4) {                                               \
        TestMath<T, D>(HWY_STR(NAME), F64x1, F64xN, d,                    \
                       static_cast<T>(F64_MIN), static_cast<T>(F64_MAX),  \
                       F64_ERROR);                                        \
      } else {                                                            \
        TestMath<T, D>(HWY_STR(NAME), F32x1, F32xN, d, F32_MIN, F32_MAX,  \
                       F32_ERROR);                                        \
      }                                                                   \
    }                                                                     \
  };                                                                      \
  DEFINE_MATH_TEST_FUNC(NAME)
| 158 | + |
| 159 | +// Floating point values closest to but less than 1.0. Avoid variables with |
| 160 | +// static initializers inside HWY_BEFORE_NAMESPACE/HWY_AFTER_NAMESPACE to |
| 161 | +// ensure target-specific code does not leak into startup code. |
| 162 | +float kNearOneF() { return BitCastScalar<float>(0x3F7FFFFF); } |
| 163 | +double kNearOneD() { return BitCastScalar<double>(0x3FEFFFFFFFFFFFFFULL); } |
| 164 | + |
// Maximum tolerated ULP error for the float Acosh test: 8 when building with
// MinGW (__MINGW32__ defined), 3 everywhere else.
constexpr uint64_t ACosh32ULP() {
#ifndef __MINGW32__
  return 3;
#else
  return 8;
#endif
}
| 172 | + |
| 173 | +// clang-format off |
| 174 | +DEFINE_MATH_TEST(Acosh, |
| 175 | + std::acosh, CallAcosh, +1.0f, +FLT_MAX, ACosh32ULP(), |
| 176 | + std::acosh, CallAcosh, +1.0, +DBL_MAX, 3) |
| 177 | +DEFINE_MATH_TEST(Asinh, |
| 178 | + std::asinh, CallAsinh, -FLT_MAX, +FLT_MAX, 3, |
| 179 | + std::asinh, CallAsinh, -DBL_MAX, +DBL_MAX, 3) |
| 180 | +// NEON has ULP 4 instead of 3 |
| 181 | +DEFINE_MATH_TEST(Atanh, |
| 182 | + std::atanh, CallAtanh, -kNearOneF(), +kNearOneF(), 4, |
| 183 | + std::atanh, CallAtanh, -kNearOneD(), +kNearOneD(), 3) |
| 184 | +DEFINE_MATH_TEST(Sinh, |
| 185 | + std::sinh, CallSinh, -80.0f, +80.0f, 4, |
| 186 | + std::sinh, CallSinh, -709.0, +709.0, 4) |
| 187 | +DEFINE_MATH_TEST(Tanh, |
| 188 | + std::tanh, CallTanh, -FLT_MAX, +FLT_MAX, 4, |
| 189 | + std::tanh, CallTanh, -DBL_MAX, +DBL_MAX, 4) |
| 190 | +// clang-format on |
| 191 | + |
| 192 | +} // namespace |
| 193 | +// NOLINTNEXTLINE(google-readability-namespace-comments) |
| 194 | +} // namespace HWY_NAMESPACE |
| 195 | +} // namespace hwy |
| 196 | +HWY_AFTER_NAMESPACE(); |
| 197 | + |
// Compiled only when HWY_ONCE is nonzero. Lists every TestAll* function
// defined above under the HwyMathHyperTest suite name.
// NOTE(review): presumably HWY_ONCE limits this to a single pass of the
// per-target re-inclusion done by foreach_target.h - confirm.
#if HWY_ONCE
namespace hwy {
namespace {

HWY_BEFORE_TEST(HwyMathHyperTest);
HWY_EXPORT_AND_TEST_P(HwyMathHyperTest, TestAllAcosh);
HWY_EXPORT_AND_TEST_P(HwyMathHyperTest, TestAllAsinh);
HWY_EXPORT_AND_TEST_P(HwyMathHyperTest, TestAllAtanh);
HWY_EXPORT_AND_TEST_P(HwyMathHyperTest, TestAllSinh);
HWY_EXPORT_AND_TEST_P(HwyMathHyperTest, TestAllTanh);
HWY_AFTER_TEST();

}  // namespace
}  // namespace hwy

HWY_TEST_MAIN();
#endif  // HWY_ONCE
0 commit comments