Skip to content

Commit 3fcc2d5

Browse files
jan-wassenberg authored and copybara-github committed
split math_test due to RVV build timeout
PiperOrigin-RevId: 808973974
1 parent 54fc0d7 commit 3fcc2d5

File tree

7 files changed

+1013
-459
lines changed

7 files changed

+1013
-459
lines changed

BUILD

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,21 @@ HWY_CONTRIB_TESTS = (
553553
"math_test",
554554
(":math",),
555555
),
556+
(
557+
"hwy/contrib/math/",
558+
"math_hyper_test",
559+
(":math",),
560+
),
561+
(
562+
"hwy/contrib/math/",
563+
"math_tan_test",
564+
(":math",),
565+
),
566+
(
567+
"hwy/contrib/math/",
568+
"math_trig_test",
569+
(":math",),
570+
),
556571
(
557572
"hwy/contrib/random/",
558573
"random_test",

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -887,6 +887,9 @@ list(APPEND HWY_TEST_FILES
887887
# Disabled due to SIGILL in clang7 debug build during gtest discovery phase,
888888
# not reproducible locally. Still tested via bazel build.
889889
hwy/contrib/math/math_test.cc
890+
hwy/contrib/math/math_hyper_test.cc
891+
hwy/contrib/math/math_tan_test.cc
892+
hwy/contrib/math/math_trig_test.cc
890893
hwy/contrib/random/random_test.cc
891894
hwy/contrib/sort/bench_sort.cc
892895
hwy/contrib/sort/sort_test.cc
Lines changed: 211 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,211 @@
1+
// Copyright 2020 Google LLC
2+
// SPDX-License-Identifier: Apache-2.0
3+
//
4+
// Licensed under the Apache License, Version 2.0 (the "License");
5+
// you may not use this file except in compliance with the License.
6+
// You may obtain a copy of the License at
7+
//
8+
// http://www.apache.org/licenses/LICENSE-2.0
9+
//
10+
// Unless required by applicable law or agreed to in writing, software
11+
// distributed under the License is distributed on an "AS IS" BASIS,
12+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13+
// See the License for the specific language governing permissions and
14+
// limitations under the License.
15+
16+
#include <stdint.h>
17+
#include <stdio.h>
18+
19+
#include <cfloat> // FLT_MAX
20+
#include <cmath> // std::abs
21+
22+
#include "hwy/base.h"
23+
24+
// clang-format off
25+
#undef HWY_TARGET_INCLUDE
26+
#define HWY_TARGET_INCLUDE "hwy/contrib/math/math_hyper_test.cc"
27+
#include "hwy/foreach_target.h" // IWYU pragma: keep
28+
#include "hwy/highway.h"
29+
#include "hwy/contrib/math/math-inl.h"
30+
#include "hwy/tests/test_util-inl.h"
31+
// clang-format on
32+
33+
HWY_BEFORE_NAMESPACE();
34+
namespace hwy {
35+
namespace HWY_NAMESPACE {
36+
namespace {
37+
38+
// Excess precision (intermediate results kept in 80-bit x87 registers) has
// caused test failures. One failure mode: Log1p computes a 1.0 that is not
// exactly equal to 1.0f, so is_pole incorrectly evaluates to false.
//
// HWY_MATH_TEST_EXCESS_PRECISION is 1 (tests are skipped) only if ALL of the
// following hold, and 0 (math_test runs normally) otherwise:
// - 32-bit x86, actual GCC, and the HWY_SCALAR or HWY_EMU128 target.
// - GCC older than 13. For GCC 13+, because CMAKE_CXX_EXTENSIONS is OFF, we
//   build with -std= and hence also -fexcess-precision=standard, so there is
//   no problem. See #1708 and
//   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=323.
// - SSE2 is not enabled. The build system must enable it, e.g. via
//   HWY_CMAKE_SSE2 - see
//   https://stackoverflow.com/questions/20869904/c-handling-of-excess-precision .
#undef HWY_MATH_TEST_EXCESS_PRECISION
#if HWY_ARCH_X86_32 && HWY_COMPILER_GCC_ACTUAL &&             \
    (HWY_TARGET == HWY_SCALAR || HWY_TARGET == HWY_EMU128) && \
    (HWY_COMPILER_GCC_ACTUAL < 1300) && !defined(__SSE2__)
#define HWY_MATH_TEST_EXCESS_PRECISION 1
#pragma message( \
    "Skipping scalar math_test on 32-bit x86 GCC <13 without HWY_CMAKE_SSE2")
#else  // any other configuration: no excess-precision problem
#define HWY_MATH_TEST_EXCESS_PRECISION 0
#endif  // HWY_ARCH_X86_32 etc
68+
69+
template <class T, class D>
70+
HWY_NOINLINE void TestMath(const char* name, T (*fx1)(T),
71+
Vec<D> (*fxN)(D, VecArg<Vec<D>>), D d, T min, T max,
72+
uint64_t max_error_ulp) {
73+
if (HWY_MATH_TEST_EXCESS_PRECISION) {
74+
static bool once = true;
75+
if (once) {
76+
once = false;
77+
HWY_WARN("Skipping math_test due to GCC issue with excess precision.\n");
78+
}
79+
return;
80+
}
81+
82+
using UintT = MakeUnsigned<T>;
83+
84+
const UintT min_bits = BitCastScalar<UintT>(min);
85+
const UintT max_bits = BitCastScalar<UintT>(max);
86+
87+
// If min is negative and max is positive, the range needs to be broken into
88+
// two pieces, [+0, max] and [-0, min], otherwise [min, max].
89+
int range_count = 1;
90+
UintT ranges[2][2] = {{min_bits, max_bits}, {0, 0}};
91+
if ((min < 0.0) && (max > 0.0)) {
92+
ranges[0][0] = BitCastScalar<UintT>(ConvertScalarTo<T>(+0.0));
93+
ranges[0][1] = max_bits;
94+
ranges[1][0] = BitCastScalar<UintT>(ConvertScalarTo<T>(-0.0));
95+
ranges[1][1] = min_bits;
96+
range_count = 2;
97+
}
98+
99+
uint64_t max_ulp = 0;
100+
// Emulation is slower, so cannot afford as many.
101+
constexpr UintT kSamplesPerRange = static_cast<UintT>(AdjustedReps(4000));
102+
for (int range_index = 0; range_index < range_count; ++range_index) {
103+
const UintT start = ranges[range_index][0];
104+
const UintT stop = ranges[range_index][1];
105+
const UintT step = HWY_MAX(1, ((stop - start) / kSamplesPerRange));
106+
for (UintT value_bits = start; value_bits <= stop; value_bits += step) {
107+
// For reasons unknown, the HWY_MAX is necessary on RVV, otherwise
108+
// value_bits can be less than start, and thus possibly NaN.
109+
const T value =
110+
BitCastScalar<T>(HWY_MIN(HWY_MAX(start, value_bits), stop));
111+
const T actual = GetLane(fxN(d, Set(d, value)));
112+
const T expected = fx1(value);
113+
114+
// Skip small inputs and outputs on armv7, it flushes subnormals to zero.
115+
#if HWY_TARGET <= HWY_NEON_WITHOUT_AES && HWY_ARCH_ARM_V7
116+
if ((std::abs(value) < 1e-37f) || (std::abs(expected) < 1e-37f)) {
117+
continue;
118+
}
119+
#endif
120+
121+
const auto ulp = hwy::detail::ComputeUlpDelta(actual, expected);
122+
max_ulp = HWY_MAX(max_ulp, ulp);
123+
if (ulp > max_error_ulp) {
124+
fprintf(stderr, "%s: %s(%f) expected %E actual %E ulp %g max ulp %u\n",
125+
hwy::TypeName(T(), Lanes(d)).c_str(), name, value, expected,
126+
actual, static_cast<double>(ulp),
127+
static_cast<uint32_t>(max_error_ulp));
128+
}
129+
}
130+
}
131+
fprintf(stderr, "%s: %s max_ulp %g\n", hwy::TypeName(T(), Lanes(d)).c_str(),
132+
name, static_cast<double>(max_ulp));
133+
HWY_ASSERT(max_ulp <= max_error_ulp);
134+
}
135+
136+
// Defines the function TestAll<NAME>(), which passes a
// ForPartialVectors<Test<NAME>> functor to ForFloat3264Types. A type named
// Test<NAME> must already be declared where this macro is expanded.
#define DEFINE_MATH_TEST_FUNC(NAME) \
137+
HWY_NOINLINE void TestAll##NAME() { \
138+
ForFloat3264Types(ForPartialVectors<Test##NAME>()); \
139+
}
140+
141+
// Defines struct Test<NAME> plus, via DEFINE_MATH_TEST_FUNC, TestAll<NAME>().
// Test<NAME>::operator()(T, D d) forwards to TestMath<T, D>:
// - if sizeof(T) == 4, with the F32* arguments (scalar function, vector
//   function, min, max, max ULP error);
// - otherwise with the F64* arguments, where F64_MIN/F64_MAX are cast to T.
// Both branches are compiled for every T; the static_cast keeps the F64
// branch well-formed when T is a 4-byte type.
// HWY_STR(NAME) supplies the name printed in diagnostics.
#undef DEFINE_MATH_TEST
142+
#define DEFINE_MATH_TEST(NAME, F32x1, F32xN, F32_MIN, F32_MAX, F32_ERROR, \
143+
F64x1, F64xN, F64_MIN, F64_MAX, F64_ERROR) \
144+
struct Test##NAME { \
145+
template <class T, class D> \
146+
HWY_NOINLINE void operator()(T, D d) { \
147+
if (sizeof(T) == 4) { \
148+
TestMath<T, D>(HWY_STR(NAME), F32x1, F32xN, d, F32_MIN, F32_MAX, \
149+
F32_ERROR); \
150+
} else { \
151+
TestMath<T, D>(HWY_STR(NAME), F64x1, F64xN, d, \
152+
static_cast<T>(F64_MIN), static_cast<T>(F64_MAX), \
153+
F64_ERROR); \
154+
} \
155+
} \
156+
}; \
157+
DEFINE_MATH_TEST_FUNC(NAME)
158+
159+
// Floating point values closest to but less than 1.0. Avoid variables with
160+
// static initializers inside HWY_BEFORE_NAMESPACE/HWY_AFTER_NAMESPACE to
161+
// ensure target-specific code does not leak into startup code.
162+
float kNearOneF() { return BitCastScalar<float>(0x3F7FFFFF); }
163+
// Returns the double closest to but less than 1.0 (bit pattern
// 0x3FEFFFFFFFFFFFFF). A function rather than a variable, to avoid a static
// initializer in target-specific code.
double kNearOneD() {
  const uint64_t near_one_bits = 0x3FEFFFFFFFFFFFFFULL;
  return BitCastScalar<double>(near_one_bits);
}
164+
165+
// Maximum tolerated ULP error for the float Acosh test: 3 in general, relaxed
// to 8 when building with MinGW (__MINGW32__ defined).
constexpr uint64_t ACosh32ULP() {
#ifndef __MINGW32__
  return 3;
#else
  return 8;
#endif
}
172+
173+
// Instantiates one test per hyperbolic function. Arguments after the test
// name are, first for float and then for double: scalar reference function,
// vector function under test, minimum input, maximum input, and maximum
// tolerated error in ULP.
// clang-format off
174+
// acosh over [1, largest finite value]; the float tolerance is
// platform-dependent, see ACosh32ULP().
DEFINE_MATH_TEST(Acosh,
175+
std::acosh, CallAcosh, +1.0f, +FLT_MAX, ACosh32ULP(),
176+
std::acosh, CallAcosh, +1.0, +DBL_MAX, 3)
177+
// asinh over the entire finite range.
DEFINE_MATH_TEST(Asinh,
178+
std::asinh, CallAsinh, -FLT_MAX, +FLT_MAX, 3,
179+
std::asinh, CallAsinh, -DBL_MAX, +DBL_MAX, 3)
180+
// NEON has ULP 4 instead of 3
181+
// atanh over the open interval (-1, 1): the bounds are the values adjacent
// to +/-1.0.
DEFINE_MATH_TEST(Atanh,
182+
std::atanh, CallAtanh, -kNearOneF(), +kNearOneF(), 4,
183+
std::atanh, CallAtanh, -kNearOneD(), +kNearOneD(), 3)
184+
// sinh over [-80, 80] (float) and [-709, 709] (double).
// NOTE(review): bounds presumably chosen so the result stays finite - confirm.
DEFINE_MATH_TEST(Sinh,
185+
std::sinh, CallSinh, -80.0f, +80.0f, 4,
186+
std::sinh, CallSinh, -709.0, +709.0, 4)
187+
// tanh over the entire finite range.
DEFINE_MATH_TEST(Tanh,
188+
std::tanh, CallTanh, -FLT_MAX, +FLT_MAX, 4,
189+
std::tanh, CallTanh, -DBL_MAX, +DBL_MAX, 4)
190+
// clang-format on
191+
192+
} // namespace
193+
// NOLINTNEXTLINE(google-readability-namespace-comments)
194+
} // namespace HWY_NAMESPACE
195+
} // namespace hwy
196+
HWY_AFTER_NAMESPACE();
197+
198+
// Registers each TestAll* function with the HwyMathHyperTest suite and emits
// the test main.
// NOTE(review): HWY_ONCE presumably ensures this section is compiled only
// once even though foreach_target.h re-includes this file per target - confirm.
#if HWY_ONCE
199+
namespace hwy {
200+
namespace {
201+
HWY_BEFORE_TEST(HwyMathHyperTest);
202+
HWY_EXPORT_AND_TEST_P(HwyMathHyperTest, TestAllAcosh);
203+
HWY_EXPORT_AND_TEST_P(HwyMathHyperTest, TestAllAsinh);
204+
HWY_EXPORT_AND_TEST_P(HwyMathHyperTest, TestAllAtanh);
205+
HWY_EXPORT_AND_TEST_P(HwyMathHyperTest, TestAllSinh);
206+
HWY_EXPORT_AND_TEST_P(HwyMathHyperTest, TestAllTanh);
207+
HWY_AFTER_TEST();
208+
} // namespace
209+
} // namespace hwy
210+
HWY_TEST_MAIN();
211+
#endif // HWY_ONCE

0 commit comments

Comments
 (0)