Skip to content

[UnitTest] Add test for fmax reductions without fast-math. #266

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 18, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions SingleSource/UnitTests/Vectorizer/common.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,14 @@
#include <memory>
#include <random>

// Defines two local lambdas, ScalarFn and VectorFn, with identical bodies that
// differ only in the loop pragma placed between Init and Loop:
//   - ScalarFn: "clang loop vectorize(disable) interleave_count(1)"
//   - VectorFn: "clang loop vectorize(enable)"
// Both lambdas have the signature (auto *A, Type TC) -> Type, so the Init and
// Loop fragments may refer to the parameters A and TC.
//   Init - statements emitted before the pragma (e.g. the accumulator
//          declaration).
//   Loop - the loop the pragma applies to, followed by any trailing statements
//          such as the return.
//   Type - the lambda's return type and the type of its TC parameter.
#define DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(Init, Loop, Type) \
auto ScalarFn = [](auto *A, Type TC) -> Type { \
Init _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
}; \
auto VectorFn = [](auto *A, Type TC) -> Type { \
Init _Pragma("clang loop vectorize(enable)") Loop \
};

#define DEFINE_SCALAR_AND_VECTOR_FN2(Init, Loop) \
auto ScalarFn = [](auto *A, auto *B, unsigned TC) { \
Init _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
Expand Down
218 changes: 218 additions & 0 deletions SingleSource/UnitTests/Vectorizer/fmax-reduction.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,218 @@
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <limits>
#include <memory>
#include <stdint.h>

#include "common.h"

/// Returns true if \p A and \p B should be treated as the same reduction
/// result.
///
/// This is stricter than operator== in two ways:
///   - NaN is equal to NaN (and to nothing else), and
///   - +0.0 and -0.0 are considered different.
static bool isEqual(float A, float B) {
  // If either value is NaN, both must be NaN. Both sides are tested up front
  // so that a NaN can never reach the sign-bit comparison below, where the
  // outcome would otherwise depend on the NaN's sign bit.
  const bool AIsNaN = std::isnan(A);
  const bool BIsNaN = std::isnan(B);
  if (AIsNaN || BIsNaN)
    return AIsNaN && BIsNaN;

  // +0.0 == -0.0 compares true, so zeros are distinguished by their sign bit.
  // Only do this when *both* values are zero; a zero and a non-zero value must
  // fall through to the ordinary comparison.
  if (A == 0.0f && B == 0.0f)
    return std::signbit(A) == std::signbit(B);

  return A == B;
}

/// Callable type of the reduction kernels under test: takes a pointer to the
/// input data and an unsigned count, and returns the reduced value.
template <typename Ty> using Fn1Ty = std::function<Ty(Ty *, unsigned)>;

/// Runs \p ScalarFn and \p VectorFn on the same input and terminates the
/// program with exit code 1 if their results differ according to isEqual().
///
/// \param ScalarFn  Kernel whose result is used as the reference.
/// \param VectorFn  Kernel whose result is checked against the reference.
/// \param Src       Input buffer passed to both kernels.
/// \param N         Count passed to both kernels as their second argument.
/// \param Type      Label for the input pattern, printed on a mismatch.
template <typename Ty>
static void check(Fn1Ty<Ty> ScalarFn, Fn1Ty<Ty> VectorFn, Ty *Src, unsigned N,
                  const char *Type) {
  const Ty Reference = ScalarFn(Src, N);
  const Ty ToCheck = VectorFn(Src, N);
  if (isEqual(Reference, ToCheck))
    return;

  std::cerr << "Miscompare " << Type << ": " << Reference << " != " << ToCheck
            << "\n";
  std::exit(1);
}

/// Compares \p ScalarFn against \p VectorFn on a series of 1024-element input
/// patterns: the initial contents from init_data, sorted and reverse-sorted
/// data, constant fills (max, min, zero), NaNs, signed zeros, and a maximum or
/// NaN placed at every index. Any mismatch terminates the program via check().
///
/// \param ScalarFn  Kernel whose result is used as the reference.
/// \param VectorFn  Kernel whose result is checked against the reference.
/// \param Name      Test name printed to stdout before the checks run.
template <typename Ty>
static void checkVectorFunction(Fn1Ty<Ty> ScalarFn, Fn1Ty<Ty> VectorFn,
                                const char *Name) {
  std::cout << "Checking " << Name << "\n";

  const unsigned N = 1024;
  std::unique_ptr<Ty[]> Src1(new Ty[N]);
  init_data(Src1, N);

  // Use raw begin/end pointers so the one-past-the-end position is formed by
  // pointer arithmetic rather than by indexing element N.
  Ty *const Data = Src1.get();
  Ty *const DataEnd = Data + N;
  const Ty QuietNaN = std::numeric_limits<Ty>::quiet_NaN();

  // Check with random inputs (the contents init_data left in the buffer).
  check(ScalarFn, VectorFn, Data, N, "random");

  // Check with sorted inputs.
  std::sort(Data, DataEnd);
  check(ScalarFn, VectorFn, Data, N, "sorted");

  // Check with reverse sorted inputs.
  std::reverse(Data, DataEnd);
  check(ScalarFn, VectorFn, Data, N, "reverse-sorted");

  // Check with all max values.
  std::fill(Data, DataEnd, std::numeric_limits<Ty>::max());
  check(ScalarFn, VectorFn, Data, N, "all-max");

  // Check with all min values.
  std::fill(Data, DataEnd, std::numeric_limits<Ty>::min());
  check(ScalarFn, VectorFn, Data, N, "all-min");

  // Check with inputs all zero.
  std::fill(Data, DataEnd, static_cast<Ty>(0.0));
  check(ScalarFn, VectorFn, Data, N, "all-zeros");

  // Check with NaN at different indices.
  for (unsigned NaNIdx = 3; NaNIdx != 32; NaNIdx++) {
    std::fill(Data, DataEnd, static_cast<Ty>(100));
    Data[NaNIdx] = QuietNaN;

    check(ScalarFn, VectorFn, Data, N, "NaN");
  }

  // Check with multiple signed-zeros at different positions. Zeros written for
  // earlier offsets are intentionally left in place, so later iterations see
  // several +0.0 values after the -0.0.
  for (unsigned Idx = 0; Idx != 64; ++Idx) {
    std::fill(Data, DataEnd, static_cast<Ty>(-1.0));

    for (unsigned Offset = 1; Offset != 16; ++Offset) {
      Data[Idx] = -0.0;
      Data[Idx + Offset] = +0.0;

      check(ScalarFn, VectorFn, Data, N, "signed-zeros");
    }
  }

  // Check with max value at all possible indices.
  for (unsigned Idx = 0; Idx != N; ++Idx) {
    for (unsigned I = 0; I != N; ++I)
      Data[I] = I;

    Data[Idx] = N + 1;

    check(ScalarFn, VectorFn, Data, N, "full");

    // Add further copies of the max value at increasing offsets after Idx.
    for (unsigned Offset = 1; Offset != 16; ++Offset) {
      if (Idx + Offset < N)
        Data[Idx + Offset] = N + 1;

      check(ScalarFn, VectorFn, Data, N, "full");
    }
  }

  // Check with NaN value at all possible indices.
  for (unsigned Idx = 0; Idx != N; ++Idx) {
    for (unsigned I = 0; I != N; ++I)
      Data[I] = I;

    Data[Idx] = QuietNaN;
    check(ScalarFn, VectorFn, Data, N, "full-with-nan");

    // Check with multiple NaNs at different offsets.
    for (unsigned Offset = 1; Offset != 16; ++Offset) {
      if (Idx + Offset < N)
        Data[Idx + Offset] = QuietNaN;

      check(ScalarFn, VectorFn, Data, N, "full-with-multiple-nan");
    }
  }
}
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to test signaling NaNs as well, although support for them is currently broken in many places.

Test some denormals?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added some tests with denormals both as start value and in the inputs, thanks

At least on AArch64, with llvm/llvm-project#148239 applied, the tests also pass when all quiet NaNs are replaced with signaling NaNs, since the test only checks that the vectorized loop matches the behavior of the scalar loop.

But in general it may be a bit risky to check signaling NaNs, as the behavior may not be 100% consistent across platforms?

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It should be, but the state is in flux and has never been consistent


int main(void) {
rng = std::mt19937(15);

{
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
float Max = -2.0;, for (unsigned I = 0; I < 1024;
I++) { Max = std::fmax(Max, A[I]); } return Max;
, float);
checkVectorFunction<float>(ScalarFn, VectorFn, "fmaxnum_start_neg_2");
}
{
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
float Max = std::numeric_limits<float>::min();
, for (unsigned I = 0; I < 1024;
I++) { Max = std::fmax(Max, A[I]); } return Max;
, float);
checkVectorFunction<float>(ScalarFn, VectorFn, "fmaxnum_start_min");
}
{
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
float Max = std::numeric_limits<float>::quiet_NaN();
, for (unsigned I = 0; I < 1024;
I++) { Max = std::fmax(Max, A[I]); } return Max;
, float);
checkVectorFunction<float>(ScalarFn, VectorFn, "fmaxnum_start_is_nan");
}

{
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
float Max = -2.0;
, for (unsigned I = 0; I < 1024;
I++) { Max = A[I] > Max ? A[I] : Max; } return Max;
, float);
checkVectorFunction<float>(ScalarFn, VectorFn, "fmax_strict_start_neg_2");
}
{
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
float Max = std::numeric_limits<float>::min();
, for (unsigned I = 0; I < 1024;
I++) { Max = A[I] > Max ? A[I] : Max; } return Max;
, float);
checkVectorFunction<float>(ScalarFn, VectorFn, "fmax_strict_start_min");
}
{
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
float Max = std::numeric_limits<float>::quiet_NaN();
, for (unsigned I = 0; I < 1025;
I++) { Max = A[I] > Max ? A[I] : Max; } return Max;
, float);
checkVectorFunction<float>(ScalarFn, VectorFn, "fmax_strict_start_nan");
}

{
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
float Max = -2.0;
, for (unsigned I = 0; I < 1024;
I++) { Max = Max >= A[I] ? Max : A[I]; } return Max;
, float);
checkVectorFunction<float>(ScalarFn, VectorFn,
"fmax_non_strict_start_neg_2");
}

{
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
float Max = -2.0;
, for (unsigned I = 0; I < 1024;
I++) { Max = Max > A[I] ? Max : A[I]; } return Max;
, float);
checkVectorFunction<float>(ScalarFn, VectorFn,
"fmax_cmp_max_gt_start_neg_2");
}

{
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
float Max = -2.0;
, for (unsigned I = 0; I < 1024;
I++) { Max = Max < A[I] ? A[I] : Max; } return Max;
, float);
checkVectorFunction<float>(ScalarFn, VectorFn,
"fmax_cmp_max_lt_start_neg_2");
}

{
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
float Max = std::numeric_limits<float>::quiet_NaN();
, for (unsigned I = 0; I < 1024;
I++) { Max = Max < A[I] ? A[I] : Max; } return Max;
, float);
checkVectorFunction<float>(ScalarFn, VectorFn,
"fmax_cmp_max_lt_start_neg_nan");
}

return 0;
}