Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
342 changes: 337 additions & 5 deletions SingleSource/UnitTests/Vectorizer/early-exit.cpp
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
#include <algorithm>
#include <functional>
#include <iostream>
#include <limits>
#include <stdint.h>

#define N 512
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It it is worth extracting out the constant, also give it a description of where it is used?

// N_ODD is not divisible by common VFs (4, 8, 16) to test scalar epilogue.
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// N_ODD is not divisible by common VFs (4, 8, 16) to test scalar epilogue.
/// N_ODD is not divisible by common VFs (4, 8, 16) to test scalar epilogue.

#define N_ODD 511
// N_SMALL is a small trip count divisible by common VFs, allowing the compiler
// to potentially eliminate the scalar remainder loop entirely.
Comment on lines +8 to +9
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// N_SMALL is a small trip count divisible by common VFs, allowing the compiler
// to potentially eliminate the scalar remainder loop entirely.
/// N_SMALL is a small trip count divisible by common VFs, allowing the compiler
/// to potentially eliminate the scalar remainder loop entirely.

#define N_SMALL 64

template <typename Ty>
using TestFnTy = std::function<unsigned(std::function<void(Ty *)>)>;
Expand All @@ -27,8 +30,8 @@ checkVectorFunction(TestFnTy<Ty> ScalarFn, TestFnTy<Ty> VectorFn,
std::make_pair(InterleavedFn, "interleave-forced"),
std::make_pair(InterleavedOnlyFn, "interleave-only")};

// Check finding the target element at all indices between 0 and 512.
for (unsigned IdxToFind = 0; IdxToFind < 512; ++IdxToFind) {
// Check finding the target element at all indices between 0 and N.
for (unsigned IdxToFind = 0; IdxToFind < N; ++IdxToFind) {
// Lambda to initialize all array elements to one, except the one to look
// for to zero.
auto Init1 = [IdxToFind](Ty *Arr) {
Expand Down Expand Up @@ -67,9 +70,97 @@ checkVectorFunction(TestFnTy<Ty> ScalarFn, TestFnTy<Ty> VectorFn,
}
}

// Test function type for three arrays
template <typename Ty>
using TestFnTy3 =
std::function<unsigned(std::function<void(Ty *, Ty *, Ty *)>)>;

// Unified check function for multiple early exits
template <typename Ty, unsigned NumExits, unsigned TripCount, unsigned Stride1,
unsigned Stride2, unsigned Stride3>
static void
checkVectorFunctionMulti(TestFnTy3<Ty> ScalarFn, TestFnTy3<Ty> VectorFn,
TestFnTy3<Ty> ForcedVectorFn,
TestFnTy3<Ty> InterleavedFn,
TestFnTy3<Ty> InterleavedOnlyFn, const char *Name) {
static_assert(NumExits == 2 || NumExits == 3, "NumExits must be 2 or 3");
std::cout << "Checking " << Name << "\n";

std::array Tests = {std::make_pair(VectorFn, "autovec"),
std::make_pair(ForcedVectorFn, "vector-forced"),
std::make_pair(InterleavedFn, "interleave-forced"),
std::make_pair(InterleavedOnlyFn, "interleave-only")};

// Test all combinations of exit positions
// Use stride to reduce test count while still covering important cases
for (unsigned Exit1Idx = 0; Exit1Idx <= TripCount; Exit1Idx += Stride1) {
for (unsigned Exit2Idx = 0; Exit2Idx <= TripCount; Exit2Idx += Stride2) {
for (unsigned Exit3Idx = 0; Exit3Idx <= TripCount; Exit3Idx += Stride3) {
auto Init = [Exit1Idx, Exit2Idx, Exit3Idx](Ty *A, Ty *B, Ty *C) {
// Fill arrays with different values
for (unsigned I = 0; I < TripCount; I++) {
A[I] = I + 1;
B[I] = I + TripCount;
C[I] = I + 2 * TripCount;
}
// Set up first exit condition: A[Exit1Idx] == 0
if (Exit1Idx < TripCount)
A[Exit1Idx] = 0;
// Set up second exit condition: A[Exit2Idx] == B[Exit2Idx]
if (Exit2Idx < TripCount)
B[Exit2Idx] = A[Exit2Idx];
// Set up third exit condition: B[Exit3Idx] == C[Exit3Idx]
if (Exit3Idx < TripCount)
C[Exit3Idx] = B[Exit3Idx];
};

auto Reference = ScalarFn(Init);
for (const auto &[Fn, FnName] : Tests) {
auto ToCheck = callThroughOptnone(Fn, Init);
if (Reference != ToCheck) {
if constexpr (NumExits == 2) {
std::cerr << "Miscompare for " << FnName
<< " at Exit1Idx=" << Exit1Idx
<< ", Exit2Idx=" << Exit2Idx << ": " << Reference
<< " != " << ToCheck << "\n";
} else {
std::cerr << "Miscompare for " << FnName
<< " at Exit1Idx=" << Exit1Idx
<< ", Exit2Idx=" << Exit2Idx
<< ", Exit3Idx=" << Exit3Idx << ": " << Reference
<< " != " << ToCheck << "\n";
}
exit(1);
}
}
}
}
}

// Explicit test for regular exit (no early exit taken)
// All ExitIdx = TripCount means no early exit condition is triggered
auto InitNoExit = [](Ty *A, Ty *B, Ty *C) {
for (unsigned I = 0; I < TripCount; I++) {
A[I] = I + 1;
B[I] = I + TripCount;
C[I] = I + 2 * TripCount;
}
};
auto Reference = callThroughOptnone(ScalarFn, InitNoExit);
for (const auto &[Fn, FnName] : Tests) {
auto ToCheck = callThroughOptnone(Fn, InitNoExit);
if (Reference != ToCheck) {
std::cerr << "Miscompare for " << FnName
<< " at regular exit (no early exit taken): " << Reference
<< " != " << ToCheck << "\n";
exit(1);
}
}
}

/// Define 3 test functions
/// * one with vectorization and interleaving disabled,
/// * one with auto-vectorization w/o any pargmas
/// * one with auto-vectorization w/o any pragmas
/// * one where VF is picked automatically and interleaving is forced.
#define DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT(Init, Src, Loop) \
auto ScalarFn = [](std::function<void(int *)> II) -> unsigned { \
Expand Down Expand Up @@ -98,6 +189,62 @@ checkVectorFunction(TestFnTy<Ty> ScalarFn, TestFnTy<Ty> VectorFn,
_Pragma("clang loop vectorize_width(1) interleave_count(4)") Loop \
};

/// Define test functions for two early exits (uses 3 arrays, third is unused).
#define DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_2(Init, Src1, Src2, Src3, Loop) \
auto ScalarFn = \
[](std::function<void(int *, int *, int *)> II) -> unsigned { \
Init II(Src1, Src2, Src3); \
_Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
}; \
auto VectorFn = \
[](std::function<void(int *, int *, int *)> II) -> unsigned { \
Init II(Src1, Src2, Src3); \
_Pragma("clang loop vectorize(enable)") Loop \
}; \
auto ForcedVectorFn = \
[](std::function<void(int *, int *, int *)> II) -> unsigned { \
Init II(Src1, Src2, Src3); \
_Pragma("clang loop vectorize_width(8) interleave_count(1)") Loop \
}; \
auto InterleavedFn = \
[](std::function<void(int *, int *, int *)> II) -> unsigned { \
Init II(Src1, Src2, Src3); \
_Pragma("clang loop vectorize(enable) interleave_count(4)") Loop \
}; \
auto InterleavedOnlyFn = \
[](std::function<void(int *, int *, int *)> II) -> unsigned { \
Init II(Src1, Src2, Src3); \
_Pragma("clang loop vectorize_width(1) interleave_count(4)") Loop \
};

/// Define test functions for three arrays (for three early exits)
#define DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_3(Init, Src1, Src2, Src3, Loop) \
auto ScalarFn = \
[](std::function<void(int *, int *, int *)> II) -> unsigned { \
Init II(Src1, Src2, Src3); \
_Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
}; \
auto VectorFn = \
[](std::function<void(int *, int *, int *)> II) -> unsigned { \
Init II(Src1, Src2, Src3); \
_Pragma("clang loop vectorize(enable)") Loop \
}; \
auto ForcedVectorFn = \
[](std::function<void(int *, int *, int *)> II) -> unsigned { \
Init II(Src1, Src2, Src3); \
_Pragma("clang loop vectorize_width(8) interleave_count(1)") Loop \
}; \
auto InterleavedFn = \
[](std::function<void(int *, int *, int *)> II) -> unsigned { \
Init II(Src1, Src2, Src3); \
_Pragma("clang loop vectorize(enable) interleave_count(4)") Loop \
}; \
auto InterleavedOnlyFn = \
[](std::function<void(int *, int *, int *)> II) -> unsigned { \
Init II(Src1, Src2, Src3); \
_Pragma("clang loop vectorize_width(1) interleave_count(4)") Loop \
};

int main(void) {
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT(
Expand All @@ -119,5 +266,190 @@ int main(void) {
checkVectorFunction<int>(ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn,
InterleavedOnlyFn, "early_exit_find_step_3");
}

// Test: Two early exits to the same exit block.
// First exit: A[I] == 0
// Second exit: A[I] == B[I]
// This tests that exits are checked in the correct program order.
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_2(
int A[N]; int B[N]; int C[N];
, A, B, C, for (unsigned I = 0; I < N; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I + 1000;
} return -1;);
Comment on lines +277 to +282
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
, A, B, C, for (unsigned I = 0; I < N; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I + 1000;
} return -1;);
, A, B, C, { for (unsigned I = 0; I < N; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I + 1000;
} return -1;});

braces may help clang-format to format this better as code:

DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_2(int A[N]; int B[N]; int C[N];, A, B, C, {
  for (unsigned I = 0; I < N; I++) {
    if (A[I] == 0)
      return I;
    if (A[I] == B[I])
      return I + 1000;
  }
  return -1;
});

checkVectorFunctionMulti<int, 2, N, 3, 3, N + 1>(
ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn, InterleavedOnlyFn,
"two_early_exits_same_block");
}

// Test: Two early exits with different return values.
// First exit: A[I] == 0 -> return I
// Second exit: A[I] == B[I] -> return I * 2
// Latch exit: return -1
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_2(
int A[N]; int B[N]; int C[N];
, A, B, C, for (unsigned I = 0; I < N; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I * 2;
} return -1;);
checkVectorFunctionMulti<int, 2, N, 3, 3, N + 1>(
ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn, InterleavedOnlyFn,
"two_early_exits_different_values");
}

// Test: Two early exits with different comparison types.
// First exit: A[I] == 0
// Second exit: A[I] == B[I]
// Both use equality checks that checkVectorFunctionMulti can set up.
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_2(
int A[N]; int B[N]; int C[N];
, A, B, C, for (unsigned I = 0; I < N; I++) {
if (A[I] == 0)
return I + 500;
if (A[I] == B[I])
return I;
} return -1;);
checkVectorFunctionMulti<int, 2, N, 3, 3, N + 1>(
ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn, InterleavedOnlyFn,
"two_early_exits_swapped_returns");
}

// Test: Three early exits to the same exit block.
// First exit: A[I] == 0
// Second exit: A[I] == B[I]
// Third exit: B[I] == C[I]
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_3(
int A[N]; int B[N]; int C[N];
, A, B, C, for (unsigned I = 0; I < N; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I + 1000;
if (B[I] == C[I])
return I + 2000;
} return -1;);
checkVectorFunctionMulti<int, 3, N, 17, 17, 17>(
ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn, InterleavedOnlyFn,
"three_early_exits_same_block");
}

// Test: Three early exits with different return expressions.
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_3(
int A[N]; int B[N]; int C[N];
, A, B, C, for (unsigned I = 0; I < N; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I * 2;
if (B[I] == C[I])
return I * 3;
} return -1;);
checkVectorFunctionMulti<int, 3, N, 17, 17, 17>(
ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn, InterleavedOnlyFn,
"three_early_exits_different_values");
}

// Test: Two early exits with odd trip count (tests scalar epilogue).
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_2(
int A[N_ODD]; int B[N_ODD]; int C[N_ODD];
, A, B, C, for (unsigned I = 0; I < N_ODD; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I + 1000;
} return -1;);
checkVectorFunctionMulti<int, 2, N_ODD, 3, 3, N_ODD + 1>(
ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn, InterleavedOnlyFn,
"two_early_exits_odd_trip_count");
}

// Test: Two early exits with odd trip count and different return values.
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_2(
int A[N_ODD]; int B[N_ODD]; int C[N_ODD];
, A, B, C, for (unsigned I = 0; I < N_ODD; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I * 2;
} return -1;);
checkVectorFunctionMulti<int, 2, N_ODD, 3, 3, N_ODD + 1>(
ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn, InterleavedOnlyFn,
"two_early_exits_odd_different_values");
}
// Test: Three early exits with odd trip count.
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_3(
int A[N_ODD]; int B[N_ODD]; int C[N_ODD];
, A, B, C, for (unsigned I = 0; I < N_ODD; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I + 1000;
if (B[I] == C[I])
return I + 2000;
} return -1;);
checkVectorFunctionMulti<int, 3, N_ODD, 17, 17, 17>(
ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn, InterleavedOnlyFn,
"three_early_exits_odd_trip_count");
}

// Test: Two early exits with small known trip count (no scalar remainder).
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_2(
int A[N_SMALL]; int B[N_SMALL]; int C[N_SMALL];
, A, B, C, for (unsigned I = 0; I < N_SMALL; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I + 1000;
} return -1;);
checkVectorFunctionMulti<int, 2, N_SMALL, 3, 3, N_SMALL + 1>(
ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn, InterleavedOnlyFn,
"two_early_exits_small_known_tc");
}

// Test: Three early exits with small known trip count (no scalar remainder).
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_3(
int A[N_SMALL]; int B[N_SMALL]; int C[N_SMALL];
, A, B, C, for (unsigned I = 0; I < N_SMALL; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I + 1000;
if (B[I] == C[I])
return I + 2000;
} return -1;);
checkVectorFunctionMulti<int, 3, N_SMALL, 7, 7, 7>(
ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn, InterleavedOnlyFn,
"three_early_exits_small_known_tc");
}

// Test: Two early exits with small known trip count and different returns.
{
DEFINE_SCALAR_AND_VECTOR_EARLY_EXIT_2(
int A[N_SMALL]; int B[N_SMALL]; int C[N_SMALL];
, A, B, C, for (unsigned I = 0; I < N_SMALL; I++) {
if (A[I] == 0)
return I;
if (A[I] == B[I])
return I * 2;
} return -1;);
checkVectorFunctionMulti<int, 2, N_SMALL, 3, 3, N_SMALL + 1>(
ScalarFn, VectorFn, ForcedVectorFn, InterleavedFn, InterleavedOnlyFn,
"two_early_exits_small_different_values");
}

return 0;
}
11 changes: 11 additions & 0 deletions SingleSource/UnitTests/Vectorizer/early-exit.reference_output
Original file line number Diff line number Diff line change
@@ -1,3 +1,14 @@
Checking early_exit_find_step_1
Checking early_exit_find_step_3
Checking two_early_exits_same_block
Checking two_early_exits_different_values
Checking two_early_exits_swapped_returns
Checking three_early_exits_same_block
Checking three_early_exits_different_values
Checking two_early_exits_odd_trip_count
Checking two_early_exits_odd_different_values
Checking three_early_exits_odd_trip_count
Checking two_early_exits_small_known_tc
Checking three_early_exits_small_known_tc
Checking two_early_exits_small_different_values
exit 0