Skip to content

Commit 7b5b5dc

Browse files
committed
[SingleSource/Vectorizer] Add runtime checks tests for nested loops
This patch adds tests for nested loops like this:

  for (int i = 0; i < N; i++) {
    for (int j = 0; j < N; j++) {
      a[(i * (N + 1)) + j] += b[(i * N) + j];
    }
  }

where we generate runtime checks for the inner loop that do not currently get hoisted above the outer loop.

Differential Revision: https://reviews.llvm.org/D154719
1 parent 065b7c0 commit 7b5b5dc

File tree

3 files changed

+168
-0
lines changed

3 files changed

+168
-0
lines changed

SingleSource/UnitTests/Vectorizer/common.h

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,42 @@
1717
_Pragma("clang loop vectorize(enable)") Loop \
1818
};
1919

20+
// Defines two generic lambdas in the enclosing scope, ScalarFn and VectorFn,
// each taking (A, B, OuterTC, InnerTC). Both run the same two-level loop nest:
// the outer loop counts i upwards from 0 to OuterTC - 1 and the inner loop
// counts j upwards from 0 to InnerTC - 1, executing InnerLoopCode once per
// (i, j) pair. InnerLoopCode is pasted verbatim into both inner loops, so it
// may refer to A, B, OuterTC, InnerTC, i and j.
// The only difference between the two lambdas is the loop pragma on the inner
// loop: ScalarFn requests vectorize(disable) interleave_count(1), VectorFn
// requests vectorize(enable).
// Because the macro declares fixed names, it can be used at most once per
// scope.
#define DEFINE_NESTED_SCALAR_AND_VECTOR_FN4(InnerLoopCode) \
21+
auto ScalarFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) { \
22+
for (unsigned long i = 0; i < OuterTC; i++) { \
23+
_Pragma("clang loop vectorize(disable) interleave_count(1)") \
24+
for (unsigned long j = 0; j < InnerTC; j++) { \
25+
InnerLoopCode \
26+
} \
27+
} \
28+
}; \
29+
auto VectorFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) { \
30+
for (unsigned long i = 0; i < OuterTC; i++) { \
31+
_Pragma("clang loop vectorize(enable)") \
32+
for (unsigned long j = 0; j < InnerTC; j++) { \
33+
InnerLoopCode \
34+
} \
35+
} \
36+
};
37+
38+
// Defines two generic lambdas in the enclosing scope, ScalarFn and VectorFn,
// each taking (A, B, OuterTC, InnerTC). Both run the same two-level loop nest
// with a *decreasing* outer induction variable: i counts down from
// OuterTC - 1 to 0, while the inner loop counts j upwards from 0 to
// InnerTC - 1, executing InnerLoopCode once per (i, j) pair. InnerLoopCode is
// pasted verbatim into both inner loops, so it may refer to A, B, OuterTC,
// InnerTC, i and j.
// The only difference between the two lambdas is the loop pragma on the inner
// loop: ScalarFn requests vectorize(disable) interleave_count(1), VectorFn
// requests vectorize(enable).
// OuterTC is converted to long *before* subtracting 1. Subtracting in
// unsigned arithmetic would wrap to UINT_MAX when OuterTC is 0, which turns
// into a huge positive start value wherever long is wider than unsigned.
// Because the macro declares fixed names, it can be used at most once per
// scope.
#define DEFINE_NESTED_SCALAR_AND_VECTOR_FN5(InnerLoopCode)                     \
  auto ScalarFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) {   \
    for (long i = static_cast<long>(OuterTC) - 1; i >= 0; i--) {               \
      _Pragma("clang loop vectorize(disable) interleave_count(1)")             \
      for (unsigned long j = 0; j < InnerTC; j++) {                            \
        InnerLoopCode                                                          \
      }                                                                        \
    }                                                                          \
  };                                                                           \
  auto VectorFn = [](auto *A, auto *B, unsigned OuterTC, unsigned InnerTC) {   \
    for (long i = static_cast<long>(OuterTC) - 1; i >= 0; i--) {               \
      _Pragma("clang loop vectorize(enable)")                                  \
      for (unsigned long j = 0; j < InnerTC; j++) {                            \
        InnerLoopCode                                                          \
      }                                                                        \
    }                                                                          \
  };
55+
2056
static std::mt19937 rng;
2157

2258
// Initialize arrays A with random numbers.

SingleSource/UnitTests/Vectorizer/runtime-checks.cpp

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
#include "common.h"
99

10+
1011
// Tests for memory runtime checks generated by the vectorizer. Runs scalar and
1112
// vectorized versions of a loop requiring runtime checks on the same inputs
1213
// with pointers to the same buffer using various offsets between reads and
@@ -108,6 +109,51 @@ static void checkOverlappingMemoryTwoRuntimeChecks(Fn3Ty<Ty> ScalarFn,
108109
CheckWithOffsetSecond(i);
109110
}
110111

112+
113+
114+
// Callable type for the nested-loop kernels under test. The two pointers are
// the A and B arrays handed to the kernel and the two unsigned values are the
// outer and inner trip counts, in that order.
template <typename Ty>
115+
using Fn4Ty = std::function<void(Ty *, Ty *, unsigned, unsigned)>;
116+
// Runs ScalarFn and VectorFn on identical inputs for a range of pointer
// offsets and compares their results.
//
// Prints "Checking <Name>" to std::cout, then allocates three arrays of
// NumArrayElements elements. For every Offset from -(2 * (InnerTC + 1)) to
// 2 * (InnerTC + 1) inclusive it:
//   1. refills Input1 through init_data and copies it into Reference and
//      ToCheck,
//   2. calls ScalarFn on Reference with A = middle + Offset and B = middle,
//   3. calls VectorFn the same way on ToCheck, via callThroughOptnone,
//   4. passes both arrays to check together with the Offset.
// "middle" is element NumArrayElements / 2 of the respective array, which
// leaves room for negative offsets.
//
// init_data, callThroughOptnone and check are defined outside this function;
// their exact behaviour is not visible here.
//
// NOTE(review): NumArrayElements is derived from InnerTC * (OuterTC + 1),
// whereas the kernels visible in main index rows with a stride of OuterTC /
// (OuterTC + 1). That is in bounds for the (100, 100) and (100, 50) calls
// shown, but should be re-checked before adding a call where InnerTC is much
// smaller than OuterTC.
template <typename Ty>
117+
static void checkOverlappingMemoryTwoRuntimeChecksNested(Fn4Ty<Ty> ScalarFn,
118+
Fn4Ty<Ty> VectorFn,
119+
const int OuterTC,
120+
const int InnerTC,
121+
const char *Name) {
122+
std::cout << "Checking " << Name << "\n";
123+
124+
// Make sure we have enough extra elements so we can be liberal with offsets.
125+
const unsigned NumArrayElements = (InnerTC * (OuterTC + 1)) * 8;
126+
std::unique_ptr<Ty[]> Input1(new Ty[NumArrayElements]);
127+
std::unique_ptr<Ty[]> Reference(new Ty[NumArrayElements]);
128+
std::unique_ptr<Ty[]> ToCheck(new Ty[NumArrayElements]);
129+
130+
auto CheckWithOffsetSecond = [&](int Offset) {
131+
init_data(Input1, NumArrayElements);
132+
for (unsigned i = 0; i < NumArrayElements; i++) {
133+
Reference[i] = Input1[i];
134+
ToCheck[i] = Input1[i];
135+
}
136+
137+
// Run scalar function to generate reference output.
138+
Ty *ReferenceStart = &Reference[NumArrayElements / 2];
139+
ScalarFn(ReferenceStart + Offset, ReferenceStart, OuterTC, InnerTC);
140+
141+
// Run vector function to generate output to check.
142+
Ty *StartPtr = &ToCheck[NumArrayElements / 2];
143+
callThroughOptnone(VectorFn, StartPtr + Offset, StartPtr, OuterTC, InnerTC);
144+
145+
// Compare scalar and vector output.
146+
check(Reference, ToCheck, NumArrayElements, Offset);
147+
};
148+
149+
// With a nested loop, sometimes the runtime checks will fail and sometimes
150+
// succeed. For example, with large offsets you'd expect for the first and
151+
// last one or two executions of the inner loop there is no overlap.
152+
for (int i = -(2 * (InnerTC + 1)); i <= (2 * (InnerTC + 1)); i++)
153+
CheckWithOffsetSecond(i);
154+
}
155+
156+
111157
int main(void) {
112158
rng = std::mt19937(15);
113159

@@ -261,5 +307,73 @@ int main(void) {
261307
ScalarFn, VectorFn, "1 read, 2 writes, simple indices, uint64_t");
262308
}
263309

310+
{
311+
DEFINE_NESTED_SCALAR_AND_VECTOR_FN4(
312+
auto X = B[(i * OuterTC) + j];
313+
A[(i * (OuterTC + 1)) + j] = X;
314+
);
315+
checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>(
316+
ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (matching trip counts), uint8_t");
317+
checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>(
318+
ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (matching trip counts), uint32_t");
319+
checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>(
320+
ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (matching trip counts), uint64_t");
321+
322+
checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>(
323+
ScalarFn, VectorFn, 100, 50, "1 read, 1 write, nested loop (different trip counts), uint8_t");
324+
checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>(
325+
ScalarFn, VectorFn, 100, 50, "1 read, 1 write, nested loop (different trip counts), uint32_t");
326+
checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>(
327+
ScalarFn, VectorFn, 100, 50, "1 read, 1 write, nested loop (different trip counts), uint64_t");
328+
}
329+
330+
{
331+
DEFINE_NESTED_SCALAR_AND_VECTOR_FN4(
332+
auto X = B[(i * OuterTC) + j];
333+
A[(i * (OuterTC + 1)) + j] += X;
334+
);
335+
336+
checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>(
337+
ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (matching trip counts), uint8_t");
338+
checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>(
339+
ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (matching trip counts), uint32_t");
340+
checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>(
341+
ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (matching trip counts), uint64_t");
342+
343+
checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>(
344+
ScalarFn, VectorFn, 100, 50, "2 reads, 1 write, nested loop (different trip counts), uint8_t");
345+
checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>(
346+
ScalarFn, VectorFn, 100, 50, "2 reads, 1 write, nested loop (different trip counts), uint32_t");
347+
checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>(
348+
ScalarFn, VectorFn, 100, 50, "2 reads, 1 write, nested loop (different trip counts), uint64_t");
349+
}
350+
351+
{
352+
DEFINE_NESTED_SCALAR_AND_VECTOR_FN5(
353+
auto X = B[(i * OuterTC) + j];
354+
A[(i * (OuterTC + 1)) + j] = X;
355+
);
356+
checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>(
357+
ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (decreasing outer iv, matching trip counts), uint8_t");
358+
checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>(
359+
ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (decreasing outer iv, matching trip counts), uint32_t");
360+
checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>(
361+
ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (decreasing outer iv, matching trip counts), uint64_t");
362+
}
363+
364+
{
365+
DEFINE_NESTED_SCALAR_AND_VECTOR_FN5(
366+
auto X = B[(i * OuterTC) + j];
367+
A[(i * (OuterTC + 1)) + j] += X;
368+
);
369+
370+
checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>(
371+
ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (decreasing outer iv, matching trip counts), uint8_t");
372+
checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>(
373+
ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (decreasing outer iv, matching trip counts), uint32_t");
374+
checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>(
375+
ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (decreasing outer iv, matching trip counts), uint64_t");
376+
}
377+
264378
return 0;
265379
}

SingleSource/UnitTests/Vectorizer/runtime-checks.reference_output

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,4 +28,22 @@ Checking 2 reads, 1 write, simple indices, uint64_t
2828
Checking 1 read, 2 writes, simple indices, uint8_t
2929
Checking 1 read, 2 writes, simple indices, uint32_t
3030
Checking 1 read, 2 writes, simple indices, uint64_t
31+
Checking 1 read, 1 write, nested loop (matching trip counts), uint8_t
32+
Checking 1 read, 1 write, nested loop (matching trip counts), uint32_t
33+
Checking 1 read, 1 write, nested loop (matching trip counts), uint64_t
34+
Checking 1 read, 1 write, nested loop (different trip counts), uint8_t
35+
Checking 1 read, 1 write, nested loop (different trip counts), uint32_t
36+
Checking 1 read, 1 write, nested loop (different trip counts), uint64_t
37+
Checking 2 reads, 1 write, nested loop (matching trip counts), uint8_t
38+
Checking 2 reads, 1 write, nested loop (matching trip counts), uint32_t
39+
Checking 2 reads, 1 write, nested loop (matching trip counts), uint64_t
40+
Checking 2 reads, 1 write, nested loop (different trip counts), uint8_t
41+
Checking 2 reads, 1 write, nested loop (different trip counts), uint32_t
42+
Checking 2 reads, 1 write, nested loop (different trip counts), uint64_t
43+
Checking 1 read, 1 write, nested loop (decreasing outer iv, matching trip counts), uint8_t
44+
Checking 1 read, 1 write, nested loop (decreasing outer iv, matching trip counts), uint32_t
45+
Checking 1 read, 1 write, nested loop (decreasing outer iv, matching trip counts), uint64_t
46+
Checking 2 reads, 1 write, nested loop (decreasing outer iv, matching trip counts), uint8_t
47+
Checking 2 reads, 1 write, nested loop (decreasing outer iv, matching trip counts), uint32_t
48+
Checking 2 reads, 1 write, nested loop (decreasing outer iv, matching trip counts), uint64_t
3149
exit 0

0 commit comments

Comments
 (0)