|
7 | 7 |
|
8 | 8 | #include "common.h"
|
9 | 9 |
|
| 10 | + |
10 | 11 | // Tests for memory runtime checks generated by the vectorizer. Runs scalar and
|
11 | 12 | // vectorized versions of a loop requiring runtime checks on the same inputs
|
12 | 13 | // with pointers to the same buffer using various offsets between reads and
|
@@ -108,6 +109,51 @@ static void checkOverlappingMemoryTwoRuntimeChecks(Fn3Ty<Ty> ScalarFn,
|
108 | 109 | CheckWithOffsetSecond(i);
|
109 | 110 | }
|
110 | 111 |
|
| 112 | + |
| 113 | + |
/// Callable signature shared by the scalar and vectorized nested-loop kernels:
/// two element pointers followed by two unsigned counts. The nested checker in
/// this file passes the outer trip count first and the inner trip count second.
template <typename Ty>
using Fn4Ty =
    std::function<void(Ty *, Ty *, unsigned OuterTC, unsigned InnerTC)>;
| 116 | +template <typename Ty> |
| 117 | +static void checkOverlappingMemoryTwoRuntimeChecksNested(Fn4Ty<Ty> ScalarFn, |
| 118 | + Fn4Ty<Ty> VectorFn, |
| 119 | + const int OuterTC, |
| 120 | + const int InnerTC, |
| 121 | + const char *Name) { |
| 122 | + std::cout << "Checking " << Name << "\n"; |
| 123 | + |
| 124 | + // Make sure we have enough extra elements so we can be liberal with offsets. |
| 125 | + const unsigned NumArrayElements = (InnerTC * (OuterTC + 1)) * 8; |
| 126 | + std::unique_ptr<Ty[]> Input1(new Ty[NumArrayElements]); |
| 127 | + std::unique_ptr<Ty[]> Reference(new Ty[NumArrayElements]); |
| 128 | + std::unique_ptr<Ty[]> ToCheck(new Ty[NumArrayElements]); |
| 129 | + |
| 130 | + auto CheckWithOffsetSecond = [&](int Offset) { |
| 131 | + init_data(Input1, NumArrayElements); |
| 132 | + for (unsigned i = 0; i < NumArrayElements; i++) { |
| 133 | + Reference[i] = Input1[i]; |
| 134 | + ToCheck[i] = Input1[i]; |
| 135 | + } |
| 136 | + |
| 137 | + // Run scalar function to generate reference output. |
| 138 | + Ty *ReferenceStart = &Reference[NumArrayElements / 2]; |
| 139 | + ScalarFn(ReferenceStart + Offset, ReferenceStart, OuterTC, InnerTC); |
| 140 | + |
| 141 | + // Run vector function to generate output to check. |
| 142 | + Ty *StartPtr = &ToCheck[NumArrayElements / 2]; |
| 143 | + callThroughOptnone(VectorFn, StartPtr + Offset, StartPtr, OuterTC, InnerTC); |
| 144 | + |
| 145 | + // Compare scalar and vector output. |
| 146 | + check(Reference, ToCheck, NumArrayElements, Offset); |
| 147 | + }; |
| 148 | + |
| 149 | + // With a nested loop, sometimes the runtime checks will fail and sometimes |
| 150 | + // succeed. For example, with large offsets you'd expect for the first and |
| 151 | + // last one or two executions of the inner loop there is no overlap. |
| 152 | + for (int i = -(2 * (InnerTC + 1)); i <= (2 * (InnerTC + 1)); i++) |
| 153 | + CheckWithOffsetSecond(i); |
| 154 | +} |
| 155 | + |
| 156 | + |
111 | 157 | int main(void) {
|
112 | 158 | rng = std::mt19937(15);
|
113 | 159 |
|
@@ -261,5 +307,73 @@ int main(void) {
|
261 | 307 | ScalarFn, VectorFn, "1 read, 2 writes, simple indices, uint64_t");
|
262 | 308 | }
|
263 | 309 |
|
| 310 | + { |
| 311 | + DEFINE_NESTED_SCALAR_AND_VECTOR_FN4( |
| 312 | + auto X = B[(i * OuterTC) + j]; |
| 313 | + A[(i * (OuterTC + 1)) + j] = X; |
| 314 | + ); |
| 315 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>( |
| 316 | + ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (matching trip counts), uint8_t"); |
| 317 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>( |
| 318 | + ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (matching trip counts), uint32_t"); |
| 319 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>( |
| 320 | + ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (matching trip counts), uint64_t"); |
| 321 | + |
| 322 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>( |
| 323 | + ScalarFn, VectorFn, 100, 50, "1 read, 1 write, nested loop (different trip counts), uint8_t"); |
| 324 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>( |
| 325 | + ScalarFn, VectorFn, 100, 50, "1 read, 1 write, nested loop (different trip counts), uint32_t"); |
| 326 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>( |
| 327 | + ScalarFn, VectorFn, 100, 50, "1 read, 1 write, nested loop (different trip counts), uint64_t"); |
| 328 | + } |
| 329 | + |
| 330 | + { |
| 331 | + DEFINE_NESTED_SCALAR_AND_VECTOR_FN4( |
| 332 | + auto X = B[(i * OuterTC) + j]; |
| 333 | + A[(i * (OuterTC + 1)) + j] += X; |
| 334 | + ); |
| 335 | + |
| 336 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>( |
| 337 | + ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (matching trip counts), uint8_t"); |
| 338 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>( |
| 339 | + ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (matching trip counts), uint32_t"); |
| 340 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>( |
| 341 | + ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (matching trip counts), uint64_t"); |
| 342 | + |
| 343 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>( |
| 344 | + ScalarFn, VectorFn, 100, 50, "2 reads, 1 write, nested loop (different trip counts), uint8_t"); |
| 345 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>( |
| 346 | + ScalarFn, VectorFn, 100, 50, "2 reads, 1 write, nested loop (different trip counts), uint32_t"); |
| 347 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>( |
| 348 | + ScalarFn, VectorFn, 100, 50, "2 reads, 1 write, nested loop (different trip counts), uint64_t"); |
| 349 | + } |
| 350 | + |
| 351 | + { |
| 352 | + DEFINE_NESTED_SCALAR_AND_VECTOR_FN5( |
| 353 | + auto X = B[(i * OuterTC) + j]; |
| 354 | + A[(i * (OuterTC + 1)) + j] = X; |
| 355 | + ); |
| 356 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>( |
| 357 | + ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (decreasing outer iv, matching trip counts), uint8_t"); |
| 358 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>( |
| 359 | + ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (decreasing outer iv, matching trip counts), uint32_t"); |
| 360 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>( |
| 361 | + ScalarFn, VectorFn, 100, 100, "1 read, 1 write, nested loop (decreasing outer iv, matching trip counts), uint64_t"); |
| 362 | + } |
| 363 | + |
| 364 | + { |
| 365 | + DEFINE_NESTED_SCALAR_AND_VECTOR_FN5( |
| 366 | + auto X = B[(i * OuterTC) + j]; |
| 367 | + A[(i * (OuterTC + 1)) + j] += X; |
| 368 | + ); |
| 369 | + |
| 370 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint8_t>( |
| 371 | + ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (decreasing outer iv, matching trip counts), uint8_t"); |
| 372 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint32_t>( |
| 373 | + ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (decreasing outer iv, matching trip counts), uint32_t"); |
| 374 | + checkOverlappingMemoryTwoRuntimeChecksNested<uint64_t>( |
| 375 | + ScalarFn, VectorFn, 100, 100, "2 reads, 1 write, nested loop (decreasing outer iv, matching trip counts), uint64_t"); |
| 376 | + } |
| 377 | + |
264 | 378 | return 0;
|
265 | 379 | }
|
0 commit comments