Skip to content

Commit 90fb80d

Browse files
karthik-senthil authored and aneshlya committed
[LLVM21][NFC] Update LIT test to account for improvements in SROA.
SROA was recently updated to optimize stores that fill different parts of the same alloca (llvm/llvm-project#152793). This changes the IR/ASM that ISPC generates for a LIT test used to validate that two versions of the source produce identical ASM. This PR updates that test and adds a duplicate of it for LLVM_21_0+ builds.
1 parent 0481cb3 commit 90fb80d

File tree

2 files changed

+78
-1
lines changed

2 files changed

+78
-1
lines changed

tests/lit-tests/1678_unroll.ispc

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
// RUN: %{ispc} %s --target=avx2-i32x8 --arch=x86-64 --nostdlib --emit-asm -o - | FileCheck %s
22

3-
// REQUIRES: X86_ENABLED
3+
// Test produces different IR/ASM with LLVM_21_0+ due to SROA improvements. Check
4+
// the LIT test 1678_unroll_llvm_21.ll for more details.
5+
// REQUIRES: X86_ENABLED && !LLVM_21_0+
46

57
// The goal of this test is to check that code generation for both versions is the same.
68

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
// RUN: %{ispc} %s --target=avx2-i32x8 --arch=x86-64 --nostdlib --emit-asm -o - | FileCheck %s
2+
3+
// Test produces different IR/ASM with LLVM_21_0+ due to SROA improvements. Multiple stores
4+
// filling same alloca are tree-optimized - https://github.com/llvm/llvm-project/pull/152793
5+
// REQUIRES: X86_ENABLED && LLVM_21_0+
6+
7+
// The goal of this test is to check that code generation for both versions is the same.
8+
9+
// Wrapper around a single 4-element float vector (float<4>).
struct FVector4 {
10+
float<4> V;
11+
};
12+
13+
// Wrapper around an array of programCount floats, i.e. one float per program
// instance of the gang.
struct WideFVector4 {
14+
float V[programCount];
15+
};
16+
17+
// Lane-wise sum of two WideFVector4 values: each program instance adds element
// programIndex of A.V and B.V and writes it to the same element of the result,
// which is returned by value.
unmasked inline uniform WideFVector4 operator+(const uniform WideFVector4 &A, const uniform WideFVector4 &B) {
18+
uniform WideFVector4 Result;
19+
Result.V[programIndex] = A.V[programIndex] + B.V[programIndex];
20+
return Result;
21+
}
22+
23+
// Copies two consecutive FVector4 elements from SrcPtr to DstPtr. Both copies
// are written out by hand instead of using a loop.
unmasked inline void LoadWideFVector4(uniform FVector4 *uniform DstPtr, const uniform FVector4 *uniform SrcPtr) {
24+
*DstPtr = *SrcPtr;
25+
*(DstPtr + 1) = *(SrcPtr + 1);
26+
}
27+
28+
// Copies two consecutive FVector4 elements from SrcPtr to DstPtr. The body is
// the same hand-written pair of copies as in LoadWideFVector4.
unmasked inline void StoreWideFVector4(uniform FVector4 *uniform DstPtr, const uniform FVector4 *uniform SrcPtr) {
29+
*DstPtr = *SrcPtr;
30+
*(DstPtr + 1) = *(SrcPtr + 1);
31+
}
32+
33+
// CHECK-LABEL: AddWide___
34+
// CHECK-COUNT-2: vmovaps
35+
// CHECK-NOT: vmovups
36+
// CHECK-COUNT-2: vaddps
37+
// CHECK-COUNT-2: vmovaps
38+
// CHECK-NOT: vmovaps
39+
// Hand-unrolled variant of the test: loads the data at Source1[Index] and
// Source2[Index] (Index is 0) into two local WideFVector4 values, adds them
// with operator+, and stores the sum at Result[Index]. It uses the helpers that
// copy exactly two FVector4 elements; the pointer casts make those helpers
// access each WideFVector4 local through an FVector4 pointer.
unmasked void AddWide(uniform FVector4 Result[], uniform FVector4 Source1[], uniform FVector4 Source2[]) {
40+
uniform int Index = 0;
41+
uniform WideFVector4 S1, S2;
42+
43+
LoadWideFVector4((uniform FVector4 * uniform) & S1, (uniform FVector4 * uniform) & Source1[Index]);
44+
LoadWideFVector4((uniform FVector4 * uniform) & S2, (uniform FVector4 * uniform) & Source2[Index]);
45+
const uniform WideFVector4 R = S1 + S2;
46+
StoreWideFVector4((uniform FVector4 * uniform) & Result[Index], (uniform FVector4 * uniform) & R);
47+
}
48+
49+
// Loop-based counterpart of LoadWideFVector4: copies programCount / 4
// consecutive FVector4 elements from SrcPtr to DstPtr.
unmasked inline void LoadWideFVector4_2(uniform FVector4 *uniform DstPtr, const uniform FVector4 *uniform SrcPtr) {
50+
for (uniform int i = 0; i < (programCount / 4); i++) {
51+
*(DstPtr + i) = *(SrcPtr + i);
52+
}
53+
}
54+
55+
// Loop-based counterpart of StoreWideFVector4: copies programCount / 4
// consecutive FVector4 elements from SrcPtr to DstPtr.
unmasked inline void StoreWideFVector4_2(uniform FVector4 *uniform DstPtr, const uniform FVector4 *uniform SrcPtr) {
56+
for (uniform int i = 0; i < (programCount / 4); i++) {
57+
*(DstPtr + i) = *(SrcPtr + i);
58+
}
59+
}
60+
61+
// CHECK-LABEL: AddWide_2___
62+
// CHECK-COUNT-2: vmovaps
63+
// CHECK-NOT: vmovups
64+
// CHECK-COUNT-2: vaddps
65+
// CHECK-COUNT-2: vmovaps
66+
// CHECK-NOT: vmovaps
67+
// Loop-based variant of the test: performs the same load / add / store sequence
// as AddWide, but through the _2 helpers, which copy programCount / 4 FVector4
// elements in a loop. The pointer casts make those helpers access each
// WideFVector4 local through an FVector4 pointer.
unmasked void AddWide_2(uniform FVector4 Result[], uniform FVector4 Source1[], uniform FVector4 Source2[]) {
68+
uniform int Index = 0;
69+
uniform WideFVector4 S1, S2;
70+
71+
LoadWideFVector4_2((uniform FVector4 * uniform) & S1, (uniform FVector4 * uniform) & Source1[Index]);
72+
LoadWideFVector4_2((uniform FVector4 * uniform) & S2, (uniform FVector4 * uniform) & Source2[Index]);
73+
const uniform WideFVector4 R = S1 + S2;
74+
StoreWideFVector4_2((uniform FVector4 * uniform) & Result[Index], (uniform FVector4 * uniform) & R);
75+
}

0 commit comments

Comments
 (0)