|  | 
// RUN: %{ispc} %s --target=avx2-i32x8 --arch=x86-64 --nostdlib --emit-asm -o - | FileCheck %s

// Test produces different IR/ASM with LLVM_21_0+ due to SROA improvements. Multiple stores
// filling the same alloca are tree-optimized - https://github.com/llvm/llvm-project/pull/152793
// REQUIRES: X86_ENABLED && LLVM_21_0+

// The goal of this test is to check that code generation for both versions is the same.
|  | 9 | +struct FVector4 { | 
|  | 10 | +    float<4> V; | 
|  | 11 | +}; | 
|  | 12 | + | 
|  | 13 | +struct WideFVector4 { | 
|  | 14 | +    float V[programCount]; | 
|  | 15 | +}; | 
|  | 16 | + | 
|  | 17 | +unmasked inline uniform WideFVector4 operator+(const uniform WideFVector4 &A, const uniform WideFVector4 &B) { | 
|  | 18 | +    uniform WideFVector4 Result; | 
|  | 19 | +    Result.V[programIndex] = A.V[programIndex] + B.V[programIndex]; | 
|  | 20 | +    return Result; | 
|  | 21 | +} | 
|  | 22 | + | 
|  | 23 | +unmasked inline void LoadWideFVector4(uniform FVector4 *uniform DstPtr, const uniform FVector4 *uniform SrcPtr) { | 
|  | 24 | +    *DstPtr = *SrcPtr; | 
|  | 25 | +    *(DstPtr + 1) = *(SrcPtr + 1); | 
|  | 26 | +} | 
|  | 27 | + | 
|  | 28 | +unmasked inline void StoreWideFVector4(uniform FVector4 *uniform DstPtr, const uniform FVector4 *uniform SrcPtr) { | 
|  | 29 | +    *DstPtr = *SrcPtr; | 
|  | 30 | +    *(DstPtr + 1) = *(SrcPtr + 1); | 
|  | 31 | +} | 
|  | 32 | + | 
|  | 33 | +// CHECK-LABEL: AddWide___ | 
|  | 34 | +// CHECK-COUNT-2: vmovaps | 
|  | 35 | +// CHECK-NOT: vmovups | 
|  | 36 | +// CHECK-COUNT-2: vaddps | 
|  | 37 | +// CHECK-COUNT-2: vmovaps | 
|  | 38 | +// CHECK-NOT: vmovaps | 
|  | 39 | +unmasked void AddWide(uniform FVector4 Result[], uniform FVector4 Source1[], uniform FVector4 Source2[]) { | 
|  | 40 | +    uniform int Index = 0; | 
|  | 41 | +    uniform WideFVector4 S1, S2; | 
|  | 42 | + | 
|  | 43 | +    LoadWideFVector4((uniform FVector4 * uniform) & S1, (uniform FVector4 * uniform) & Source1[Index]); | 
|  | 44 | +    LoadWideFVector4((uniform FVector4 * uniform) & S2, (uniform FVector4 * uniform) & Source2[Index]); | 
|  | 45 | +    const uniform WideFVector4 R = S1 + S2; | 
|  | 46 | +    StoreWideFVector4((uniform FVector4 * uniform) & Result[Index], (uniform FVector4 * uniform) & R); | 
|  | 47 | +} | 
|  | 48 | + | 
|  | 49 | +unmasked inline void LoadWideFVector4_2(uniform FVector4 *uniform DstPtr, const uniform FVector4 *uniform SrcPtr) { | 
|  | 50 | +    for (uniform int i = 0; i < (programCount / 4); i++) { | 
|  | 51 | +        *(DstPtr + i) = *(SrcPtr + i); | 
|  | 52 | +    } | 
|  | 53 | +} | 
|  | 54 | + | 
|  | 55 | +unmasked inline void StoreWideFVector4_2(uniform FVector4 *uniform DstPtr, const uniform FVector4 *uniform SrcPtr) { | 
|  | 56 | +    for (uniform int i = 0; i < (programCount / 4); i++) { | 
|  | 57 | +        *(DstPtr + i) = *(SrcPtr + i); | 
|  | 58 | +    } | 
|  | 59 | +} | 
|  | 60 | + | 
|  | 61 | +// CHECK-LABEL: AddWide_2___ | 
|  | 62 | +// CHECK-COUNT-2: vmovaps | 
|  | 63 | +// CHECK-NOT: vmovups | 
|  | 64 | +// CHECK-COUNT-2: vaddps | 
|  | 65 | +// CHECK-COUNT-2: vmovaps | 
|  | 66 | +// CHECK-NOT: vmovaps | 
|  | 67 | +unmasked void AddWide_2(uniform FVector4 Result[], uniform FVector4 Source1[], uniform FVector4 Source2[]) { | 
|  | 68 | +    uniform int Index = 0; | 
|  | 69 | +    uniform WideFVector4 S1, S2; | 
|  | 70 | + | 
|  | 71 | +    LoadWideFVector4_2((uniform FVector4 * uniform) & S1, (uniform FVector4 * uniform) & Source1[Index]); | 
|  | 72 | +    LoadWideFVector4_2((uniform FVector4 * uniform) & S2, (uniform FVector4 * uniform) & Source2[Index]); | 
|  | 73 | +    const uniform WideFVector4 R = S1 + S2; | 
|  | 74 | +    StoreWideFVector4_2((uniform FVector4 * uniform) & Result[Index], (uniform FVector4 * uniform) & R); | 
|  | 75 | +} | 