Conversation

@preames (Collaborator) commented Jul 16, 2025

If we're checking to see if a number is a multiple of a small constant, we need to be sure the multiply doesn't overflow for the mul logic to hold. The VL is an unsigned number, so we care about unsigned overflow. Once we've proven a number is a multiple, we can also use an exact udiv, as we know we're not discarding any bits.

This fixes what is technically a miscompile with EVL vectorization, but I doubt we'd ever have seen it in practice since most EVLs are going to be much less than UINT_MAX.
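
To make the overflow concern concrete, here is a hypothetical counterexample (not taken from the patch). With i32 arithmetic, a plain mul by 3 can wrap to a value that is no longer a multiple of 3, so concluding divisibility from a mul without the nuw flag is unsound:

    ; 3 * 1431655766 = 4294967298 = 2^32 + 2, which wraps to 2 in i32.
    define i32 @wrap_example() {
      %rvl = mul i32 1431655766, 3   ; no nuw: wraps to 2
      %rem = urem i32 %rvl, 3        ; 2 urem 3 == 2, not 0
      ret i32 %rem
    }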

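Once both flags are in place, a minimal sketch of why the exact udiv pays off: the multiply/divide round-trip can fold away entirely, which is what removes the mulhu/srli division sequences from the tests below.

    define i32 @fold_example(i32 %evl) {
      %rvl = mul nuw i32 %evl, 3
      %vl  = udiv exact i32 %rvl, 3   ; folds to %evl, no division emitted
      ret i32 %vl
    }
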
@llvmbot (Member) commented Jul 16, 2025

@llvm/pr-subscribers-backend-risc-v

Author: Philip Reames (preames)

Changes

If we're checking to see if a number is a multiple of a small constant, we need to be sure the multiply doesn't overflow for the mul logic to hold. The VL is an unsigned number, so we care about unsigned overflow. Once we've proven a number is a multiple, we can also use an exact udiv, as we know we're not discarding any bits.

This fixes what is technically a miscompile with EVL vectorization, but I doubt we'd ever have seen it in practice since most EVLs are going to be much less than UINT_MAX.


Patch is 23.80 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/149205.diff

2 Files Affected:

  • (modified) llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp (+9-11)
  • (modified) llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll (+27-137)
diff --git a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
index 025054d5a2a60..1be59e1ce4c1a 100644
--- a/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
+++ b/llvm/lib/Target/RISCV/RISCVInterleavedAccess.cpp
@@ -244,7 +244,7 @@ static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
   // Right now we're only recognizing the simplest pattern.
   uint64_t C;
   if (match(V, m_CombineOr(m_ConstantInt(C),
-                           m_c_Mul(m_Value(), m_ConstantInt(C)))) &&
+                           m_NUWMul(m_Value(), m_ConstantInt(C)))) &&
       C && C % N == 0)
     return true;
 
@@ -296,10 +296,8 @@ bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
     if (!isMultipleOfN(WideEVL, Load->getDataLayout(), Factor))
       return false;
 
-    VL = Builder.CreateZExt(
-        Builder.CreateUDiv(WideEVL,
-                           ConstantInt::get(WideEVL->getType(), Factor)),
-        XLenTy);
+    auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+    VL = Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
   }
 
   Type *PtrTy = Ptr->getType();
@@ -465,9 +463,9 @@ bool RISCVTargetLowering::lowerInterleavedVPLoad(
 
   auto *PtrTy = Load->getArgOperand(0)->getType();
   auto *XLenTy = Type::getIntNTy(Load->getContext(), Subtarget.getXLen());
-  Value *EVL = Builder.CreateZExt(
-      Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
-      XLenTy);
+  auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+  Value *EVL =
+      Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
 
   Value *Return = nullptr;
   if (isa<FixedVectorType>(VTy)) {
@@ -572,9 +570,9 @@ bool RISCVTargetLowering::lowerInterleavedVPStore(
 
   auto *PtrTy = Store->getArgOperand(1)->getType();
   auto *XLenTy = Type::getIntNTy(Store->getContext(), Subtarget.getXLen());
-  Value *EVL = Builder.CreateZExt(
-      Builder.CreateUDiv(WideEVL, ConstantInt::get(WideEVL->getType(), Factor)),
-      XLenTy);
+  auto *FactorC = ConstantInt::get(WideEVL->getType(), Factor);
+  Value *EVL =
+      Builder.CreateZExt(Builder.CreateExactUDiv(WideEVL, FactorC), XLenTy);
 
   if (isa<FixedVectorType>(VTy)) {
     SmallVector<Value *, 8> Operands(InterleaveOperands);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
index 27ecbe56bda42..8cfa237858aca 100644
--- a/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
+++ b/llvm/test/CodeGen/RISCV/rvv/vp-vector-interleaved-access.ll
@@ -18,7 +18,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor2_v2(ptr %ptr, i32 %
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg2e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 2
+  %rvl = mul nuw i32 %evl, 2
   %wide.masked.load = call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %rvl)
   %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv4i32(<vscale x 4 x i32> %wide.masked.load)
   %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
@@ -31,30 +31,18 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor2_v2(ptr %ptr, i32 %
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor3_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a2, a1, 1
-; RV32-NEXT:    add a1, a2, a1
-; RV32-NEXT:    lui a2, 699051
-; RV32-NEXT:    addi a2, a2, -1365
-; RV32-NEXT:    mulhu a1, a1, a2
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg3e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_factor3_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a2, a1, 1
-; RV64-NEXT:    add a1, a2, a1
-; RV64-NEXT:    lui a2, 699051
-; RV64-NEXT:    addi a2, a2, -1365
 ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    slli a2, a2, 32
-; RV64-NEXT:    mulhu a1, a1, a2
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg3e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 3
+  %rvl = mul nuw i32 %evl, 3
   %wide.masked.load = call <vscale x 6 x i32> @llvm.vp.load(ptr %ptr, <vscale x 6 x i1> splat (i1 true), i32 %rvl)
   %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide.masked.load)
   %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
@@ -69,12 +57,6 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor
 define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_partial(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor3_partial:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a2, a1, 1
-; RV32-NEXT:    add a1, a2, a1
-; RV32-NEXT:    lui a2, 699051
-; RV32-NEXT:    addi a2, a2, -1365
-; RV32-NEXT:    mulhu a1, a1, a2
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg3e32.v v7, (a0)
 ; RV32-NEXT:    vmv1r.v v8, v7
@@ -82,19 +64,13 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_partial(ptr %ptr,
 ;
 ; RV64-LABEL: load_factor3_partial:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a2, a1, 1
-; RV64-NEXT:    add a1, a2, a1
-; RV64-NEXT:    lui a2, 699051
-; RV64-NEXT:    addi a2, a2, -1365
 ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    slli a2, a2, 32
-; RV64-NEXT:    mulhu a1, a1, a2
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg3e32.v v7, (a0)
 ; RV64-NEXT:    vmv1r.v v8, v7
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 3
+  %rvl = mul nuw i32 %evl, 3
   %wide.masked.load = call <vscale x 6 x i32> @llvm.vp.load(ptr %ptr, <vscale x 6 x i1> splat (i1 true), i32 %rvl)
   %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide.masked.load)
   %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
@@ -111,12 +87,6 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_no_extract(ptr %pt
 ; RV32-NEXT:    li a2, 12
 ; RV32-NEXT:    beq a1, a2, .LBB3_2
 ; RV32-NEXT:  # %bb.1: # %bb0
-; RV32-NEXT:    slli a2, a1, 1
-; RV32-NEXT:    add a1, a2, a1
-; RV32-NEXT:    lui a2, 699051
-; RV32-NEXT:    addi a2, a2, -1365
-; RV32-NEXT:    mulhu a1, a1, a2
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg3e32.v v7, (a0)
 ; RV32-NEXT:    j .LBB3_3
@@ -134,14 +104,8 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_no_extract(ptr %pt
 ; RV64-NEXT:    li a3, 12
 ; RV64-NEXT:    beq a2, a3, .LBB3_2
 ; RV64-NEXT:  # %bb.1: # %bb0
-; RV64-NEXT:    slli a2, a1, 1
-; RV64-NEXT:    add a1, a2, a1
-; RV64-NEXT:    lui a2, 699051
-; RV64-NEXT:    addi a2, a2, -1365
 ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    slli a2, a2, 32
-; RV64-NEXT:    mulhu a1, a1, a2
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg3e32.v v7, (a0)
 ; RV64-NEXT:    j .LBB3_3
@@ -156,7 +120,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor3_no_extract(ptr %pt
   br i1 %p, label %bb0, label %bb1
 
 bb0:
-  %rvl.0 = mul i32 %evl, 3
+  %rvl.0 = mul nuw i32 %evl, 3
   %wide.load.0 = call <vscale x 6 x i32> @llvm.vp.load(ptr %ptr, <vscale x 6 x i1> splat (i1 true), i32 %rvl.0)
   %deinterleaved.results.0 = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave3(<vscale x 6 x i32> %wide.load.0)
   br label %merge
@@ -191,7 +155,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg4e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 4
+  %rvl = mul nuw i32 %evl, 4
   %wide.masked.load = call <vscale x 8 x i32> @llvm.vp.load.nxv8i32.p0(ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
   %d = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave4.nxv8i32(<vscale x 8 x i32> %wide.masked.load)
   %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 0
@@ -209,30 +173,18 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor5_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor5_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a2, a1, 2
-; RV32-NEXT:    add a1, a2, a1
-; RV32-NEXT:    lui a2, 838861
-; RV32-NEXT:    addi a2, a2, -819
-; RV32-NEXT:    mulhu a1, a1, a2
-; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg5e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_factor5_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a2, a1, 2
-; RV64-NEXT:    add a1, a2, a1
-; RV64-NEXT:    lui a2, 838861
-; RV64-NEXT:    addi a2, a2, -819
 ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    slli a2, a2, 32
-; RV64-NEXT:    mulhu a1, a1, a2
-; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg5e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 5
+  %rvl = mul nuw i32 %evl, 5
   %wide.masked.load = call <vscale x 10 x i32> @llvm.vp.load(ptr %ptr, <vscale x 10 x i1> splat (i1 true), i32 %rvl)
   %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave5(<vscale x 10 x i32> %wide.masked.load)
   %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
@@ -251,37 +203,18 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>} @load_factor7_v2(ptr %ptr, i32 %evl) {
 ; RV32-LABEL: load_factor7_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a2, a1, 3
-; RV32-NEXT:    sub a2, a2, a1
-; RV32-NEXT:    lui a1, 149797
-; RV32-NEXT:    addi a1, a1, -1755
-; RV32-NEXT:    mulhu a1, a2, a1
-; RV32-NEXT:    sub a2, a2, a1
-; RV32-NEXT:    srli a2, a2, 1
-; RV32-NEXT:    add a1, a2, a1
-; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV32-NEXT:    vlseg7e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: load_factor7_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a2, a1, 3
-; RV64-NEXT:    lui a3, 149797
-; RV64-NEXT:    subw a2, a2, a1
-; RV64-NEXT:    addi a1, a3, -1755
-; RV64-NEXT:    slli a3, a2, 32
 ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    mulhu a1, a3, a1
 ; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    subw a2, a2, a1
-; RV64-NEXT:    srliw a2, a2, 1
-; RV64-NEXT:    add a1, a2, a1
-; RV64-NEXT:    srli a1, a1, 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg7e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 7
+  %rvl = mul nuw i32 %evl, 7
   %wide.masked.load = call <vscale x 14 x i32> @llvm.vp.load(ptr %ptr, <vscale x 14 x i1> splat (i1 true), i32 %rvl)
   %deinterleaved.results = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave7(<vscale x 14 x i32> %wide.masked.load)
   %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %deinterleaved.results, 0
@@ -317,7 +250,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg8e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 8
+  %rvl = mul nuw i32 %evl, 8
   %wide.masked.load = call <vscale x 16 x i32> @llvm.vp.load.nxv16i32.p0(ptr %ptr, <vscale x 16 x i1> splat (i1 true), i32 %rvl)
   %d = call { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave8.nxv16i32(<vscale x 16 x i32> %wide.masked.load)
   %t0 = extractvalue { <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32> } %d, 0
@@ -356,7 +289,7 @@ define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vsseg2e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 2
+  %rvl = mul nuw i32 %evl, 2
   %interleaved.vec = call <vscale x 2 x i32> @llvm.vector.interleave2.nxv2i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1)
   call void @llvm.vp.store.nxv2i32.p0(<vscale x 2 x i32> %interleaved.vec, ptr %ptr, <vscale x 2 x i1> splat (i1 true), i32 %rvl)
   ret void
@@ -365,30 +298,18 @@ define void @store_factor2_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
 define void @store_factor3_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor3_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a2, a1, 1
-; RV32-NEXT:    add a1, a2, a1
-; RV32-NEXT:    lui a2, 699051
-; RV32-NEXT:    addi a2, a2, -1365
-; RV32-NEXT:    mulhu a1, a1, a2
-; RV32-NEXT:    srli a1, a1, 1
 ; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT:    vsseg3e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: store_factor3_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a2, a1, 1
-; RV64-NEXT:    add a1, a2, a1
-; RV64-NEXT:    lui a2, 699051
-; RV64-NEXT:    addi a2, a2, -1365
 ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    slli a2, a2, 32
-; RV64-NEXT:    mulhu a1, a1, a2
-; RV64-NEXT:    srli a1, a1, 33
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vsseg3e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 3
+  %rvl = mul nuw i32 %evl, 3
   %interleaved.vec = call <vscale x 3 x i32> @llvm.vector.interleave3(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2)
   call void @llvm.vp.store(<vscale x 3 x i32> %interleaved.vec, ptr %ptr, <vscale x 3 x i1> splat (i1 true), i32 %rvl)
   ret void
@@ -414,7 +335,7 @@ define void @store_factor4_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
 ; RV64-NEXT:    vmv1r.v v11, v9
 ; RV64-NEXT:    vsseg4e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 8
+  %rvl = mul nuw i32 %evl, 8
   %interleaved.vec = call <vscale x 4 x i32> @llvm.vector.interleave4.nxv4i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1)
   call void @llvm.vp.store.nxv4i32.p0(<vscale x 4 x i32> %interleaved.vec, ptr %ptr, <vscale x 4 x i1> splat (i1 true), i32 %rvl)
   ret void
@@ -423,30 +344,18 @@ define void @store_factor4_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
 define void @store_factor5_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor5_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a2, a1, 2
-; RV32-NEXT:    add a1, a2, a1
-; RV32-NEXT:    lui a2, 838861
-; RV32-NEXT:    addi a2, a2, -819
-; RV32-NEXT:    mulhu a1, a1, a2
-; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT:    vsseg5e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: store_factor5_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a2, a1, 2
-; RV64-NEXT:    add a1, a2, a1
-; RV64-NEXT:    lui a2, 838861
-; RV64-NEXT:    addi a2, a2, -819
 ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    slli a2, a2, 32
-; RV64-NEXT:    mulhu a1, a1, a2
-; RV64-NEXT:    srli a1, a1, 34
+; RV64-NEXT:    srli a1, a1, 32
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vsseg5e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 5
+  %rvl = mul nuw i32 %evl, 5
   %interleaved.vec = call <vscale x 5 x i32> @llvm.vector.interleave5(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4)
   call void @llvm.vp.store(<vscale x 5 x i32> %interleaved.vec, ptr %ptr, <vscale x 5 x i1> splat (i1 true), i32 %rvl)
   ret void
@@ -455,37 +364,18 @@ define void @store_factor5_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <v
 define void @store_factor7_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4, <vscale x 1 x i32> %v5, <vscale x 1 x i32> %v6, ptr %ptr, i32 %evl) {
 ; RV32-LABEL: store_factor7_v2:
 ; RV32:       # %bb.0:
-; RV32-NEXT:    slli a2, a1, 3
-; RV32-NEXT:    sub a2, a2, a1
-; RV32-NEXT:    lui a1, 149797
-; RV32-NEXT:    addi a1, a1, -1755
-; RV32-NEXT:    mulhu a1, a2, a1
-; RV32-NEXT:    sub a2, a2, a1
-; RV32-NEXT:    srli a2, a2, 1
-; RV32-NEXT:    add a1, a2, a1
-; RV32-NEXT:    srli a1, a1, 2
 ; RV32-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV32-NEXT:    vsseg7e32.v v8, (a0)
 ; RV32-NEXT:    ret
 ;
 ; RV64-LABEL: store_factor7_v2:
 ; RV64:       # %bb.0:
-; RV64-NEXT:    slli a2, a1, 3
-; RV64-NEXT:    lui a3, 149797
-; RV64-NEXT:    subw a2, a2, a1
-; RV64-NEXT:    addi a1, a3, -1755
-; RV64-NEXT:    slli a3, a2, 32
 ; RV64-NEXT:    slli a1, a1, 32
-; RV64-NEXT:    mulhu a1, a3, a1
 ; RV64-NEXT:    srli a1, a1, 32
-; RV64-NEXT:    subw a2, a2, a1
-; RV64-NEXT:    srliw a2, a2, 1
-; RV64-NEXT:    add a1, a2, a1
-; RV64-NEXT:    srli a1, a1, 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, mf2, ta, ma
 ; RV64-NEXT:    vsseg7e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 7
+  %rvl = mul nuw i32 %evl, 7
   %interleaved.vec = call <vscale x 7 x i32> @llvm.vector.interleave7(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v2, <vscale x 1 x i32> %v3, <vscale x 1 x i32> %v4,  <vscale x 1 x i32> %v5, <vscale x 1 x i32> %v6)
   call void @llvm.vp.store(<vscale x 7 x i32> %interleaved.vec, ptr %ptr, <vscale x 7 x i1> splat (i1 true), i32 %rvl)
   ret void
@@ -519,7 +409,7 @@ define void @store_factor8_v2(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, pt
 ; RV64-NEXT:    vmv1r.v v15, v9
 ; RV64-NEXT:    vsseg8e32.v v8, (a0)
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 8
+  %rvl = mul nuw i32 %evl, 8
   %interleaved.vec = call <vscale x 8 x i32> @llvm.vector.interleave8.nxv8i32(<vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1, <vscale x 1 x i32> %v0, <vscale x 1 x i32> %v1)
   call void @llvm.vp.store.nxv8i32.p0(<vscale x 8 x i32> %interleaved.vec, ptr %ptr, <vscale x 8 x i1> splat (i1 true), i32 %rvl)
   ret void
@@ -541,7 +431,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>} @masked_load_factor2_v2(<vscale
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg2e32.v v8, (a0), v0.t
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 2
+  %rvl = mul nuw i32 %evl, 2
   %interleaved.mask = tail call <vscale x 4 x i1> @llvm.vector.interleave2.nxv4i1(<vscale x 2 x i1> %mask, <vscale x 2 x i1> %mask)
   %wide.masked.load = tail call <vscale x 4 x i32> @llvm.vp.load.nxv4i32.p0(ptr %ptr, <vscale x 4 x i1> %interleaved.mask, i32 %rvl)
   %deinterleaved.results = tail call { <vscale x 2 x i32>, <vscale x 2 x i32> } @llvm.vector.deinterleave2.nxv16i32(<vscale x 4 x i32> %wide.masked.load)
@@ -568,7 +458,7 @@ define {<vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2 x i32>, <vscale x 2
 ; RV64-NEXT:    vsetvli zero, a1, e32, m1, ta, ma
 ; RV64-NEXT:    vlseg4e32.v v8, (a0), v0.t
 ; RV64-NEXT:    ret
-  %rvl = mul i32 %evl, 4
+  %rvl = mul nuw i32 %evl, 4
   %interleaved.mask = call <vscale x 8 x i...
[truncated]
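
For reference, a minimal sketch (value names are illustrative) of the IR the updated lowering builds for the factor-3 EVL case on a 64-bit target, matching the CreateExactUDiv and CreateZExt calls in the hunks above:

    define i64 @lowered_vl_sketch(i32 %evl) {
      %wide.evl = mul nuw i32 %evl, 3           ; incoming wide EVL from the vectorizer
      %vl.div   = udiv exact i32 %wide.evl, 3   ; Builder.CreateExactUDiv
      %vl       = zext i32 %vl.div to i64       ; Builder.CreateZExt to XLenTy
      ret i64 %vl
    }

The exact udiv of the nuw mul folds, leaving only the zext, which shows up as the lone slli/srli-by-32 pair in the RV64 check lines.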

@mshockwave (Member) left a comment

LGTM

@topperc (Collaborator) left a comment

LGTM

@preames preames merged commit b361885 into llvm:main Jul 17, 2025
9 checks passed
@preames preames deleted the wip-riscv-interleave-exact branch July 17, 2025 02:11