llvm
diff --git a/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp‎
Lines changed: 12 additions & 6 deletions b/‎llvm/lib/Transforms/Vectorize/LoopVectorize.cpp‎
Lines changed: 12 additions & 6 deletions
diff --git a/‎llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll‎
Lines changed: 12 additions & 12 deletions b/‎llvm/test/Transforms/LoopVectorize/AArch64/sve-vector-reverse.ll‎
Lines changed: 12 additions & 12 deletions
diff --git a/‎llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll‎
Lines changed: 2 additions & 2 deletions b/‎llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse-mask4.ll‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll‎
Lines changed: 8 additions & 8 deletions b/‎llvm/test/Transforms/LoopVectorize/AArch64/vector-reverse.ll‎
Lines changed: 8 additions & 8 deletions
diff --git a/‎llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll‎
Lines changed: 2 additions & 2 deletions b/‎llvm/test/Transforms/LoopVectorize/ARM/mve-gather-scatter-tailpred.ll‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll‎
Lines changed: 2 additions & 2 deletions b/‎llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll‎
Lines changed: 2 additions & 2 deletions
@@ -8343,17 +8343,23 @@ VPRecipeBuilder::tryToWidenMemory(Instruction *I, ArrayRef<VPValue *> Operands,
     auto *GEP = dyn_cast<GetElementPtrInst>(
         Ptr->getUnderlyingValue()->stripPointerCasts());
     VPSingleDefRecipe *VectorPtr;
-    if (Reverse)
-      // N.B. we deliberately do pass getGEPNoWrapFlags here, because this
-      // transform can invalidate `inbounds`.
+    if (Reverse) {
+      // When folding the tail, we may compute an address that we don't in the
+      // original scalar loop and it may not be inbounds. Drop Inbounds in that
+      // case.
+      GEPNoWrapFlags Flags =
+          (CM.foldTailByMasking() || !GEP || !GEP->isInBounds())
+              ? GEPNoWrapFlags::none()
+              : GEPNoWrapFlags::inBounds();
       VectorPtr = new VPReverseVectorPointerRecipe(
-          Ptr, &Plan.getVF(), getLoadStoreType(I), GEPNoWrapFlags::none(),
-          I->getDebugLoc());
-    else
+          Ptr, &Plan.getVF(), getLoadStoreType(I),
+          Flags, I->getDebugLoc());
+    } else {
       VectorPtr = new VPVectorPointerRecipe(Ptr, getLoadStoreType(I),
                                             GEP ? GEP->getNoWrapFlags()
                                                 : GEPNoWrapFlags::none(),
                                             I->getDebugLoc());
+    }
     Builder.getInsertBlock()->appendRecipe(VectorPtr);
     Ptr = VectorPtr;
   }
 
@@ -33,22 +33,22 @@ define void @vector_reverse_f64(i64 %N, ptr noalias %a, ptr noalias %b) #0{
 ; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[N]], [[TMP7]]
 ; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, ptr [[B:%.*]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = sub i64 1, [[TMP5]]
-; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr double, ptr [[TMP9]], i64 [[TMP10]]
+; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = sub i64 0, [[TMP5]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 1, [[TMP5]]
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr double, ptr [[TMP9]], i64 [[TMP12]]
-; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr double, ptr [[TMP14]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds double, ptr [[TMP9]], i64 [[TMP12]]
+; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds double, ptr [[TMP14]], i64 [[TMP13]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x double>, ptr [[TMP11]], align 8
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <vscale x 8 x double>, ptr [[TMP15]], align 8
 ; CHECK-NEXT:    [[TMP16:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP17:%.*]] = fadd <vscale x 8 x double> [[WIDE_LOAD1]], splat (double 1.000000e+00)
 ; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, ptr [[A:%.*]], i64 [[TMP8]]
 ; CHECK-NEXT:    [[TMP19:%.*]] = sub i64 1, [[TMP5]]
-; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr double, ptr [[TMP18]], i64 [[TMP19]]
+; CHECK-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP19]]
 ; CHECK-NEXT:    [[TMP21:%.*]] = sub i64 0, [[TMP5]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = sub i64 1, [[TMP5]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr double, ptr [[TMP18]], i64 [[TMP21]]
-; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr double, ptr [[TMP23]], i64 [[TMP22]]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, ptr [[TMP18]], i64 [[TMP21]]
+; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double, ptr [[TMP23]], i64 [[TMP22]]
 ; CHECK-NEXT:    store <vscale x 8 x double> [[TMP16]], ptr [[TMP20]], align 8
 ; CHECK-NEXT:    store <vscale x 8 x double> [[TMP17]], ptr [[TMP24]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP6]]
@@ -128,22 +128,22 @@ define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-NEXT:    [[TMP11:%.*]] = add i64 [[N]], [[TMP10]]
 ; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, ptr [[B]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[TMP13:%.*]] = sub i64 1, [[TMP8]]
-; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr i64, ptr [[TMP12]], i64 [[TMP13]]
+; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP13]]
 ; CHECK-NEXT:    [[TMP15:%.*]] = sub i64 0, [[TMP8]]
 ; CHECK-NEXT:    [[TMP16:%.*]] = sub i64 1, [[TMP8]]
-; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr i64, ptr [[TMP12]], i64 [[TMP15]]
-; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr i64, ptr [[TMP17]], i64 [[TMP16]]
+; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i64, ptr [[TMP12]], i64 [[TMP15]]
+; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i64, ptr [[TMP17]], i64 [[TMP16]]
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <vscale x 8 x i64>, ptr [[TMP14]], align 8
 ; CHECK-NEXT:    [[WIDE_LOAD3:%.*]] = load <vscale x 8 x i64>, ptr [[TMP18]], align 8
 ; CHECK-NEXT:    [[TMP19:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP20:%.*]] = add <vscale x 8 x i64> [[WIDE_LOAD3]], splat (i64 1)
 ; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[TMP11]]
 ; CHECK-NEXT:    [[TMP22:%.*]] = sub i64 1, [[TMP8]]
-; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr i64, ptr [[TMP21]], i64 [[TMP22]]
+; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP22]]
 ; CHECK-NEXT:    [[TMP24:%.*]] = sub i64 0, [[TMP8]]
 ; CHECK-NEXT:    [[TMP25:%.*]] = sub i64 1, [[TMP8]]
-; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr i64, ptr [[TMP21]], i64 [[TMP24]]
-; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr i64, ptr [[TMP26]], i64 [[TMP25]]
+; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i64, ptr [[TMP21]], i64 [[TMP24]]
+; CHECK-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i64, ptr [[TMP26]], i64 [[TMP25]]
 ; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP19]], ptr [[TMP23]], align 8
 ; CHECK-NEXT:    store <vscale x 8 x i64> [[TMP20]], ptr [[TMP27]], align 8
 ; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
 
@@ -34,8 +34,8 @@ define void @vector_reverse_mask_v4i1(ptr noalias %a, ptr noalias %cond, i64 %N)
 ; CHECK-NEXT:    [[TMP0:%.*]] = xor i64 [[INDEX]], -1
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[N]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds double, ptr [[COND:%.*]], i64 [[TMP1]]
-; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -24
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i8, ptr [[TMP2]], i64 -56
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -24
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i8, ptr [[TMP2]], i64 -56
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x double>, ptr [[TMP3]], align 8
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x double> [[WIDE_LOAD]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x double>, ptr [[TMP4]], align 8
 
@@ -11,14 +11,14 @@
 define void @vector_reverse_f64(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-LABEL: vector_reverse_f64
 ; CHECK-LABEL: vector.body
-; CHECK: %[[GEP:.*]] = getelementptr double, ptr %{{.*}}, i32 0
-; CHECK-NEXT: %[[GEP1:.*]] = getelementptr double, ptr %[[GEP]], i32 -7
+; CHECK: %[[GEP:.*]] = getelementptr inbounds double, ptr %{{.*}}, i32 0
+; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds double, ptr %[[GEP]], i32 -7
 ; CHECK-NEXT: %[[WIDE:.*]] = load <8 x double>, ptr %[[GEP1]], align 8
 ; CHECK-NEXT: %[[REVERSE:.*]] = shufflevector <8 x double> %[[WIDE]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: %[[FADD:.*]] = fadd <8 x double> %[[REVERSE]]
 ; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds double, ptr {{.*}}, i64 {{.*}}
-; CHECK-NEXT: %[[GEP3:.*]] = getelementptr double, ptr %[[GEP2]], i32 0
-; CHECK-NEXT: %[[GEP4:.*]] = getelementptr double, ptr %[[GEP3]], i32 -7
+; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds double, ptr %[[GEP2]], i32 0
+; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds double, ptr %[[GEP3]], i32 -7
 ; CHECK-NEXT: %[[REVERSE6:.*]] = shufflevector <8 x double> %[[FADD]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:  store <8 x double> %[[REVERSE6]], ptr %[[GEP4]], align 8
 
@@ -44,14 +44,14 @@ for.body:                                         ; preds = %entry, %for.body
 define void @vector_reverse_i64(i64 %N, ptr %a, ptr %b) #0 {
 ; CHECK-LABEL: vector_reverse_i64
 ; CHECK-LABEL: vector.body
-; CHECK: %[[GEP:.*]] = getelementptr i64, ptr %{{.*}}, i32 0
-; CHECK-NEXT: %[[GEP1:.*]] = getelementptr i64, ptr %[[GEP]], i32 -7
+; CHECK: %[[GEP:.*]] = getelementptr inbounds i64, ptr %{{.*}}, i32 0
+; CHECK-NEXT: %[[GEP1:.*]] = getelementptr inbounds i64, ptr %[[GEP]], i32 -7
 ; CHECK-NEXT: %[[WIDE:.*]] = load <8 x i64>, ptr %[[GEP1]], align 8
 ; CHECK-NEXT: %[[REVERSE:.*]] = shufflevector <8 x i64> %[[WIDE]], <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT: %[[FADD:.*]] = add <8 x i64> %[[REVERSE]]
 ; CHECK-NEXT: %[[GEP2:.*]] = getelementptr inbounds i64, ptr {{.*}}, i64 {{.*}}
-; CHECK-NEXT: %[[GEP3:.*]] = getelementptr i64, ptr %[[GEP2]], i32 0
-; CHECK-NEXT: %[[GEP4:.*]] = getelementptr i64, ptr %[[GEP3]], i32 -7
+; CHECK-NEXT: %[[GEP3:.*]] = getelementptr inbounds i64, ptr %[[GEP2]], i32 0
+; CHECK-NEXT: %[[GEP4:.*]] = getelementptr inbounds i64, ptr %[[GEP3]], i32 -7
 ; CHECK-NEXT: %[[REVERSE6:.*]] = shufflevector <8 x i64> %[[FADD]], <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:  store <8 x i64> %[[REVERSE6]], ptr %[[GEP4]], align 8
 
 
@@ -80,8 +80,8 @@ define void @test_stride-1_4i32(ptr readonly %data, ptr noalias nocapture %dst,
 ; CHECK-NEXT:    [[TMP1:%.*]] = mul nuw nsw i32 [[TMP0]], -1
 ; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[TMP1]], 2
 ; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, ptr [[DATA:%.*]], i32 [[TMP2]]
-; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr i32, ptr [[TMP3]], i32 0
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 -3
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[TMP3]], i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 -3
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP5]], align 4
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP6:%.*]] = add nsw <4 x i32> splat (i32 5), [[REVERSE]]
 
@@ -272,8 +272,8 @@ define void @strides_different_direction(ptr noalias nocapture %A, ptr noalias n
 ; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
 ; CHECK-NEXT:    [[TMP3:%.*]] = sub nsw i32 [[N:%.*]], [[TMP0]]
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, ptr [[C:%.*]], i32 [[TMP3]]
-; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr i32, ptr [[TMP4]], i32 0
-; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr i32, ptr [[TMP5]], i32 -3
+; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, ptr [[TMP4]], i32 0
+; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[TMP5]], i32 -3
 ; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, ptr [[TMP6]], align 4
 ; CHECK-NEXT:    [[REVERSE:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
 ; CHECK-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[REVERSE]], [[WIDE_LOAD]]