[VPlan] Preserve nusw in createInBoundsPtrAdd #151549
Conversation
@llvm/pr-subscribers-llvm-transforms

Author: Ramkumar Ramachandra (artagnon)

Changes

Patch is 23.68 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/151549.diff

3 Files Affected:

- (modified) llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
- (modified) llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
- (modified) llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
index ea0fa0668ef6b..71d26eae37a1f 100644
--- a/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
+++ b/llvm/lib/Transforms/Vectorize/LoopVectorizationPlanner.h
@@ -250,19 +250,13 @@ class VPBuilder {
}
VPInstruction *createPtrAdd(VPValue *Ptr, VPValue *Offset,
+ GEPNoWrapFlags Flags = GEPNoWrapFlags::none(),
DebugLoc DL = DebugLoc::getUnknown(),
const Twine &Name = "") {
return tryInsertInstruction(
-        new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
-                          GEPNoWrapFlags::none(), DL, Name));
+        new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset}, Flags, DL,
+                          Name));
}
- VPInstruction *createInBoundsPtrAdd(VPValue *Ptr, VPValue *Offset,
- DebugLoc DL = DebugLoc::getUnknown(),
- const Twine &Name = "") {
- return tryInsertInstruction(
- new VPInstruction(VPInstruction::PtrAdd, {Ptr, Offset},
- GEPNoWrapFlags::inBounds(), DL, Name));
- }
VPPhi *createScalarPhi(ArrayRef<VPValue *> IncomingValues, DebugLoc DL,
const Twine &Name = "") {
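With the dedicated in-bounds helper removed, every caller goes through the single createPtrAdd entry point and forwards whatever no-wrap flags it has. A minimal sketch of the intended usage, mirroring the callsite changes in the next file (Addr, Builder, Ptr, Offset, and DL are stand-ins, not names from this patch):

    // Read the no-wrap flags (inbounds, nusw, nuw) off the source GEP, if
    // any, and forward them rather than collapsing them to one inbounds bit.
    GEPNoWrapFlags NW = GEPNoWrapFlags::none();
    if (auto *Gep = dyn_cast<GetElementPtrInst>(Addr->stripPointerCasts()))
      NW = Gep->getNoWrapFlags();
    VPInstruction *PtrAdd =
        Builder.createPtrAdd(Ptr, Offset, NW, DL, "offset.ptr");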
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index a1d12a3a01e5e..a94eaeefa16c6 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -670,6 +670,7 @@ static void legalizeAndOptimizeInductions(VPlan &Plan) {
nullptr, StartV, StepV, PtrIV->getDebugLoc(), Builder);
VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
+ GEPNoWrapFlags::none(),
PtrIV->getDebugLoc(), "next.gep");
PtrIV->replaceAllUsesWith(PtrAdd);
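Because the new Flags parameter precedes DebugLoc in the signature, callers that pass a debug location positionally must now spell out the flags even when none apply, which is all this hunk does. A sketch of the updated call, assuming nothing beyond the hunk itself:

    // An explicit GEPNoWrapFlags::none() is required once a DebugLoc and a
    // name follow it in the argument list.
    VPValue *PtrAdd = Builder.createPtrAdd(PtrIV->getStartValue(), Steps,
                                           GEPNoWrapFlags::none(),
                                           PtrIV->getDebugLoc(), "next.gep");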
@@ -848,9 +849,9 @@ optimizeLatchExitInductionUser(VPlan &Plan, VPTypeAnalysis &TypeInfo,
if (ScalarTy->isPointerTy()) {
auto *Zero = Plan.getOrAddLiveIn(
ConstantInt::get(Step->getLiveInIRValue()->getType(), 0));
- return B.createPtrAdd(EndValue,
- B.createNaryOp(Instruction::Sub, {Zero, Step}), {},
- "ind.escape");
+ return B.createPtrAdd(
+ EndValue, B.createNaryOp(Instruction::Sub, {Zero, Step}),
+ GEPNoWrapFlags::none(), DebugLoc::getCompilerGenerated(), "ind.escape");
}
if (ScalarTy->isFloatingPointTy()) {
const auto &ID = WideIV->getInductionDescriptor();
@@ -2557,10 +2558,10 @@ void VPlanTransforms::createInterleaveGroups(
auto *InsertPos =
cast<VPWidenMemoryRecipe>(RecipeBuilder.getRecipe(IRInsertPos));
- bool InBounds = false;
+ GEPNoWrapFlags NW = GEPNoWrapFlags::none();
if (auto *Gep = dyn_cast<GetElementPtrInst>(
getLoadStorePointerOperand(IRInsertPos)->stripPointerCasts()))
- InBounds = Gep->isInBounds();
+ NW = Gep->getNoWrapFlags();
// Get or create the start address for the interleave group.
auto *Start =
@@ -2584,8 +2585,7 @@ void VPlanTransforms::createInterleaveGroups(
VPValue *OffsetVPV =
Plan.getOrAddLiveIn(ConstantInt::get(Plan.getContext(), -Offset));
VPBuilder B(InsertPos);
- Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
- : B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);
+ Addr = B.createPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);
}
// If the group is reverse, adjust the index to refer to the last vector
// lane instead of the first. We adjust the index from the first vector
@@ -2594,9 +2594,7 @@ void VPlanTransforms::createInterleaveGroups(
if (IG->isReverse()) {
auto *ReversePtr = new VPVectorEndPointerRecipe(
Addr, &Plan.getVF(), getLoadStoreType(IRInsertPos),
- -(int64_t)IG->getFactor(),
- InBounds ? GEPNoWrapFlags::inBounds() : GEPNoWrapFlags::none(),
- InsertPos->getDebugLoc());
+ -(int64_t)IG->getFactor(), NW, InsertPos->getDebugLoc());
ReversePtr->insertBefore(InsertPos);
Addr = ReversePtr;
}
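The bool-to-GEPNoWrapFlags switch is strictly more precise: in LLVM's flag representation, inbounds implies nusw, while a GEP can carry nusw (or nuw) without being inbounds, which is exactly the case the old boolean dropped. An illustration, assuming the standard GEPNoWrapFlags helpers:

    // inBounds() implies nusw; a nusw-only GEP is not inbounds.
    GEPNoWrapFlags IB = GEPNoWrapFlags::inBounds();
    GEPNoWrapFlags NS = GEPNoWrapFlags::noUnsignedSignedWrap();
    // Old code: NS.isInBounds() == false, so a lone nusw flag was discarded.
    // New code: NS is forwarded unchanged to the PtrAdd and the
    // VPVectorEndPointerRecipe, which is what the test below checks.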
diff --git a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
index add58758788f9..953d106e3d390 100644
--- a/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/llvm/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -398,6 +398,69 @@ for.body: ; preds = %for.body, %entry
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
+; Variant of the above with nusw flag on the GEP.
+define void @nowrap_flag_preservation(ptr noalias %A, ptr %B) {
+; CHECK-LABEL: @nowrap_flag_preservation(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
+; CHECK: vector.ph:
+; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 1023, i32 1022, i32 1021, i32 1020>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
+; CHECK-NEXT: [[OFFSET_IDX:%.*]] = sub i64 1023, [[INDEX]]
+; CHECK-NEXT: [[TMP0:%.*]] = getelementptr nusw [[STRUCT_ST2:%.*]], ptr [[A:%.*]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP1:%.*]] = getelementptr nusw i8, ptr [[TMP0]], i64 -24
+; CHECK-NEXT: [[WIDE_VEC:%.*]] = load <8 x i32>, ptr [[TMP1]], align 4
+; CHECK-NEXT: [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-NEXT: [[REVERSE:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[STRIDED_VEC1:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 1, i32 3, i32 5, i32 7>
+; CHECK-NEXT: [[REVERSE2:%.*]] = shufflevector <4 x i32> [[STRIDED_VEC1]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[REVERSE]], [[VEC_IND]]
+; CHECK-NEXT: [[TMP3:%.*]] = sub nsw <4 x i32> [[REVERSE2]], [[VEC_IND]]
+; CHECK-NEXT: [[TMP4:%.*]] = getelementptr nusw [[STRUCT_ST2]], ptr [[B:%.*]], i64 [[OFFSET_IDX]]
+; CHECK-NEXT: [[TMP5:%.*]] = getelementptr nusw i8, ptr [[TMP4]], i64 -24
+; CHECK-NEXT: [[REVERSE3:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[REVERSE4:%.*]] = shufflevector <4 x i32> [[TMP3]], <4 x i32> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: [[INTERLEAVED_VEC:%.*]] = shufflevector <4 x i32> [[REVERSE3]], <4 x i32> [[REVERSE4]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP5]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
+; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 -4)
+; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
+; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK: middle.block:
+; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
+; CHECK: scalar.ph:
+; CHECK-NEXT: br label [[FOR_BODY:%.*]]
+; CHECK: for.cond.cleanup:
+; CHECK-NEXT: ret void
+; CHECK: for.body:
+; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP13:![0-9]+]]
+;
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %iv = phi i64 [ 1023, %entry ], [ %iv.next, %for.body ]
+ %x = getelementptr nusw %struct.ST2, ptr %A, i64 %iv, i32 0
+ %tmp = load i32, ptr %x, align 4
+ %tmp1 = trunc i64 %iv to i32
+ %add = add nsw i32 %tmp, %tmp1
+ %y = getelementptr nusw %struct.ST2, ptr %A, i64 %iv, i32 1
+ %tmp2 = load i32, ptr %y, align 4
+ %sub = sub nsw i32 %tmp2, %tmp1
+ %x5 = getelementptr nusw %struct.ST2, ptr %B, i64 %iv, i32 0
+ store i32 %add, ptr %x5, align 4
+ %y8 = getelementptr nusw %struct.ST2, ptr %B, i64 %iv, i32 1
+ store i32 %sub, ptr %y8, align 4
+ %iv.next = add nsw i64 %iv, -1
+ %cmp = icmp sgt i64 %iv, 0
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
; Check vectorization on an interleaved load group of factor 2 with 1 gap
; (missing the load of odd elements). Because the vectorized loop would
; speculatively access memory out-of-bounds, we must execute at least one
@@ -427,7 +490,7 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
; CHECK-NEXT: store <4 x i32> [[TMP1]], ptr [[TMP3]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 508
-; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -444,7 +507,7 @@ define void @even_load_static_tc(ptr noalias nocapture readonly %A, ptr noalias
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[CMP:%.*]] = icmp samesign ult i64 [[INDVARS_IV]], 1022
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
;
entry:
br label %for.body
@@ -503,7 +566,7 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP8]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -521,7 +584,7 @@ define void @even_load_dynamic_tc(ptr noalias nocapture readonly %A, ptr noalias
; CHECK-NEXT: store i32 [[MUL]], ptr [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 2
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[INDVARS_IV_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]], !llvm.loop [[LOOP17:![0-9]+]]
;
entry:
br label %for.body
@@ -614,13 +677,13 @@ define void @load_gap_reverse(ptr noalias nocapture %P1, ptr noalias nocapture %
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 -4)
; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_EXIT:%.*]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_EXIT]], !llvm.loop [[LOOP17:![0-9]+]]
+; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_EXIT]], !llvm.loop [[LOOP19:![0-9]+]]
; CHECK: for.exit:
; CHECK-NEXT: ret void
;
@@ -676,7 +739,7 @@ define void @mixed_load2_store2(ptr noalias nocapture readonly %A, ptr noalias n
; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP4]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 512
-; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
; CHECK: scalar.ph:
@@ -684,7 +747,7 @@ define void @mixed_load2_store2(ptr noalias nocapture readonly %A, ptr noalias n
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
-; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP19:![0-9]+]]
+; CHECK-NEXT: br i1 poison, label [[FOR_BODY]], label [[FOR_COND_CLEANUP]], !llvm.loop [[LOOP21:![0-9]+]]
;
entry:
br label %for.body
@@ -748,7 +811,7 @@ define void @mixed_load3_store3(ptr nocapture %A) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[FOR_COND_CLEANUP:%.*]]
; CHECK: scalar.ph:
@@ -756,7 +819,7 @@ define void @mixed_load3_store3(ptr nocapture %A) {
; CHECK: for.cond.cleanup:
; CHECK-NEXT: ret void
; CHECK: for.body:
-; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
+; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
;
entry:
br label %for.body
@@ -831,7 +894,7 @@ define void @int_float_struct(ptr nocapture readonly %A) #0 {
; CHECK-NEXT: [[TMP3]] = fadd fast <4 x float> [[VEC_PHI]], [[TMP1]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024
-; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP5:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float 0.000000e+00, <4 x float> [[TMP3]])
; CHECK-NEXT: [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP2]])
@@ -845,7 +908,7 @@ define void @int_float_struct(ptr nocapture readonly %A) #0 {
; CHECK-NEXT: store float [[ADD3_LCSSA]], ptr @SB, align 4
; CHECK-NEXT: ret void
; CHECK: for.body:
-; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
+; CHECK-NEXT: br i1 poison, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
;
entry:
br label %for.body
@@ -928,7 +991,7 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) {
; CHECK-NEXT: store i32 [[TMP16]], ptr [[TMP12]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -942,7 +1005,7 @@ define void @PR27626_0(ptr %p, i32 %z, i64 %n) {
; CHECK-NEXT: store i32 [[Z]], ptr [[P_I_Y]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP25:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP27:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -1012,7 +1075,7 @@ define i32 @PR27626_1(ptr %p, i64 %n) {
; CHECK-NEXT: [[TMP14]] = add <4 x i32> [[STRIDED_VEC2]], [[VEC_PHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP14]])
; CHECK-NEXT: br label [[SCALAR_PH]]
@@ -1030,7 +1093,7 @@ define i32 @PR27626_1(ptr %p, i64 %n) {
; CHECK-NEXT: [[TMP18]] = add nsw i32 [[TMP17]], [[S]]
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP27:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP29:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 [[TMP18]]
;
@@ -1106,7 +1169,7 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) {
; CHECK-NEXT: store i32 [[TMP17]], ptr [[TMP13]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: br label [[SCALAR_PH]]
; CHECK: scalar.ph:
@@ -1122,7 +1185,7 @@ define void @PR27626_2(ptr %p, i64 %n, i32 %z) {
; CHECK-NEXT: store i32 [[TMP19]], ptr [[P_I_Y]], align 4
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP29:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP31:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret void
;
@@ -1198,7 +1261,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) {
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], splat (i64 4)
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
+; CHECK-NEXT: br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP19:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP17]])
; CHECK-NEXT: br label [[SCALAR_PH]]
@@ -1219,7 +1282,7 @@ define i32 @PR27626_3(ptr %p, i64 %n, i32 %z) {
; CHECK-NEXT: [[TMP22]] = add nsw i32 [[TMP21]], [[S]]
; CHECK-NEXT: [[I_NEXT]] = add nuw nsw i64 [[I]], 1
; CHECK-NEXT: [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
-; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP31:![0-9]+]]
+; CHECK-NEXT: br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP33:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: ret i32 [[TMP22]]
;
@@ -1294,7 +1357,7 @@ define void @PR27626_4(ptr %a, i32 %x, i32 %y, i32 %z, i64 %n) {
; CHECK-NEXT: store <8 x i32> [[INTERLEAVED_VEC]], ptr [[TMP14]], align 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; CHECK-NEXT: br i1 [[TMP13]], label [[MI...
[truncated]
artagnon force-pushed from 1c85be3 to cfc11a5
artagnon force-pushed from cfc11a5 to 3f27213
artagnon force-pushed from 3f27213 to d69f681
Gentle ping.
fhahn left a comment:
LGTM, thanks
Rename createInBoundsPtrAdd to createNoWrapPtrAdd, and preserve nusw as well as inbounds at the callsite.
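Concretely, the callsite change this describes, as it appears in the VPlanTransforms.cpp hunk above (NW holds the flags read from the source GEP):

    // Before: the flags were collapsed into a single inbounds bit.
    Addr = InBounds ? B.createInBoundsPtrAdd(InsertPos->getAddr(), OffsetVPV)
                    : B.createPtrAdd(InsertPos->getAddr(), OffsetVPV);

    // After: the full flag set (inbounds, nusw, nuw) is forwarded unchanged.
    Addr = B.createPtrAdd(InsertPos->getAddr(), OffsetVPV, NW);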