-
Notifications
You must be signed in to change notification settings - Fork 15.3k
[VPlan] Handle canonical VPWidenIntOrFpInduction in branch-condition simplification #153539
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-vectorizers @llvm/pr-subscribers-llvm-transforms Author: Shih-Po Hung (arcbbb) Changes: SimplifyBranchConditionForVFAndUF only recognized canonical IVs and a few PHI recipes in the loop header. This patch replaces canonical VPWidenIntOrFpInduction with a StepVector in the vector preheader, since the vector loop region only executes once. Patch is 86.67 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/153539.diff 9 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
index 7c79a9b5e3432..7ed9a591900ab 100644
--- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
+++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp
@@ -1499,10 +1499,21 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
// (BranchOnCond true).
auto *Header = cast<VPBasicBlock>(VectorRegion->getEntry());
auto *CanIVTy = Plan.getCanonicalIV()->getScalarType();
- if (all_of(Header->phis(),
- IsaPred<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
- VPFirstOrderRecurrencePHIRecipe, VPPhi>)) {
+ if (all_of(Header->phis(), [](VPRecipeBase &Phi) {
+ if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&Phi))
+ return R->isCanonical();
+ return isa<VPCanonicalIVPHIRecipe, VPEVLBasedIVPHIRecipe,
+ VPFirstOrderRecurrencePHIRecipe, VPPhi>(&Phi);
+ })) {
for (VPRecipeBase &HeaderR : make_early_inc_range(Header->phis())) {
+ if (auto *R = dyn_cast<VPWidenIntOrFpInductionRecipe>(&HeaderR)) {
+ VPBuilder Builder(Plan.getVectorPreheader());
+ VPValue *StepV = Builder.createNaryOp(VPInstruction::StepVector, {},
+ R->getScalarType());
+ HeaderR.getVPSingleValue()->replaceAllUsesWith(StepV);
+ HeaderR.eraseFromParent();
+ continue;
+ }
auto *Phi = cast<VPPhiAccessors>(&HeaderR);
HeaderR.getVPSingleValue()->replaceAllUsesWith(Phi->getIncomingValue(0));
HeaderR.eraseFromParent();
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
index a2c6a21796e8f..62c1b7b1fcf2f 100644
--- a/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll
@@ -771,85 +771,55 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
; DEFAULT: [[VECTOR_PH]]:
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
; DEFAULT: [[VECTOR_BODY]]:
-; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; DEFAULT-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; DEFAULT-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8
-; DEFAULT-NEXT: [[TMP1:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 6)
-; DEFAULT-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
-; DEFAULT-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; DEFAULT: [[PRED_STORE_IF]]:
-; DEFAULT-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
-; DEFAULT-NEXT: [[TMP5:%.*]] = add i8 [[TMP0]], 0
-; DEFAULT-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
+; DEFAULT-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 0
+; DEFAULT-NEXT: store i8 0, ptr [[TMP0]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]]
; DEFAULT: [[PRED_STORE_CONTINUE]]:
-; DEFAULT-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
-; DEFAULT-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; DEFAULT: [[PRED_STORE_IF1]]:
-; DEFAULT-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
-; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
-; DEFAULT-NEXT: [[TMP9:%.*]] = add i8 [[TMP0]], 1
-; DEFAULT-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1
+; DEFAULT-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1
+; DEFAULT-NEXT: store i8 1, ptr [[TMP1]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; DEFAULT: [[PRED_STORE_CONTINUE2]]:
-; DEFAULT-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
-; DEFAULT-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; DEFAULT: [[PRED_STORE_IF3]]:
-; DEFAULT-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2
-; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]]
-; DEFAULT-NEXT: [[TMP13:%.*]] = add i8 [[TMP0]], 2
-; DEFAULT-NEXT: store i8 [[TMP13]], ptr [[TMP12]], align 1
+; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 2
+; DEFAULT-NEXT: store i8 2, ptr [[TMP2]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; DEFAULT: [[PRED_STORE_CONTINUE4]]:
-; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
-; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; DEFAULT: [[PRED_STORE_IF5]]:
-; DEFAULT-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 3
-; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]]
-; DEFAULT-NEXT: [[TMP17:%.*]] = add i8 [[TMP0]], 3
-; DEFAULT-NEXT: store i8 [[TMP17]], ptr [[TMP16]], align 1
+; DEFAULT-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 3
+; DEFAULT-NEXT: store i8 3, ptr [[TMP3]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; DEFAULT: [[PRED_STORE_CONTINUE6]]:
-; DEFAULT-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
-; DEFAULT-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; DEFAULT: [[PRED_STORE_IF7]]:
-; DEFAULT-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 4
-; DEFAULT-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP19]]
-; DEFAULT-NEXT: [[TMP21:%.*]] = add i8 [[TMP0]], 4
-; DEFAULT-NEXT: store i8 [[TMP21]], ptr [[TMP20]], align 1
+; DEFAULT-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 4
+; DEFAULT-NEXT: store i8 4, ptr [[TMP4]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; DEFAULT: [[PRED_STORE_CONTINUE8]]:
-; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
-; DEFAULT-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; DEFAULT: [[PRED_STORE_IF9]]:
-; DEFAULT-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 5
-; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
-; DEFAULT-NEXT: [[TMP25:%.*]] = add i8 [[TMP0]], 5
-; DEFAULT-NEXT: store i8 [[TMP25]], ptr [[TMP24]], align 1
+; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 5
+; DEFAULT-NEXT: store i8 5, ptr [[TMP5]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; DEFAULT: [[PRED_STORE_CONTINUE10]]:
-; DEFAULT-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
-; DEFAULT-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; DEFAULT-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; DEFAULT: [[PRED_STORE_IF11]]:
-; DEFAULT-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 6
-; DEFAULT-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
-; DEFAULT-NEXT: [[TMP29:%.*]] = add i8 [[TMP0]], 6
-; DEFAULT-NEXT: store i8 [[TMP29]], ptr [[TMP28]], align 1
+; DEFAULT-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 6
+; DEFAULT-NEXT: store i8 6, ptr [[TMP6]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; DEFAULT: [[PRED_STORE_CONTINUE12]]:
-; DEFAULT-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
-; DEFAULT-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; DEFAULT-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; DEFAULT: [[PRED_STORE_IF13]]:
-; DEFAULT-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 7
-; DEFAULT-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP31]]
-; DEFAULT-NEXT: [[TMP33:%.*]] = add i8 [[TMP0]], 7
-; DEFAULT-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1
+; DEFAULT-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7
+; DEFAULT-NEXT: store i8 7, ptr [[TMP7]], align 1
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE14]]
; DEFAULT: [[PRED_STORE_CONTINUE14]]:
-; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
-; DEFAULT-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
+; DEFAULT-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; DEFAULT: [[MIDDLE_BLOCK]]:
; DEFAULT-NEXT: br label %[[EXIT:.*]]
; DEFAULT: [[SCALAR_PH]]:
@@ -861,7 +831,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
; DEFAULT-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1
; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 7
-; DEFAULT-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP25:![0-9]+]]
+; DEFAULT-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP24:![0-9]+]]
; DEFAULT: [[EXIT]]:
; DEFAULT-NEXT: ret void
;
@@ -872,85 +842,55 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
; PRED: [[VECTOR_PH]]:
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
; PRED: [[VECTOR_BODY]]:
-; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE14:.*]] ]
-; PRED-NEXT: [[VEC_IND:%.*]] = phi <8 x i8> [ <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[PRED_STORE_CONTINUE14]] ]
-; PRED-NEXT: [[TMP0:%.*]] = trunc i64 [[INDEX]] to i8
-; PRED-NEXT: [[TMP1:%.*]] = icmp ule <8 x i8> [[VEC_IND]], splat (i8 6)
-; PRED-NEXT: [[TMP2:%.*]] = extractelement <8 x i1> [[TMP1]], i32 0
-; PRED-NEXT: br i1 [[TMP2]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
+; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
; PRED: [[PRED_STORE_IF]]:
-; PRED-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
-; PRED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP3]]
-; PRED-NEXT: [[TMP5:%.*]] = add i8 [[TMP0]], 0
-; PRED-NEXT: store i8 [[TMP5]], ptr [[TMP4]], align 1
+; PRED-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[DST]], i64 0
+; PRED-NEXT: store i8 0, ptr [[TMP0]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE]]
; PRED: [[PRED_STORE_CONTINUE]]:
-; PRED-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP1]], i32 1
-; PRED-NEXT: br i1 [[TMP6]], label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
+; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF1:.*]], label %[[PRED_STORE_CONTINUE2:.*]]
; PRED: [[PRED_STORE_IF1]]:
-; PRED-NEXT: [[TMP7:%.*]] = add i64 [[INDEX]], 1
-; PRED-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP7]]
-; PRED-NEXT: [[TMP9:%.*]] = add i8 [[TMP0]], 1
-; PRED-NEXT: store i8 [[TMP9]], ptr [[TMP8]], align 1
+; PRED-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DST]], i64 1
+; PRED-NEXT: store i8 1, ptr [[TMP1]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE2]]
; PRED: [[PRED_STORE_CONTINUE2]]:
-; PRED-NEXT: [[TMP10:%.*]] = extractelement <8 x i1> [[TMP1]], i32 2
-; PRED-NEXT: br i1 [[TMP10]], label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
+; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF3:.*]], label %[[PRED_STORE_CONTINUE4:.*]]
; PRED: [[PRED_STORE_IF3]]:
-; PRED-NEXT: [[TMP11:%.*]] = add i64 [[INDEX]], 2
-; PRED-NEXT: [[TMP12:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP11]]
-; PRED-NEXT: [[TMP13:%.*]] = add i8 [[TMP0]], 2
-; PRED-NEXT: store i8 [[TMP13]], ptr [[TMP12]], align 1
+; PRED-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 2
+; PRED-NEXT: store i8 2, ptr [[TMP2]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE4]]
; PRED: [[PRED_STORE_CONTINUE4]]:
-; PRED-NEXT: [[TMP14:%.*]] = extractelement <8 x i1> [[TMP1]], i32 3
-; PRED-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
+; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF5:.*]], label %[[PRED_STORE_CONTINUE6:.*]]
; PRED: [[PRED_STORE_IF5]]:
-; PRED-NEXT: [[TMP15:%.*]] = add i64 [[INDEX]], 3
-; PRED-NEXT: [[TMP16:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP15]]
-; PRED-NEXT: [[TMP17:%.*]] = add i8 [[TMP0]], 3
-; PRED-NEXT: store i8 [[TMP17]], ptr [[TMP16]], align 1
+; PRED-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DST]], i64 3
+; PRED-NEXT: store i8 3, ptr [[TMP3]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE6]]
; PRED: [[PRED_STORE_CONTINUE6]]:
-; PRED-NEXT: [[TMP18:%.*]] = extractelement <8 x i1> [[TMP1]], i32 4
-; PRED-NEXT: br i1 [[TMP18]], label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
+; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF7:.*]], label %[[PRED_STORE_CONTINUE8:.*]]
; PRED: [[PRED_STORE_IF7]]:
-; PRED-NEXT: [[TMP19:%.*]] = add i64 [[INDEX]], 4
-; PRED-NEXT: [[TMP20:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP19]]
-; PRED-NEXT: [[TMP21:%.*]] = add i8 [[TMP0]], 4
-; PRED-NEXT: store i8 [[TMP21]], ptr [[TMP20]], align 1
+; PRED-NEXT: [[TMP4:%.*]] = getelementptr i8, ptr [[DST]], i64 4
+; PRED-NEXT: store i8 4, ptr [[TMP4]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE8]]
; PRED: [[PRED_STORE_CONTINUE8]]:
-; PRED-NEXT: [[TMP22:%.*]] = extractelement <8 x i1> [[TMP1]], i32 5
-; PRED-NEXT: br i1 [[TMP22]], label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
+; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF9:.*]], label %[[PRED_STORE_CONTINUE10:.*]]
; PRED: [[PRED_STORE_IF9]]:
-; PRED-NEXT: [[TMP23:%.*]] = add i64 [[INDEX]], 5
-; PRED-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
-; PRED-NEXT: [[TMP25:%.*]] = add i8 [[TMP0]], 5
-; PRED-NEXT: store i8 [[TMP25]], ptr [[TMP24]], align 1
+; PRED-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[DST]], i64 5
+; PRED-NEXT: store i8 5, ptr [[TMP5]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE10]]
; PRED: [[PRED_STORE_CONTINUE10]]:
-; PRED-NEXT: [[TMP26:%.*]] = extractelement <8 x i1> [[TMP1]], i32 6
-; PRED-NEXT: br i1 [[TMP26]], label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
+; PRED-NEXT: br i1 true, label %[[PRED_STORE_IF11:.*]], label %[[PRED_STORE_CONTINUE12:.*]]
; PRED: [[PRED_STORE_IF11]]:
-; PRED-NEXT: [[TMP27:%.*]] = add i64 [[INDEX]], 6
-; PRED-NEXT: [[TMP28:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP27]]
-; PRED-NEXT: [[TMP29:%.*]] = add i8 [[TMP0]], 6
-; PRED-NEXT: store i8 [[TMP29]], ptr [[TMP28]], align 1
+; PRED-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[DST]], i64 6
+; PRED-NEXT: store i8 6, ptr [[TMP6]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE12]]
; PRED: [[PRED_STORE_CONTINUE12]]:
-; PRED-NEXT: [[TMP30:%.*]] = extractelement <8 x i1> [[TMP1]], i32 7
-; PRED-NEXT: br i1 [[TMP30]], label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14]]
+; PRED-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
; PRED: [[PRED_STORE_IF13]]:
-; PRED-NEXT: [[TMP31:%.*]] = add i64 [[INDEX]], 7
-; PRED-NEXT: [[TMP32:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP31]]
-; PRED-NEXT: [[TMP33:%.*]] = add i8 [[TMP0]], 7
-; PRED-NEXT: store i8 [[TMP33]], ptr [[TMP32]], align 1
+; PRED-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7
+; PRED-NEXT: store i8 7, ptr [[TMP7]], align 1
; PRED-NEXT: br label %[[PRED_STORE_CONTINUE14]]
; PRED: [[PRED_STORE_CONTINUE14]]:
-; PRED-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
-; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i8> [[VEC_IND]], splat (i8 8)
-; PRED-NEXT: br i1 true, label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
+; PRED-NEXT: br label %[[MIDDLE_BLOCK:.*]]
; PRED: [[MIDDLE_BLOCK]]:
; PRED-NEXT: br label %[[EXIT:.*]]
; PRED: [[SCALAR_PH]]:
@@ -962,7 +902,7 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
; PRED-NEXT: store i8 [[IV_TRUNC]], ptr [[GEP]], align 1
; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 7
-; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP7:![0-9]+]]
+; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP6:![0-9]+]]
; PRED: [[EXIT]]:
; PRED-NEXT: ret void
;
@@ -1163,7 +1103,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
; DEFAULT-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
-; DEFAULT-NEXT: br i1 [[TMP80]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
+; DEFAULT-NEXT: br i1 [[TMP80]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
; DEFAULT: [[MIDDLE_BLOCK]]:
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
; DEFAULT-NEXT: br i1 [[CMP_N]], label %[[EXIT:.*]], label %[[SCALAR_PH]]
@@ -1194,7 +1134,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; DEFAULT: [[LOOP_LATCH]]:
; DEFAULT-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; DEFAULT-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
-; DEFAULT-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP27:![0-9]+]]
+; DEFAULT-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP26:![0-9]+]]
; DEFAULT: [[EXIT]]:
; DEFAULT-NEXT: ret void
;
@@ -1383,7 +1323,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED-NEXT: [[TMP84:%.*]] = extractelement <8 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
; PRED-NEXT: [[TMP85:%.*]] = xor i1 [[TMP84]], true
; PRED-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
-; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
+; PRED-NEXT: br i1 [[TMP85]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; PRED: [[MIDDLE_BLOCK]]:
; PRED-NEXT: br label %[[EXIT:.*]]
; PRED: [[SCALAR_PH]]:
@@ -1413,7 +1353,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
; PRED: [[LOOP_LATCH]]:
; PRED-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; PRED-NEXT: [[EC:%.*]] = icmp eq i64 [[IV]], [[N]]
-; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP9:![0-9]+]]
+; PRED-NEXT: br i1 [[EC]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP8:![0-9]+]]
; PRED: [[EXIT]]:
; PRED-NEXT: ret void
;
@@ -1470,7 +1410,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
; DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
-; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
+; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
; DEFAULT: [[MIDDLE_BLOCK]]:
; DEFAULT-NEXT: br l...
[truncated]
|
437eba4 to
e5374bd
Compare
Mel-Chen
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LG
fhahn
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for the update. It looks like there is a test failure in the precommit tests that looks related.
fhahn
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM, thanks
SimplifyBranchConditionForVFAndUF only recognized canonical IVs and a few PHI
recipes in the loop header. With more IV-step optimizations,
the canonical widen-canonical-iv can be replaced by a canonical VPWidenIntOrFpInduction,
which the pass did not handle, causing regressions (missed simplifications).
This patch replaces canonical VPWidenIntOrFpInduction with a StepVector in the vector preheader
since the vector loop region only executes once.