Skip to content

Commit 967e1e1

Browse files
committed
!fixup update after merge.
1 parent bc6af8a commit 967e1e1

File tree

2 files changed

+52
-50
lines changed

2 files changed

+52
-50
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 38 additions & 38 deletions
Original file line number | Diff line number | Diff line change
@@ -1090,41 +1090,6 @@ void VPlanTransforms::simplifyRecipes(VPlan &Plan, Type &CanonicalIVTy) {
10901090
}
10911091
}
10921092

1093-
/// Return true if \p Cond is known to be true for given \p BestVF and \p
1094-
/// BestUF.
1095-
static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan,
1096-
ElementCount BestVF, unsigned BestUF,
1097-
ScalarEvolution &SE) {
1098-
using namespace llvm::VPlanPatternMatch;
1099-
if (match(Cond, m_Binary<Instruction::Or>(m_VPValue(), m_VPValue())))
1100-
return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
1101-
&SE](VPValue *C) {
1102-
return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
1103-
});
1104-
1105-
auto *CanIV = Plan.getCanonicalIV();
1106-
if (!match(Cond, m_Binary<Instruction::ICmp>(
1107-
m_Specific(CanIV->getBackedgeValue()),
1108-
m_Specific(&Plan.getVectorTripCount()))) ||
1109-
cast<VPRecipeWithIRFlags>(Cond->getDefiningRecipe())->getPredicate() !=
1110-
CmpInst::ICMP_EQ)
1111-
return false;
1112-
1113-
// The compare checks CanIV + VFxUF == vector trip count. The vector trip
1114-
// count is not conveniently available as SCEV so far, so we compare directly
1115-
// against the original trip count. This is stricter than necessary, as we
1116-
// will only return true if the trip count == vector trip count.
1117-
// TODO: Use SCEV for vector trip count once available, to cover cases where
1118-
// vector trip count == UF * VF, but original trip count != UF * VF.
1119-
const SCEV *TripCount =
1120-
vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
1121-
assert(!isa<SCEVCouldNotCompute>(TripCount) &&
1122-
"Trip count SCEV must be computable");
1123-
ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
1124-
const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
1125-
return SE.isKnownPredicate(CmpInst::ICMP_EQ, TripCount, C);
1126-
}
1127-
11281093
/// Optimize the width of vector induction variables in \p Plan based on a known
11291094
/// constant Trip Count, \p BestVF and \p BestUF.
11301095
static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
@@ -1198,6 +1163,41 @@ static bool optimizeVectorInductionWidthForTCAndVFUF(VPlan &Plan,
11981163
return MadeChange;
11991164
}
12001165

1166+
/// Return true if \p Cond is known to be true for given \p BestVF and \p
1167+
/// BestUF.
1168+
static bool isConditionTrueViaVFAndUF(VPValue *Cond, VPlan &Plan,
1169+
ElementCount BestVF, unsigned BestUF,
1170+
ScalarEvolution &SE) {
1171+
using namespace llvm::VPlanPatternMatch;
1172+
if (match(Cond, m_Binary<Instruction::Or>(m_VPValue(), m_VPValue())))
1173+
return any_of(Cond->getDefiningRecipe()->operands(), [&Plan, BestVF, BestUF,
1174+
&SE](VPValue *C) {
1175+
return isConditionTrueViaVFAndUF(C, Plan, BestVF, BestUF, SE);
1176+
});
1177+
1178+
auto *CanIV = Plan.getCanonicalIV();
1179+
if (!match(Cond, m_Binary<Instruction::ICmp>(
1180+
m_Specific(CanIV->getBackedgeValue()),
1181+
m_Specific(&Plan.getVectorTripCount()))) ||
1182+
cast<VPRecipeWithIRFlags>(Cond->getDefiningRecipe())->getPredicate() !=
1183+
CmpInst::ICMP_EQ)
1184+
return false;
1185+
1186+
// The compare checks CanIV + VFxUF == vector trip count. The vector trip
1187+
// count is not conveniently available as SCEV so far, so we compare directly
1188+
// against the original trip count. This is stricter than necessary, as we
1189+
// will only return true if the trip count == vector trip count.
1190+
// TODO: Use SCEV for vector trip count once available, to cover cases where
1191+
// vector trip count == UF * VF, but original trip count != UF * VF.
1192+
const SCEV *TripCount =
1193+
vputils::getSCEVExprForVPValue(Plan.getTripCount(), SE);
1194+
assert(!isa<SCEVCouldNotCompute>(TripCount) &&
1195+
"Trip count SCEV must be computable");
1196+
ElementCount NumElements = BestVF.multiplyCoefficientBy(BestUF);
1197+
const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
1198+
return SE.isKnownPredicate(CmpInst::ICMP_EQ, TripCount, C);
1199+
}
1200+
12011201
/// Try to simplify the branch condition of \p Plan. This may restrict the
12021202
/// resulting plan to \p BestVF and \p BestUF.
12031203
static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
@@ -1223,14 +1223,14 @@ static bool simplifyBranchConditionForVFAndUF(VPlan &Plan, ElementCount BestVF,
12231223
const SCEV *C = SE.getElementCount(TripCount->getType(), NumElements);
12241224
if (TripCount->isZero() ||
12251225
!SE.isKnownPredicate(CmpInst::ICMP_ULE, TripCount, C))
1226-
return;
1226+
return false;
12271227
} else if (match(Term, m_BranchOnCond(m_VPValue(Cond)))) {
12281228
// For BranchOnCond, check if we can prove the condition to be true using VF
12291229
// and UF.
12301230
if (!isConditionTrueViaVFAndUF(Cond, Plan, BestVF, BestUF, SE))
1231-
return;
1231+
return false;
12321232
} else {
1233-
return;
1233+
return false;
12341234
}
12351235

12361236
// The vector loop region only executes once. If possible, completely remove

llvm/test/Transforms/LoopVectorize/vector-loop-backedge-elimination-early-exit.ll

Lines changed: 14 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn
5555
; VF8UF2: [[VECTOR_PH]]:
5656
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
5757
; VF8UF2: [[VECTOR_BODY]]:
58+
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0
5859
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
5960
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
6061
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
@@ -90,6 +91,7 @@ define i8 @test_early_exit_max_tc_less_than_16(ptr dereferenceable(16) %A) nosyn
9091
; VF16UF1: [[VECTOR_PH]]:
9192
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
9293
; VF16UF1: [[VECTOR_BODY]]:
94+
; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0
9395
; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
9496
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
9597
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
@@ -188,19 +190,19 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
188190
; VF8UF2: [[VECTOR_PH]]:
189191
; VF8UF2-NEXT: br label %[[VECTOR_BODY:.*]]
190192
; VF8UF2: [[VECTOR_BODY]]:
191-
; VF8UF2-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
192-
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
193+
; VF8UF2-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0
193194
; VF8UF2-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
194195
; VF8UF2-NEXT: [[WIDE_LOAD:%.*]] = load <8 x i8>, ptr [[TMP2]], align 1
195196
; VF8UF2-NEXT: [[TMP3:%.*]] = icmp eq <8 x i8> [[WIDE_LOAD]], zeroinitializer
196-
; VF8UF2-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
197+
; VF8UF2-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v8i1(<8 x i1> [[TMP3]])
198+
; VF8UF2-NEXT: br label %[[MIDDLE_SPLIT:.*]]
197199
; VF8UF2: [[MIDDLE_SPLIT]]:
198200
; VF8UF2-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
199201
; VF8UF2: [[MIDDLE_BLOCK]]:
200202
; VF8UF2-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
201203
; VF8UF2: [[VECTOR_EARLY_EXIT]]:
202204
; VF8UF2-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v8i1(<8 x i1> [[TMP3]], i1 true)
203-
; VF8UF2-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
205+
; VF8UF2-NEXT: [[TMP5:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE]]
204206
; VF8UF2-NEXT: br label %[[EXIT]]
205207
; VF8UF2: [[SCALAR_PH]]:
206208
; VF8UF2-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -214,9 +216,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
214216
; VF8UF2: [[LOOP_LATCH]]:
215217
; VF8UF2-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
216218
; VF8UF2-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
217-
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP4:![0-9]+]]
219+
; VF8UF2-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
218220
; VF8UF2: [[EXIT]]:
219-
; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ]
221+
; VF8UF2-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP5]], %[[VECTOR_EARLY_EXIT]] ]
220222
; VF8UF2-NEXT: ret i64 [[RES]]
221223
;
222224
; VF16UF1-LABEL: define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(
@@ -226,19 +228,19 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
226228
; VF16UF1: [[VECTOR_PH]]:
227229
; VF16UF1-NEXT: br label %[[VECTOR_BODY:.*]]
228230
; VF16UF1: [[VECTOR_BODY]]:
229-
; VF16UF1-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
230-
; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 [[INDEX]]
231+
; VF16UF1-NEXT: [[P_SRC:%.*]] = getelementptr inbounds i8, ptr [[A]], i64 0
231232
; VF16UF1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, ptr [[P_SRC]], i32 0
232233
; VF16UF1-NEXT: [[WIDE_LOAD:%.*]] = load <16 x i8>, ptr [[TMP2]], align 1
233234
; VF16UF1-NEXT: [[TMP3:%.*]] = icmp eq <16 x i8> [[WIDE_LOAD]], zeroinitializer
234-
; VF16UF1-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 16
235+
; VF16UF1-NEXT: [[TMP4:%.*]] = call i1 @llvm.vector.reduce.or.v16i1(<16 x i1> [[TMP3]])
236+
; VF16UF1-NEXT: br label %[[MIDDLE_SPLIT:.*]]
235237
; VF16UF1: [[MIDDLE_SPLIT]]:
236238
; VF16UF1-NEXT: br i1 [[TMP4]], label %[[VECTOR_EARLY_EXIT:.*]], label %[[MIDDLE_BLOCK:.*]]
237239
; VF16UF1: [[MIDDLE_BLOCK]]:
238240
; VF16UF1-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]]
239241
; VF16UF1: [[VECTOR_EARLY_EXIT]]:
240242
; VF16UF1-NEXT: [[FIRST_ACTIVE_LANE:%.*]] = call i64 @llvm.experimental.cttz.elts.i64.v16i1(<16 x i1> [[TMP3]], i1 true)
241-
; VF16UF1-NEXT: [[TMP8:%.*]] = add i64 [[INDEX]], [[FIRST_ACTIVE_LANE]]
243+
; VF16UF1-NEXT: [[TMP5:%.*]] = add i64 0, [[FIRST_ACTIVE_LANE]]
242244
; VF16UF1-NEXT: br label %[[EXIT]]
243245
; VF16UF1: [[SCALAR_PH]]:
244246
; VF16UF1-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 16, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
@@ -252,9 +254,9 @@ define i64 @test_early_exit_max_tc_less_than_16_with_iv_used_outside(ptr derefer
252254
; VF16UF1: [[LOOP_LATCH]]:
253255
; VF16UF1-NEXT: [[IV_NEXT]] = add nsw i64 [[IV1]], 1
254256
; VF16UF1-NEXT: [[CMP:%.*]] = icmp eq i64 [[IV_NEXT]], 16
255-
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP4:![0-9]+]]
257+
; VF16UF1-NEXT: br i1 [[CMP]], label %[[EXIT]], label %[[LOOP_HEADER]], !llvm.loop [[LOOP3:![0-9]+]]
256258
; VF16UF1: [[EXIT]]:
257-
; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP8]], %[[VECTOR_EARLY_EXIT]] ]
259+
; VF16UF1-NEXT: [[RES:%.*]] = phi i64 [ [[IV1]], %[[LOOP_HEADER]] ], [ 1, %[[LOOP_LATCH]] ], [ 1, %[[MIDDLE_BLOCK]] ], [ [[TMP5]], %[[VECTOR_EARLY_EXIT]] ]
258260
; VF16UF1-NEXT: ret i64 [[RES]]
259261
;
260262
entry:

0 commit comments

Comments
 (0)