Skip to content

Commit 87325fd

Browse files
committed
!fixup address latest comments, thanks
1 parent a78311d commit 87325fd

File tree

10 files changed

+132
-311
lines changed

10 files changed

+132
-311
lines changed

llvm/lib/Analysis/IVDescriptors.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -222,6 +222,7 @@ static bool isMinMaxReductionWithLoopUsersOutsideReductionChain(
222222
BasicBlock *Latch = TheLoop->getLoopLatch();
223223
if (!Latch)
224224
return false;
225+
225226
assert(Phi->getNumIncomingValues() == 2 && "phi must have 2 incoming values");
226227
Value *Inc = Phi->getIncomingValueForBlock(Latch);
227228
if (Phi->hasOneUse() || !Inc->hasOneUse() ||

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7998,9 +7998,10 @@ void VPRecipeBuilder::collectScaledReductions(VFRange &Range) {
79987998
MapVector<Instruction *,
79997999
SmallVector<std::pair<PartialReductionChain, unsigned>>>
80008000
ChainsByPhi;
8001-
for (const auto &[Phi, RdxDesc] : Legal->getReductionVars())
8001+
for (const auto &[Phi, RdxDesc] : Legal->getReductionVars()) {
80028002
if (Instruction *RdxExitInstr = RdxDesc.getLoopExitInstr())
80038003
getScaledReductions(Phi, RdxExitInstr, Range, ChainsByPhi[Phi]);
8004+
}
80048005

80058006
// A partial reduction is invalid if any of its extends are used by
80068007
// something that isn't another partial reduction. This is because the

llvm/lib/Transforms/Vectorize/VPlanConstruction.cpp

Lines changed: 24 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -821,16 +821,15 @@ void VPlanTransforms::addMinimumVectorEpilogueIterationCheck(
821821
/// If \p V is used by a recipe matching pattern \p P, return it. Otherwise
822822
/// return nullptr.
823823
template <typename MatchT>
824-
static VPRecipeBase *findUser(VPValue *V, const MatchT &P) {
824+
static VPRecipeBase *findUserOf(VPValue *V, const MatchT &P) {
825825
auto It = find_if(V->users(), match_fn(P));
826826
return It == V->user_end() ? nullptr : cast<VPRecipeBase>(*It);
827827
}
828828

829829
/// If \p V is used by a VPInstruction with \p Opcode, return it. Otherwise
830830
/// return nullptr.
831-
template <unsigned Opcode>
832-
static VPInstruction *findUserVPInstruction(VPValue *V) {
833-
return cast_or_null<VPInstruction>(findUser(V, m_VPInstruction<Opcode>()));
831+
template <unsigned Opcode> static VPInstruction *findUserOf(VPValue *V) {
832+
return cast_or_null<VPInstruction>(findUserOf(V, m_VPInstruction<Opcode>()));
834833
}
835834

836835
bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
@@ -938,7 +937,7 @@ bool VPlanTransforms::handleMaxMinNumReductions(VPlan &Plan) {
938937
// If we exit early due to NaNs, compute the final reduction result based on
939938
// the reduction phi at the beginning of the last vector iteration.
940939
auto *RdxResult =
941-
findUserVPInstruction<VPInstruction::ComputeReductionResult>(RedPhiR);
940+
findUserOf<VPInstruction::ComputeReductionResult>(RedPhiR);
942941

943942
auto *NewSel = MiddleBuilder.createSelect(AnyNaNLane, RedPhiR,
944943
RdxResult->getOperand(1));
@@ -1017,15 +1016,28 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
10171016
dyn_cast<VPRecipeWithIRFlags>(MinMaxPhiR->getBackedgeValue());
10181017
if (!MinMaxOp || MinMaxOp->getNumUsers() != 2)
10191018
return false;
1019+
1020+
assert((isa<VPWidenIntrinsicRecipe>(MinMaxOp) ||
1021+
(isa<VPReplicateRecipe>(MinMaxOp) &&
1022+
isa<IntrinsicInst>(
1023+
cast<VPReplicateRecipe>(MinMaxOp)->getUnderlyingValue()))) &&
1024+
"MinMaxOp must be a wide or scalar intrinsic");
1025+
VPValue *MinMaxOpA = MinMaxOp->getOperand(0);
1026+
VPValue *MinMaxOpB = MinMaxOp->getOperand(1);
1027+
if (MinMaxOpA != MinMaxPhiR)
1028+
std::swap(MinMaxOpA, MinMaxOpB);
1029+
if (MinMaxOpA != MinMaxPhiR)
1030+
return false;
1031+
10201032
VPValue *CmpOpA;
10211033
VPValue *CmpOpB;
1022-
CmpInst::Predicate Pred;
1023-
auto *Cmp = dyn_cast_or_null<VPRecipeWithIRFlags>(
1024-
findUser(MinMaxPhiR, m_Cmp(m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
1025-
if (!Cmp || Cmp->getNumUsers() != 1)
1034+
CmpPredicate Pred;
1035+
auto *Cmp = dyn_cast_or_null<VPRecipeWithIRFlags>(findUserOf(
1036+
MinMaxPhiR, m_Cmp(Pred, m_VPValue(CmpOpA), m_VPValue(CmpOpB))));
1037+
if (!Cmp || Cmp->getNumUsers() != 1 ||
1038+
(CmpOpA != MinMaxOpB && CmpOpB != MinMaxOpB))
10261039
return false;
10271040

1028-
Pred = Cmp->getPredicate();
10291041
// TODO: Strict predicates need to find the first IV value for which the
10301042
// predicate holds, not the last.
10311043
if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE ||
@@ -1063,10 +1075,9 @@ bool VPlanTransforms::handleMultiUseReductions(VPlan &Plan) {
10631075
// 3. Select the lanes of the partial FindLastIV reductions which
10641076
// correspond to the lanes matching the min/max reduction result.
10651077
VPInstruction *FindIVResult =
1066-
findUserVPInstruction<VPInstruction::ComputeFindIVResult>(FindIVPhiR);
1078+
findUserOf<VPInstruction::ComputeFindIVResult>(FindIVPhiR);
10671079
VPInstruction *MinMaxResult =
1068-
findUserVPInstruction<VPInstruction::ComputeReductionResult>(
1069-
MinMaxPhiR);
1080+
findUserOf<VPInstruction::ComputeReductionResult>(MinMaxPhiR);
10701081
MinMaxResult->moveBefore(*FindIVResult->getParent(),
10711082
FindIVResult->getIterator());
10721083

llvm/test/Transforms/LoopVectorize/AArch64/select-index.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -140,7 +140,7 @@ define i64 @test_vectorize_select_smin_first_idx(ptr %src, i64 %n) {
140140
; CHECK: [[LOOP]]:
141141
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
142142
; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
143-
; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ]
143+
; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ]
144144
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
145145
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8
146146
; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i64 [[MIN_VAL]], [[L]]
@@ -159,7 +159,7 @@ entry:
159159
loop:
160160
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
161161
%min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ]
162-
%min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ]
162+
%min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ]
163163
%gep = getelementptr i64, ptr %src, i64 %iv
164164
%l = load i64, ptr %gep
165165
%cmp = icmp sgt i64 %min.val, %l
@@ -271,7 +271,7 @@ define i64 @test_vectorize_select_umax_first_idx(ptr %src, i64 %n) {
271271
; CHECK: [[LOOP]]:
272272
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
273273
; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
274-
; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ]
274+
; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ]
275275
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
276276
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8
277277
; CHECK-NEXT: [[CMP:%.*]] = icmp ult i64 [[MIN_VAL]], [[L]]
@@ -290,7 +290,7 @@ entry:
290290
loop:
291291
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
292292
%min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ]
293-
%min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ]
293+
%min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ]
294294
%gep = getelementptr i64, ptr %src, i64 %iv
295295
%l = load i64, ptr %gep
296296
%cmp = icmp ult i64 %min.val, %l
@@ -320,8 +320,8 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) {
320320
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
321321
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
322322
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
323-
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
324-
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
323+
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
324+
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
325325
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
326326
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
327327
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2
@@ -355,7 +355,7 @@ define i64 @test_vectorize_select_umax_last_idx(ptr %src, i64 %n) {
355355
; CHECK: [[SCALAR_PH]]:
356356
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
357357
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
358-
; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 100, %[[ENTRY]] ]
358+
; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
359359
; CHECK-NEXT: br label %[[LOOP:.*]]
360360
; CHECK: [[LOOP]]:
361361
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -379,7 +379,7 @@ entry:
379379
loop:
380380
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
381381
%min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ]
382-
%min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ]
382+
%min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ]
383383
%gep = getelementptr i64, ptr %src, i64 %iv
384384
%l = load i64, ptr %gep
385385
%cmp = icmp ule i64 %min.val, %l
@@ -402,7 +402,7 @@ define i64 @test_vectorize_select_smax_first_idx(ptr %src, i64 %n) {
402402
; CHECK: [[LOOP]]:
403403
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
404404
; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
405-
; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 100, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ]
405+
; CHECK-NEXT: [[MIN_VAL:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_VAL_NEXT:%.*]], %[[LOOP]] ]
406406
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
407407
; CHECK-NEXT: [[L:%.*]] = load i64, ptr [[GEP]], align 8
408408
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i64 [[MIN_VAL]], [[L]]
@@ -421,7 +421,7 @@ entry:
421421
loop:
422422
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
423423
%min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ]
424-
%min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ]
424+
%min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ]
425425
%gep = getelementptr i64, ptr %src, i64 %iv
426426
%l = load i64, ptr %gep
427427
%cmp = icmp slt i64 %min.val, %l
@@ -451,8 +451,8 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) {
451451
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
452452
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP7:%.*]], %[[VECTOR_BODY]] ]
453453
; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <2 x i64> [ splat (i64 -9223372036854775808), %[[VECTOR_PH]] ], [ [[TMP8:%.*]], %[[VECTOR_BODY]] ]
454-
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
455-
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ splat (i64 100), %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
454+
; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP5:%.*]], %[[VECTOR_BODY]] ]
455+
; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <2 x i64> [ zeroinitializer, %[[VECTOR_PH]] ], [ [[TMP6:%.*]], %[[VECTOR_BODY]] ]
456456
; CHECK-NEXT: [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], splat (i64 2)
457457
; CHECK-NEXT: [[GEP:%.*]] = getelementptr i64, ptr [[SRC]], i64 [[IV]]
458458
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[GEP]], i32 2
@@ -486,7 +486,7 @@ define i64 @test_vectorize_select_smax_last_idx(ptr %src, i64 %n) {
486486
; CHECK: [[SCALAR_PH]]:
487487
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
488488
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i64 [ [[RDX_SELECT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
489-
; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 100, %[[ENTRY]] ]
489+
; CHECK-NEXT: [[BC_MERGE_RDX6:%.*]] = phi i64 [ [[TMP10]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
490490
; CHECK-NEXT: br label %[[LOOP:.*]]
491491
; CHECK: [[LOOP]]:
492492
; CHECK-NEXT: [[IV1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
@@ -510,7 +510,7 @@ entry:
510510
loop:
511511
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
512512
%min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ]
513-
%min.val = phi i64 [ 100, %entry ], [ %min.val.next, %loop ]
513+
%min.val = phi i64 [ 0, %entry ], [ %min.val.next, %loop ]
514514
%gep = getelementptr i64, ptr %src, i64 %iv
515515
%l = load i64, ptr %gep
516516
%cmp = icmp sle i64 %min.val, %l
Lines changed: 27 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5
22
; RUN: opt -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S %s | FileCheck %s
33

4-
define i64 @test_vectorize_select_fmaxnum_idx(ptr %src, i64 %n) {
5-
; CHECK-LABEL: define i64 @test_vectorize_select_fmaxnum_idx(
4+
define i64 @test_vectorize_select_fmaxnum_last_idx(ptr %src, i64 %n) {
5+
; CHECK-LABEL: define i64 @test_vectorize_select_fmaxnum_last_idx(
66
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
77
; CHECK-NEXT: [[ENTRY:.*]]:
88
; CHECK-NEXT: br label %[[LOOP:.*]]
@@ -27,106 +27,102 @@ entry:
2727

2828
loop:
2929
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
30-
%min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ]
30+
%max.idx = phi i64 [ 0, %entry ], [ %max.idx.next, %loop ]
3131
%max.val = phi float [ -3.0000, %entry ], [ %max.val.next, %loop ]
3232
%gep = getelementptr float, ptr %src, i64 %iv
3333
%l = load float, ptr %gep
3434
%cmp = fcmp uge float %max.val, %l
3535
%max.val.next = call float @llvm.maxnum.f32(float %l, float %max.val)
36-
%min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx
36+
%max.idx.next = select i1 %cmp, i64 %iv, i64 %max.idx
3737
%iv.next = add nuw nsw i64 %iv, 1
3838
%exitcond.not = icmp eq i64 %iv.next, %n
3939
br i1 %exitcond.not, label %exit, label %loop
4040

4141
exit:
42-
%res = phi i64 [ %min.idx.next, %loop ]
42+
%res = phi i64 [ %max.idx.next, %loop ]
4343
ret i64 %res
4444
}
4545

46-
define i64 @test_vectorize_select_fmax_idx_without_fmfs(ptr %src, i64 %n) {
47-
; CHECK-LABEL: define i64 @test_vectorize_select_fmax_idx_without_fmfs(
46+
define i64 @test_vectorize_select_fmax_last_idx_without_fmfs(ptr %src, i64 %n) {
47+
; CHECK-LABEL: define i64 @test_vectorize_select_fmax_last_idx_without_fmfs(
4848
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
4949
; CHECK-NEXT: [[ENTRY:.*]]:
5050
; CHECK-NEXT: br label %[[LOOP:.*]]
5151
; CHECK: [[LOOP]]:
5252
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
53-
; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
53+
; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ]
5454
; CHECK-NEXT: [[MAX_VAL:%.*]] = phi float [ -3.000000e+00, %[[ENTRY]] ], [ [[MAX_VAL_NEXT:%.*]], %[[LOOP]] ]
5555
; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]]
5656
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4
57-
; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MAX_VAL]], [[L]]
58-
; CHECK-NEXT: [[MAX_CMP:%.*]] = fcmp ogt float [[L]], [[MAX_VAL]]
57+
; CHECK-NEXT: [[MAX_CMP:%.*]] = fcmp ule float [[MAX_VAL]], [[L]]
5958
; CHECK-NEXT: [[MAX_VAL_NEXT]] = select i1 [[MAX_CMP]], float [[L]], float [[MAX_VAL]]
60-
; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]]
59+
; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[MAX_CMP]], i64 [[IV]], i64 [[MAX_IDX]]
6160
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
6261
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
6362
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
6463
; CHECK: [[EXIT]]:
65-
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ]
64+
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ]
6665
; CHECK-NEXT: ret i64 [[RES]]
6766
;
6867
entry:
6968
br label %loop
7069

7170
loop:
7271
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
73-
%min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ]
72+
%max.idx = phi i64 [ 0, %entry ], [ %max.idx.next, %loop ]
7473
%max.val = phi float [ -3.0000, %entry ], [ %max.val.next, %loop ]
7574
%gep = getelementptr float, ptr %src, i64 %iv
7675
%l = load float, ptr %gep
77-
%cmp = fcmp uge float %max.val, %l
78-
%max.cmp = fcmp ogt float %l, %max.val
79-
%max.val.next = select i1 %max.cmp, float %l, float %max.val
80-
%min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx
76+
%cmp = fcmp ule float %max.val, %l
77+
%max.val.next = select i1 %cmp, float %l, float %max.val
78+
%max.idx.next = select i1 %cmp, i64 %iv, i64 %max.idx
8179
%iv.next = add nuw nsw i64 %iv, 1
8280
%exitcond.not = icmp eq i64 %iv.next, %n
8381
br i1 %exitcond.not, label %exit, label %loop
8482

8583
exit:
86-
%res = phi i64 [ %min.idx.next, %loop ]
84+
%res = phi i64 [ %max.idx.next, %loop ]
8785
ret i64 %res
8886
}
8987

90-
define i64 @test_vectorize_select_fmax_idx_with_fmfs(ptr %src, i64 %n) {
91-
; CHECK-LABEL: define i64 @test_vectorize_select_fmax_idx_with_fmfs(
88+
define i64 @test_vectorize_select_fmax_last_idx_with_fmfs(ptr %src, i64 %n) {
89+
; CHECK-LABEL: define i64 @test_vectorize_select_fmax_last_idx_with_fmfs(
9290
; CHECK-SAME: ptr [[SRC:%.*]], i64 [[N:%.*]]) {
9391
; CHECK-NEXT: [[ENTRY:.*]]:
9492
; CHECK-NEXT: br label %[[LOOP:.*]]
9593
; CHECK: [[LOOP]]:
9694
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
97-
; CHECK-NEXT: [[MIN_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MIN_IDX_NEXT:%.*]], %[[LOOP]] ]
95+
; CHECK-NEXT: [[MAX_IDX:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[MAX_IDX_NEXT:%.*]], %[[LOOP]] ]
9896
; CHECK-NEXT: [[MAX_VAL:%.*]] = phi float [ -3.000000e+00, %[[ENTRY]] ], [ [[MAX_VAL_NEXT:%.*]], %[[LOOP]] ]
9997
; CHECK-NEXT: [[GEP:%.*]] = getelementptr float, ptr [[SRC]], i64 [[IV]]
10098
; CHECK-NEXT: [[L:%.*]] = load float, ptr [[GEP]], align 4
101-
; CHECK-NEXT: [[CMP:%.*]] = fcmp uge float [[MAX_VAL]], [[L]]
102-
; CHECK-NEXT: [[MAX_CMP:%.*]] = fcmp fast ogt float [[L]], [[MAX_VAL]]
99+
; CHECK-NEXT: [[MAX_CMP:%.*]] = fcmp ule float [[MAX_VAL]], [[L]]
103100
; CHECK-NEXT: [[MAX_VAL_NEXT]] = select fast i1 [[MAX_CMP]], float [[L]], float [[MAX_VAL]]
104-
; CHECK-NEXT: [[MIN_IDX_NEXT]] = select i1 [[CMP]], i64 [[IV]], i64 [[MIN_IDX]]
101+
; CHECK-NEXT: [[MAX_IDX_NEXT]] = select i1 [[MAX_CMP]], i64 [[IV]], i64 [[MAX_IDX]]
105102
; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
106103
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
107104
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label %[[EXIT:.*]], label %[[LOOP]]
108105
; CHECK: [[EXIT]]:
109-
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MIN_IDX_NEXT]], %[[LOOP]] ]
106+
; CHECK-NEXT: [[RES:%.*]] = phi i64 [ [[MAX_IDX_NEXT]], %[[LOOP]] ]
110107
; CHECK-NEXT: ret i64 [[RES]]
111108
;
112109
entry:
113110
br label %loop
114111

115112
loop:
116113
%iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
117-
%min.idx = phi i64 [ 0, %entry ], [ %min.idx.next, %loop ]
114+
%max.idx = phi i64 [ 0, %entry ], [ %max.idx.next, %loop ]
118115
%max.val = phi float [ -3.0000, %entry ], [ %max.val.next, %loop ]
119116
%gep = getelementptr float, ptr %src, i64 %iv
120117
%l = load float, ptr %gep
121-
%cmp = fcmp uge float %max.val, %l
122-
%max.cmp = fcmp fast ogt float %l, %max.val
123-
%max.val.next = select fast i1 %max.cmp, float %l, float %max.val
124-
%min.idx.next = select i1 %cmp, i64 %iv, i64 %min.idx
118+
%cmp = fcmp ule float %max.val, %l
119+
%max.val.next = select fast i1 %cmp, float %l, float %max.val
120+
%max.idx.next = select i1 %cmp, i64 %iv, i64 %max.idx
125121
%iv.next = add nuw nsw i64 %iv, 1
126122
%exitcond.not = icmp eq i64 %iv.next, %n
127123
br i1 %exitcond.not, label %exit, label %loop
128124

129125
exit:
130-
%res = phi i64 [ %min.idx.next, %loop ]
126+
%res = phi i64 [ %max.idx.next, %loop ]
131127
ret i64 %res
132128
}

0 commit comments

Comments
 (0)