Skip to content

Commit 3952b01

Browse files
committed
Move check into planContainsAdditionalSimplifications to account for dead uses of FOR
1 parent 3e3a167 commit 3952b01

File tree

2 files changed

+66
-11
lines changed

2 files changed

+66
-11
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6541,11 +6541,6 @@ LoopVectorizationCostModel::getInstructionCost(Instruction *I,
65416541
// TODO: Consider vscale_range info.
65426542
if (VF.isScalable() && VF.getKnownMinValue() == 1)
65436543
return InstructionCost::getInvalid();
6544-
// If a FOR has no users inside the loop we won't generate a splice.
6545-
if (none_of(Phi->users(), [this](User *U) {
6546-
return TheLoop->contains(cast<Instruction>(U));
6547-
}))
6548-
return 0;
65496544
SmallVector<int> Mask(VF.getKnownMinValue());
65506545
std::iota(Mask.begin(), Mask.end(), VF.getKnownMinValue() - 1);
65516546
return TTI.getShuffleCost(TargetTransformInfo::SK_Splice,
@@ -7472,6 +7467,16 @@ static bool planContainsAdditionalSimplifications(VPlan &Plan,
74727467
}
74737468
continue;
74747469
}
7470+
// If a FOR's splice wasn't used it will have been removed, so the VPlan
7471+
// model won't cost it whilst the legacy cost model will.
7472+
if (auto *FOR = dyn_cast<VPFirstOrderRecurrencePHIRecipe>(&R)) {
7473+
if (none_of(FOR->users(), [](VPUser *U) {
7474+
auto *VPI = dyn_cast<VPInstruction>(U);
7475+
return VPI && VPI->getOpcode() ==
7476+
VPInstruction::FirstOrderRecurrenceSplice;
7477+
}))
7478+
return true;
7479+
}
74757480
// The VPlan-based cost model is more accurate for partial reduction and
74767481
// comparing against the legacy cost isn't desirable.
74777482
if (isa<VPPartialReductionRecipe>(&R))

llvm/test/Transforms/LoopVectorize/X86/pr131359.ll

Lines changed: 56 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,15 @@
11
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5
22
; RUN: opt -p loop-vectorize -S %s | FileCheck %s
33

4-
; Make sure the legacy cost model doesn't add a cost for a splice when the
5-
; first-order recurrence isn't used inside the loop. The VPlan cost model
6-
; eliminates the dead VPInstruction::FirstOrderRecurrenceSplice so the two cost
7-
; models would go out of sync otherwise.
4+
; If a FOR isn't used the VPInstruction::FirstOrderRecurrenceSplice will be dead
5+
; and won't be costed in the VPlan cost model. Make sure we account for this
6+
; simplification in comparison to the legacy cost model.
87

98
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
109
target triple = "x86_64"
1110

12-
define void @h() {
13-
; CHECK-LABEL: define void @h() {
11+
define void @no_use() {
12+
; CHECK-LABEL: define void @no_use() {
1413
; CHECK-NEXT: [[ENTRY:.*]]:
1514
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
1615
; CHECK: [[VECTOR_PH]]:
@@ -53,9 +52,60 @@ for.cond.i:
5352
f.exit:
5453
ret void
5554
}
55+
56+
57+
define void @dead_use() {
58+
; CHECK-LABEL: define void @dead_use() {
59+
; CHECK-NEXT: [[ENTRY:.*]]:
60+
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
61+
; CHECK: [[VECTOR_PH]]:
62+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
63+
; CHECK: [[VECTOR_BODY]]:
64+
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
65+
; CHECK-NEXT: [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, %[[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], %[[VECTOR_BODY]] ]
66+
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %[[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], %[[VECTOR_BODY]] ]
67+
; CHECK-NEXT: [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], splat (i32 4)
68+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
69+
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], splat (i32 4)
70+
; CHECK-NEXT: [[TMP0:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
71+
; CHECK-NEXT: br i1 [[TMP0]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
72+
; CHECK: [[MIDDLE_BLOCK]]:
73+
; CHECK-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i32 3
74+
; CHECK-NEXT: br i1 false, label %[[F_EXIT:.*]], label %[[SCALAR_PH]]
75+
; CHECK: [[SCALAR_PH]]:
76+
; CHECK-NEXT: [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[VECTOR_RECUR_EXTRACT]], %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
77+
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 40, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ]
78+
; CHECK-NEXT: br label %[[FOR_COND_I:.*]]
79+
; CHECK: [[FOR_COND_I]]:
80+
; CHECK-NEXT: [[D_0_I:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], %[[SCALAR_PH]] ], [ [[E_0_I:%.*]], %[[FOR_COND_I]] ]
81+
; CHECK-NEXT: [[E_0_I]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[INC_I:%.*]], %[[FOR_COND_I]] ]
82+
; CHECK-NEXT: [[DEAD:%.*]] = add i32 [[D_0_I]], 1
83+
; CHECK-NEXT: [[INC_I]] = add i32 [[E_0_I]], 1
84+
; CHECK-NEXT: [[EXITCOND_NOT_I:%.*]] = icmp eq i32 [[E_0_I]], 43
85+
; CHECK-NEXT: br i1 [[EXITCOND_NOT_I]], label %[[F_EXIT]], label %[[FOR_COND_I]], !llvm.loop [[LOOP5:![0-9]+]]
86+
; CHECK: [[F_EXIT]]:
87+
; CHECK-NEXT: ret void
88+
;
89+
entry:
90+
br label %for.cond.i
91+
92+
for.cond.i:
93+
%d.0.i = phi i32 [ 0, %entry ], [ %e.0.i, %for.cond.i ]
94+
%e.0.i = phi i32 [ 0, %entry ], [ %inc.i, %for.cond.i ]
95+
%dead = add i32 %d.0.i, 1
96+
%inc.i = add i32 %e.0.i, 1
97+
%exitcond.not.i = icmp eq i32 %e.0.i, 43
98+
br i1 %exitcond.not.i, label %f.exit, label %for.cond.i
99+
100+
f.exit:
101+
ret void
102+
}
103+
56104
;.
57105
; CHECK: [[LOOP0]] = distinct !{[[LOOP0]], [[META1:![0-9]+]], [[META2:![0-9]+]]}
58106
; CHECK: [[META1]] = !{!"llvm.loop.isvectorized", i32 1}
59107
; CHECK: [[META2]] = !{!"llvm.loop.unroll.runtime.disable"}
60108
; CHECK: [[LOOP3]] = distinct !{[[LOOP3]], [[META2]], [[META1]]}
109+
; CHECK: [[LOOP4]] = distinct !{[[LOOP4]], [[META1]], [[META2]]}
110+
; CHECK: [[LOOP5]] = distinct !{[[LOOP5]], [[META2]], [[META1]]}
61111
;.

0 commit comments

Comments
 (0)