Skip to content

Commit 498f89d

Browse files
committed
[LV] Collect dead induction truncates
We currently collect the ICmp and Add from an induction variable, marking them as dead so that VPlan values are not created for them. This extends that to include any single-use trunc feeding the ICmp, which allows the Add to more readily be removed too. This can help with costing VPlan nodes, as the ICmp and Add are more reliably removed and are not double-counted. Differential Revision: https://reviews.llvm.org/D88873
1 parent 81b4f33 commit 498f89d

File tree

6 files changed

+37
-41
lines changed

6 files changed

+37
-41
lines changed

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7080,9 +7080,16 @@ void LoopVectorizationPlanner::collectTriviallyDeadInstructions(
70807080
// condition will be dead after vectorization if it's only used by the
70817081
// branch.
70827082
auto *Cmp = dyn_cast<Instruction>(Latch->getTerminator()->getOperand(0));
7083-
if (Cmp && Cmp->hasOneUse())
7083+
if (Cmp && Cmp->hasOneUse()) {
70847084
DeadInstructions.insert(Cmp);
70857085

7086+
// One of the icmp's operands is often a dead trunc that uses IndUpdate.
7087+
for (Value *Op : Cmp->operands()) {
7088+
if (isa<TruncInst>(Op) && Op->hasOneUse())
7089+
DeadInstructions.insert(cast<Instruction>(Op));
7090+
}
7091+
}
7092+
70867093
// We create new "steps" for induction variable updates to which the original
70877094
// induction variables map. An original update instruction will be dead if
70887095
// all its users except the induction variable are dead.

llvm/test/Transforms/LoopVectorize/ARM/tail-folding-not-allowed.ll

Lines changed: 5 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -218,11 +218,9 @@ define void @trunc_not_allowed(i32* noalias nocapture %A, i32* noalias nocapture
218218
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
219219
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
220220
; CHECK-NEXT: store <4 x i32> [[TMP7]], <4 x i32>* [[TMP10]], align 4
221-
; CHECK-NEXT: [[TMP11:%.*]] = add nuw nsw i32 [[TMP0]], 1
222-
; CHECK-NEXT: [[TMP12:%.*]] = trunc i32 [[TMP11]] to i16
223221
; CHECK-NEXT: [[INDEX_NEXT]] = add i32 [[INDEX]], 4
224-
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428
225-
; CHECK-NEXT: br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
222+
; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], 428
223+
; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
226224
; CHECK: middle.block:
227225
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i32 431, 428
228226
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
@@ -234,10 +232,10 @@ define void @trunc_not_allowed(i32* noalias nocapture %A, i32* noalias nocapture
234232
; CHECK: for.body:
235233
; CHECK-NEXT: [[I_09:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD3:%.*]], [[FOR_BODY]] ]
236234
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[B]], i32 [[I_09]]
237-
; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
235+
; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
238236
; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[C]], i32 [[I_09]]
239-
; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
240-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], [[TMP14]]
237+
; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4
238+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP12]]
241239
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I_09]]
242240
; CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX2]], align 4
243241
; CHECK-NEXT: [[ADD3]] = add nuw nsw i32 [[I_09]], 1

llvm/test/Transforms/LoopVectorize/X86/pr36524.ll

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,9 +12,6 @@ define void @foo() {
1212
; CHECK-NEXT: [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 1
1313
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 2
1414
; CHECK-NEXT: [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 3
15-
; CHECK-NEXT: [[OFFSET_IDX1:%.*]] = add i64 2, [[INDEX]]
16-
; CHECK-NEXT: [[TMP11:%.*]] = trunc i64 [[OFFSET_IDX1]] to i32
17-
; CHECK-NEXT: [[TMP12:%.*]] = add i32 [[TMP11]], 0
1815
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
1916
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
2017
; CHECK-NEXT: [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], 80

llvm/test/Transforms/LoopVectorize/X86/tail_loop_folding.ll

Lines changed: 21 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -172,50 +172,48 @@ define i32 @reduction_i32(i32* nocapture readonly %A, i32* nocapture readonly %B
172172
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
173173
; CHECK: vector.body:
174174
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
175-
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
175+
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <8 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
176176
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <8 x i64> undef, i64 [[INDEX]], i32 0
177177
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <8 x i64> [[BROADCAST_SPLATINSERT1]], <8 x i64> undef, <8 x i32> zeroinitializer
178178
; CHECK-NEXT: [[INDUCTION:%.*]] = add <8 x i64> [[BROADCAST_SPLAT2]], <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>
179179
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
180-
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
181-
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
182-
; CHECK-NEXT: [[TMP6:%.*]] = icmp ule <8 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]]
183-
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
184-
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <8 x i32>*
185-
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP8]], i32 4, <8 x i1> [[TMP6]], <8 x i32> undef)
186-
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]]
187-
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
188-
; CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <8 x i32>*
189-
; CHECK-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP11]], i32 4, <8 x i1> [[TMP6]], <8 x i32> undef)
190-
; CHECK-NEXT: [[TMP12:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD3]], [[WIDE_MASKED_LOAD]]
191-
; CHECK-NEXT: [[TMP13]] = add <8 x i32> [[TMP12]], [[VEC_PHI]]
192-
; CHECK-NEXT: [[TMP14:%.*]] = trunc i64 [[TMP4]] to i32
193-
; CHECK-NEXT: [[TMP15:%.*]] = select <8 x i1> [[TMP6]], <8 x i32> [[TMP13]], <8 x i32> [[VEC_PHI]]
180+
; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
181+
; CHECK-NEXT: [[TMP5:%.*]] = icmp ule <8 x i64> [[INDUCTION]], [[BROADCAST_SPLAT]]
182+
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
183+
; CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <8 x i32>*
184+
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP7]], i32 4, <8 x i1> [[TMP5]], <8 x i32> undef)
185+
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]]
186+
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
187+
; CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
188+
; CHECK-NEXT: [[WIDE_MASKED_LOAD3:%.*]] = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* [[TMP10]], i32 4, <8 x i1> [[TMP5]], <8 x i32> undef)
189+
; CHECK-NEXT: [[TMP11:%.*]] = add nsw <8 x i32> [[WIDE_MASKED_LOAD3]], [[WIDE_MASKED_LOAD]]
190+
; CHECK-NEXT: [[TMP12]] = add <8 x i32> [[TMP11]], [[VEC_PHI]]
191+
; CHECK-NEXT: [[TMP13:%.*]] = select <8 x i1> [[TMP5]], <8 x i32> [[TMP12]], <8 x i32> [[VEC_PHI]]
194192
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 8
195-
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
196-
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
193+
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
194+
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
197195
; CHECK: middle.block:
198-
; CHECK-NEXT: [[TMP17:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP15]])
196+
; CHECK-NEXT: [[TMP15:%.*]] = call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> [[TMP13]])
199197
; CHECK-NEXT: br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
200198
; CHECK: scalar.ph:
201199
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
202-
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
200+
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
203201
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
204202
; CHECK: for.body:
205203
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
206204
; CHECK-NEXT: [[SUM_0:%.*]] = phi i32 [ [[SUM_1:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
207205
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
208206
; CHECK-NEXT: [[ARRAYIDXA:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
209-
; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[ARRAYIDXA]], align 4
207+
; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[ARRAYIDXA]], align 4
210208
; CHECK-NEXT: [[ARRAYIDXB:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
211-
; CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[ARRAYIDXB]], align 4
212-
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP18]]
209+
; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[ARRAYIDXB]], align 4
210+
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP17]], [[TMP16]]
213211
; CHECK-NEXT: [[SUM_1]] = add nuw nsw i32 [[ADD]], [[SUM_0]]
214212
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
215213
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
216214
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], [[LOOP7:!llvm.loop !.*]]
217215
; CHECK: for.cond.cleanup:
218-
; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
216+
; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi i32 [ [[SUM_1]], [[FOR_BODY]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
219217
; CHECK-NEXT: ret i32 [[SUM_1_LCSSA]]
220218
;
221219
entry:

llvm/test/Transforms/LoopVectorize/followup.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ for.end:
3232
; CHECK-LABEL: @followup(
3333

3434
; CHECK-LABEL: vector.body:
35-
; CHECK: br i1 %13, label %middle.block, label %vector.body, !llvm.loop ![[LOOP_VECTOR:[0-9]+]]
35+
; CHECK: br i1 %{{[0-9]*}}, label %middle.block, label %vector.body, !llvm.loop ![[LOOP_VECTOR:[0-9]+]]
3636
; CHECK-LABEL: for.body:
3737
; CHECK: br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop ![[LOOP_EPILOGUE:[0-9]+]]
3838

llvm/test/Transforms/LoopVectorize/if-pred-stores.ll

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -249,13 +249,9 @@ define void @bug18724(i1 %cond) {
249249
; UNROLL-NOSIMPLIFY-NEXT: [[TMP5:%.*]] = add i32 [[VEC_PHI2]], 1
250250
; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI]] = select i1 undef, i32 [[VEC_PHI]], i32 [[TMP4]]
251251
; UNROLL-NOSIMPLIFY-NEXT: [[PREDPHI5]] = select i1 undef, i32 [[VEC_PHI2]], i32 [[TMP5]]
252-
; UNROLL-NOSIMPLIFY-NEXT: [[OFFSET_IDX6:%.*]] = add i64 undef, [[INDEX]]
253-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = trunc i64 [[OFFSET_IDX6]] to i32
254-
; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION7:%.*]] = add i32 [[TMP6]], 0
255-
; UNROLL-NOSIMPLIFY-NEXT: [[INDUCTION8:%.*]] = add i32 [[TMP6]], 1
256252
; UNROLL-NOSIMPLIFY-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 2
257-
; UNROLL-NOSIMPLIFY-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
258-
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]]
253+
; UNROLL-NOSIMPLIFY-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 0
254+
; UNROLL-NOSIMPLIFY-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP3:!llvm.loop !.*]]
259255
; UNROLL-NOSIMPLIFY: middle.block:
260256
; UNROLL-NOSIMPLIFY-NEXT: [[BIN_RDX:%.*]] = add i32 [[PREDPHI5]], [[PREDPHI]]
261257
; UNROLL-NOSIMPLIFY-NEXT: [[CMP_N:%.*]] = icmp eq i64 1, 0

0 commit comments

Comments
 (0)