Skip to content

Commit e73271c

Browse files
committed
[LV] Remove common extends and selects in CSE
This PR extends the LoopVectorizer's common sub-expression elimination (CSE) to remove identical extend and select instructions. Originally the work only included extends but that uncovered some duplicate selects in reduction-inloop.ll.
1 parent 169b43d commit e73271c

32 files changed

+1104
-205
lines changed

llvm/lib/IR/Instruction.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -935,6 +935,8 @@ bool Instruction::hasSameSpecialState(const Instruction *I2,
935935
if (const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I1))
936936
return GEP->getSourceElementType() ==
937937
cast<GetElementPtrInst>(I2)->getSourceElementType();
938+
if (const CastInst *Cast = dyn_cast<CastInst>(I1))
939+
return Cast->getDestTy() == cast<CastInst>(I2)->getDestTy();
938940

939941
return true;
940942
}

llvm/lib/Transforms/Utils/FunctionComparator.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,10 @@ int FunctionComparator::cmpOperations(const Instruction *L,
804804
return Res;
805805
}
806806
}
807+
if (const CastInst *Cast = dyn_cast<CastInst>(L)) {
808+
const CastInst *CastR = cast<CastInst>(R);
809+
return cmpTypes(Cast->getDestTy(), CastR->getDestTy());
810+
}
807811
return 0;
808812
}
809813

llvm/lib/Transforms/Vectorize/LoopVectorize.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2511,7 +2511,8 @@ namespace {
25112511
struct CSEDenseMapInfo {
25122512
static bool canHandle(const Instruction *I) {
25132513
return isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
2514-
isa<ShuffleVectorInst>(I) || isa<GetElementPtrInst>(I);
2514+
isa<ShuffleVectorInst>(I) || isa<GetElementPtrInst>(I) ||
2515+
isa<CastInst>(I) || isa<SelectInst>(I);
25152516
}
25162517

25172518
static inline Instruction *getEmptyKey() {

llvm/test/Transforms/LoopVectorize/AArch64/induction-costs-sve.ll

Lines changed: 7 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -44,10 +44,8 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
4444
; DEFAULT-NEXT: [[TMP27:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i16>
4545
; DEFAULT-NEXT: [[TMP28:%.*]] = mul <vscale x 8 x i16> [[TMP26]], [[TMP13]]
4646
; DEFAULT-NEXT: [[TMP29:%.*]] = mul <vscale x 8 x i16> [[TMP27]], [[TMP13]]
47-
; DEFAULT-NEXT: [[TMP30:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD]] to <vscale x 8 x i16>
48-
; DEFAULT-NEXT: [[TMP31:%.*]] = zext <vscale x 8 x i8> [[WIDE_LOAD4]] to <vscale x 8 x i16>
49-
; DEFAULT-NEXT: [[TMP32:%.*]] = or <vscale x 8 x i16> [[TMP28]], [[TMP30]]
50-
; DEFAULT-NEXT: [[TMP33:%.*]] = or <vscale x 8 x i16> [[TMP29]], [[TMP31]]
47+
; DEFAULT-NEXT: [[TMP32:%.*]] = or <vscale x 8 x i16> [[TMP28]], [[TMP26]]
48+
; DEFAULT-NEXT: [[TMP33:%.*]] = or <vscale x 8 x i16> [[TMP29]], [[TMP27]]
5149
; DEFAULT-NEXT: [[TMP34:%.*]] = lshr <vscale x 8 x i16> [[TMP32]], splat (i16 1)
5250
; DEFAULT-NEXT: [[TMP35:%.*]] = lshr <vscale x 8 x i16> [[TMP33]], splat (i16 1)
5351
; DEFAULT-NEXT: [[TMP36:%.*]] = trunc <vscale x 8 x i16> [[TMP34]] to <vscale x 8 x i8>
@@ -118,8 +116,7 @@ define void @iv_casts(ptr %dst, ptr %src, i32 %x, i64 %N) #0 {
118116
; PRED-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 16 x i8> @llvm.masked.load.nxv16i8.p0(ptr [[TMP18]], i32 1, <vscale x 16 x i1> [[ACTIVE_LANE_MASK]], <vscale x 16 x i8> poison)
119117
; PRED-NEXT: [[TMP17:%.*]] = zext <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x i16>
120118
; PRED-NEXT: [[TMP22:%.*]] = mul <vscale x 16 x i16> [[TMP17]], [[TMP16]]
121-
; PRED-NEXT: [[TMP24:%.*]] = zext <vscale x 16 x i8> [[WIDE_MASKED_LOAD]] to <vscale x 16 x i16>
122-
; PRED-NEXT: [[TMP20:%.*]] = or <vscale x 16 x i16> [[TMP22]], [[TMP24]]
119+
; PRED-NEXT: [[TMP20:%.*]] = or <vscale x 16 x i16> [[TMP22]], [[TMP17]]
123120
; PRED-NEXT: [[TMP21:%.*]] = lshr <vscale x 16 x i16> [[TMP20]], splat (i16 1)
124121
; PRED-NEXT: [[TMP23:%.*]] = trunc <vscale x 16 x i16> [[TMP21]] to <vscale x 16 x i8>
125122
; PRED-NEXT: [[TMP26:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
@@ -370,9 +367,8 @@ define void @trunc_ivs_and_store(i32 %x, ptr %dst, i64 %N) #0 {
370367
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
371368
; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
372369
; DEFAULT-NEXT: [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], 1
373-
; DEFAULT-NEXT: [[TMP16:%.*]] = trunc i64 [[INDEX]] to i32
374-
; DEFAULT-NEXT: [[TMP18:%.*]] = add i32 [[TMP16]], 1
375-
; DEFAULT-NEXT: [[TMP19:%.*]] = mul i32 [[MUL]], [[TMP16]]
370+
; DEFAULT-NEXT: [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], 1
371+
; DEFAULT-NEXT: [[TMP19:%.*]] = mul i32 [[MUL]], [[OFFSET_IDX]]
376372
; DEFAULT-NEXT: [[TMP20:%.*]] = mul i32 [[MUL]], [[TMP18]]
377373
; DEFAULT-NEXT: [[TMP21:%.*]] = zext i32 [[TMP19]] to i64
378374
; DEFAULT-NEXT: [[TMP22:%.*]] = zext i32 [[TMP20]] to i64
@@ -562,9 +558,8 @@ define void @ivs_trunc_and_ext(i32 %x, ptr %dst, i64 %N) #0 {
562558
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
563559
; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
564560
; DEFAULT-NEXT: [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], 1
565-
; DEFAULT-NEXT: [[TMP15:%.*]] = trunc i64 [[INDEX]] to i32
566-
; DEFAULT-NEXT: [[TMP17:%.*]] = add i32 [[TMP15]], 1
567-
; DEFAULT-NEXT: [[TMP18:%.*]] = mul i32 [[ADD]], [[TMP15]]
561+
; DEFAULT-NEXT: [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], 1
562+
; DEFAULT-NEXT: [[TMP18:%.*]] = mul i32 [[ADD]], [[OFFSET_IDX]]
568563
; DEFAULT-NEXT: [[TMP19:%.*]] = mul i32 [[ADD]], [[TMP17]]
569564
; DEFAULT-NEXT: [[TMP20:%.*]] = zext i32 [[TMP18]] to i64
570565
; DEFAULT-NEXT: [[TMP21:%.*]] = zext i32 [[TMP19]] to i64

llvm/test/Transforms/LoopVectorize/AArch64/outer_loop_test1_no_explicit_vect_width.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,7 @@ define void @foo_i32(i32 %n) {
3939
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds [8 x i32], ptr @arr2, i64 0, <4 x i64> [[VEC_IND]]
4040
; CHECK-NEXT: [[TMP1:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32>
4141
; CHECK-NEXT: call void @llvm.masked.scatter.v4i32.v4p0(<4 x i32> [[TMP1]], <4 x ptr> [[TMP0]], i32 4, <4 x i1> splat (i1 true))
42-
; CHECK-NEXT: [[TMP8:%.*]] = trunc <4 x i64> [[VEC_IND]] to <4 x i32>
43-
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP8]], [[BROADCAST_SPLAT]]
42+
; CHECK-NEXT: [[TMP2:%.*]] = add nsw <4 x i32> [[TMP1]], [[BROADCAST_SPLAT]]
4443
; CHECK-NEXT: br label %[[FOR_BODY31:.*]]
4544
; CHECK: [[FOR_BODY31]]:
4645
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, %[[VECTOR_BODY]] ], [ [[TMP4:%.*]], %[[FOR_BODY31]] ]

llvm/test/Transforms/LoopVectorize/ARM/mve-reductions.ll

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1679,8 +1679,7 @@ define i64 @test_std_q31(ptr %x, i32 %n) #0 {
16791679
; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64>
16801680
; CHECK-NEXT: [[TMP3:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP2]])
16811681
; CHECK-NEXT: [[TMP4]] = add i64 [[VEC_PHI]], [[TMP3]]
1682-
; CHECK-NEXT: [[TMP5:%.*]] = sext <4 x i32> [[TMP1]] to <4 x i64>
1683-
; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i64> [[TMP5]], [[TMP5]]
1682+
; CHECK-NEXT: [[TMP6:%.*]] = mul nsw <4 x i64> [[TMP2]], [[TMP2]]
16841683
; CHECK-NEXT: [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[TMP6]])
16851684
; CHECK-NEXT: [[TMP8]] = add i64 [[VEC_PHI1]], [[TMP7]]
16861685
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4

llvm/test/Transforms/LoopVectorize/PowerPC/vectorize-bswap.ll

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,9 +19,7 @@ define dso_local void @test(ptr %Arr, i32 signext %Len) {
1919
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr inbounds i32, ptr [[ARR:%.*]], i64 [[TMP1]]
2020
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP2]], align 4
2121
; CHECK-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[WIDE_LOAD]])
22-
; CHECK-NEXT: [[TMP5:%.*]] = sext i32 [[INDEX]] to i64
23-
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i32, ptr [[ARR]], i64 [[TMP5]]
24-
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP6]], align 4
22+
; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP3]], align 4
2523
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2624
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2725
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]

llvm/test/Transforms/LoopVectorize/RISCV/blocks-with-dead-instructions.ll

Lines changed: 14 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,8 @@ define void @block_with_dead_inst_1(ptr %src, i64 %N) #0 {
2828
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
2929
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
3030
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP20]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP12]])
31-
; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64
32-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
33-
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
31+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
32+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]]
3433
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
3534
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP2]]
3635
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -104,9 +103,8 @@ define void @block_with_dead_inst_2(ptr %src) #0 {
104103
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
105104
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
106105
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP10]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP9]])
107-
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64
108-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]]
109-
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
106+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]]
107+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
110108
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
111109
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 333
112110
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
@@ -180,9 +178,8 @@ define void @multiple_blocks_with_dead_insts_3(ptr %src) #0 {
180178
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
181179
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
182180
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP10]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP9]])
183-
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64
184-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]]
185-
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
181+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]]
182+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
186183
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
187184
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 333
188185
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
@@ -266,9 +263,8 @@ define void @multiple_blocks_with_dead_insts_4(ptr %src, i64 %N) #0 {
266263
; CHECK-NEXT: [[DOTSPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
267264
; CHECK-NEXT: [[TMP20:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
268265
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP20]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP12]])
269-
; CHECK-NEXT: [[TMP14:%.*]] = zext i32 [[TMP12]] to i64
270-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP14]], [[EVL_BASED_IV]]
271-
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP14]]
266+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP17]], [[EVL_BASED_IV]]
267+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]]
272268
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
273269
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP2]]
274270
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
@@ -354,9 +350,8 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_5(ptr %src) #0 {
354350
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 8 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
355351
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr i16, ptr [[SRC]], <vscale x 8 x i64> [[VEC_IND]]
356352
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP10]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP9]])
357-
; CHECK-NEXT: [[TMP11:%.*]] = zext i32 [[TMP9]] to i64
358-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP11]], [[EVL_BASED_IV]]
359-
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP11]]
353+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP7]], [[EVL_BASED_IV]]
354+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP7]]
360355
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
361356
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 333
362357
; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
@@ -466,9 +461,8 @@ define void @multiple_blocks_with_dead_inst_multiple_successors_6(ptr %src, i1 %
466461
; CHECK-NEXT: [[TMP23:%.*]] = select <vscale x 8 x i1> [[TMP18]], <vscale x 8 x i1> [[BROADCAST_SPLAT]], <vscale x 8 x i1> zeroinitializer
467462
; CHECK-NEXT: [[TMP24:%.*]] = or <vscale x 8 x i1> [[TMP22]], [[TMP23]]
468463
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP20]], <vscale x 8 x i1> [[TMP24]], i32 [[TMP27]])
469-
; CHECK-NEXT: [[TMP25:%.*]] = zext i32 [[TMP27]] to i64
470-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP25]], [[EVL_BASED_IV]]
471-
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP25]]
464+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[EVL_BASED_IV]]
465+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]]
472466
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT]]
473467
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP2]]
474468
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
@@ -737,9 +731,8 @@ define void @dead_load_in_block(ptr %dst, ptr %src, i8 %N, i64 %x) #0 {
737731
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
738732
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 4 x i64> [[VEC_IND]]
739733
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> zeroinitializer, <vscale x 4 x ptr> align 4 [[TMP21]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP18]]), !alias.scope [[META19:![0-9]+]], !noalias [[META22:![0-9]+]]
740-
; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP18]] to i64
741-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]]
742-
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
734+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP17]], [[EVL_BASED_IV]]
735+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]]
743736
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
744737
; CHECK-NEXT: [[TMP26:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP3]]
745738
; CHECK-NEXT: br i1 [[TMP26]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]

llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll

Lines changed: 6 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -37,9 +37,8 @@ define void @dead_load(ptr %p, i16 %start) {
3737
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 8 x i64> [[DOTSPLATINSERT1]], <vscale x 8 x i64> poison, <vscale x 8 x i32> zeroinitializer
3838
; CHECK-NEXT: [[TMP21:%.*]] = getelementptr i16, ptr [[P]], <vscale x 8 x i64> [[VEC_IND]]
3939
; CHECK-NEXT: call void @llvm.vp.scatter.nxv8i16.nxv8p0(<vscale x 8 x i16> zeroinitializer, <vscale x 8 x ptr> align 2 [[TMP21]], <vscale x 8 x i1> splat (i1 true), i32 [[TMP16]])
40-
; CHECK-NEXT: [[TMP22:%.*]] = zext i32 [[TMP16]] to i64
41-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP22]], [[EVL_BASED_IV]]
42-
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP22]]
40+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP19]], [[EVL_BASED_IV]]
41+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP19]]
4342
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 8 x i64> [[VEC_IND]], [[DOTSPLAT2]]
4443
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP5]]
4544
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -332,9 +331,8 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) {
332331
; CHECK-NEXT: [[PREDPHI:%.*]] = select <vscale x 4 x i1> [[TMP14]], <vscale x 4 x i32> zeroinitializer, <vscale x 4 x i32> [[TMP19]]
333332
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 4 x i64> [[VEC_IND]]
334333
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[PREDPHI]], <vscale x 4 x ptr> align 4 [[TMP16]], <vscale x 4 x i1> [[TMP15]], i32 [[TMP8]])
335-
; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP8]] to i64
336-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP17]], [[EVL_BASED_IV]]
337-
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]]
334+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add nuw i64 [[TMP5]], [[EVL_BASED_IV]]
335+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]]
338336
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
339337
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], 37
340338
; CHECK-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
@@ -419,9 +417,8 @@ define void @gather_interleave_group_with_dead_insert_pos(i64 %N, ptr noalias %s
419417
; CHECK-NEXT: [[TMP18:%.*]] = zext <vscale x 4 x i8> [[TMP17]] to <vscale x 4 x i32>
420418
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 4 x i64> [[VEC_IND]]
421419
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP18]], <vscale x 4 x ptr> align 4 [[TMP19]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP10]])
422-
; CHECK-NEXT: [[TMP20:%.*]] = zext i32 [[TMP10]] to i64
423-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP20]], [[EVL_BASED_IV]]
424-
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP20]]
420+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
421+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
425422
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[DOTSPLAT]]
426423
; CHECK-NEXT: [[TMP21:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[TMP2]]
427424
; CHECK-NEXT: br i1 [[TMP21]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]

llvm/test/Transforms/LoopVectorize/RISCV/evl-compatible-loops.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,8 @@ define void @test_wide_integer_induction(ptr noalias %a, i64 %N) {
2424
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
2525
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i64, ptr [[A]], i64 [[EVL_BASED_IV]]
2626
; CHECK-NEXT: call void @llvm.vp.store.nxv2i64.p0(<vscale x 2 x i64> [[VEC_IND]], ptr align 8 [[TMP14]], <vscale x 2 x i1> splat (i1 true), i32 [[TMP11]])
27-
; CHECK-NEXT: [[TMP16:%.*]] = zext i32 [[TMP11]] to i64
28-
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP16]], [[EVL_BASED_IV]]
29-
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP16]]
27+
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP12]], [[EVL_BASED_IV]]
28+
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP12]]
3029
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 2 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
3130
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
3231
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
@@ -80,8 +79,7 @@ define void @test_wide_ptr_induction(ptr noalias %a, ptr noalias %b, i64 %N) {
8079
; CHECK-NEXT: [[TMP9:%.*]] = zext i32 [[TMP7]] to i64
8180
; CHECK-NEXT: [[INDEX_EVL_NEXT]] = add i64 [[TMP9]], [[EVL_BASED_IV]]
8281
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP9]]
83-
; CHECK-NEXT: [[TMP10:%.*]] = zext i32 [[TMP7]] to i64
84-
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 8, [[TMP10]]
82+
; CHECK-NEXT: [[TMP11:%.*]] = mul i64 8, [[TMP9]]
8583
; CHECK-NEXT: [[PTR_IND]] = getelementptr i8, ptr [[POINTER_PHI]], i64 [[TMP11]]
8684
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_EVL_NEXT]], [[N]]
8785
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]

0 commit comments

Comments
 (0)