Skip to content

Commit 317b42e

Browse files
committed
[VPlan] Remove original recipe after narrowing to single-scalar.
Directly remove RepOrWidenR after replacing all uses. Removing the dead user early unlocks additional opportunities for further narrowing.
1 parent 16ab8c0 commit 317b42e

File tree

3 files changed

+16
-33
lines changed

3 files changed

+16
-33
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1419,6 +1419,8 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
14191419
true /*IsSingleScalar*/);
14201420
Clone->insertBefore(RepOrWidenR);
14211421
RepOrWidenR->replaceAllUsesWith(Clone);
1422+
if (isDeadRecipe(*RepOrWidenR))
1423+
RepOrWidenR->eraseFromParent();
14221424
}
14231425
}
14241426
}

llvm/test/Transforms/LoopVectorize/X86/uniform_load.ll

Lines changed: 12 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -64,39 +64,24 @@ exit:
6464
define void @uniform_load_can_fold_users(ptr noalias %src, ptr %dst, i64 %start, double %d) {
6565
; CHECK-LABEL: define void @uniform_load_can_fold_users(
6666
; CHECK-SAME: ptr noalias [[SRC:%.*]], ptr [[DST:%.*]], i64 [[START:%.*]], double [[D:%.*]]) {
67-
; CHECK-NEXT: [[ENTRY:.*:]]
68-
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[START]], 1
69-
; CHECK-NEXT: [[SMIN:%.*]] = call i64 @llvm.smin.i64(i64 [[START]], i64 0)
70-
; CHECK-NEXT: [[TMP1:%.*]] = sub i64 [[TMP0]], [[SMIN]]
71-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP1]], 2
72-
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], [[SCALAR_PH:label %.*]], label %[[VECTOR_PH:.*]]
73-
; CHECK: [[VECTOR_PH]]:
74-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[TMP1]], 2
75-
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP1]], [[N_MOD_VF]]
76-
; CHECK-NEXT: [[TMP2:%.*]] = sub i64 [[START]], [[N_VEC]]
77-
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
78-
; CHECK: [[VECTOR_BODY]]:
79-
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
80-
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[INDEX]], 0
81-
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 1
67+
; CHECK-NEXT: [[ENTRY:.*]]:
68+
; CHECK-NEXT: br label %[[LOOP:.*]]
69+
; CHECK: [[LOOP]]:
70+
; CHECK-NEXT: [[TMP4:%.*]] = phi i64 [ 0, %[[ENTRY]] ], [ [[IV_1_NEXT:%.*]], %[[LOOP]] ]
71+
; CHECK-NEXT: [[IV_2:%.*]] = phi i64 [ [[START]], %[[ENTRY]] ], [ [[IV_2_NEXT:%.*]], %[[LOOP]] ]
8272
; CHECK-NEXT: [[TMP5:%.*]] = load double, ptr [[SRC]], align 8
83-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x double> poison, double [[TMP5]], i64 0
84-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x double> [[BROADCAST_SPLATINSERT]], <2 x double> poison, <2 x i32> zeroinitializer
85-
; CHECK-NEXT: [[TMP6:%.*]] = fmul <2 x double> [[BROADCAST_SPLAT]], splat (double 9.000000e+00)
86-
; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[TMP6]], i32 0
73+
; CHECK-NEXT: [[TMP7:%.*]] = fmul double [[TMP5]], 9.000000e+00
8774
; CHECK-NEXT: [[TMP8:%.*]] = fdiv double [[TMP7]], [[D]]
88-
; CHECK-NEXT: [[TMP9:%.*]] = sub i64 [[TMP3]], 1
8975
; CHECK-NEXT: [[TMP10:%.*]] = sub i64 [[TMP4]], 1
90-
; CHECK-NEXT: [[TMP11:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP3]]
9176
; CHECK-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP4]]
92-
; CHECK-NEXT: [[TMP13:%.*]] = getelementptr double, ptr [[TMP11]], i64 [[TMP9]]
9377
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr double, ptr [[TMP12]], i64 [[TMP10]]
94-
; CHECK-NEXT: store double [[TMP8]], ptr [[TMP13]], align 8
9578
; CHECK-NEXT: store double [[TMP8]], ptr [[TMP14]], align 8
96-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
97-
; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
98-
; CHECK-NEXT: br i1 [[TMP15]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
99-
; CHECK: [[MIDDLE_BLOCK]]:
79+
; CHECK-NEXT: [[IV_1_NEXT]] = add i64 [[TMP4]], 1
80+
; CHECK-NEXT: [[IV_2_NEXT]] = add i64 [[IV_2]], -1
81+
; CHECK-NEXT: [[EC:%.*]] = icmp sgt i64 [[IV_2]], 0
82+
; CHECK-NEXT: br i1 [[EC]], label %[[LOOP]], label %[[EXIT:.*]]
83+
; CHECK: [[EXIT]]:
84+
; CHECK-NEXT: ret void
10085
;
10186
entry:
10287
br label %loop

llvm/test/Transforms/LoopVectorize/single-scalar-cast-minbw.ll

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -84,12 +84,8 @@ define void @single_scalar_cast_stored(ptr %src, ptr %dst, i32 %n) {
8484
; CHECK: [[VECTOR_BODY]]:
8585
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
8686
; CHECK-NEXT: [[TMP0:%.*]] = load i16, ptr [[SRC]], align 2, !alias.scope [[META4:![0-9]+]]
87-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 0
88-
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i16> [[BROADCAST_SPLATINSERT]], <4 x i16> poison, <4 x i32> zeroinitializer
89-
; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <4 x i16> [[BROADCAST_SPLAT]], zeroinitializer
90-
; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i16> [[BROADCAST_SPLAT]], splat (i16 15)
91-
; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
92-
; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x i16> [[TMP2]], i32 0
87+
; CHECK-NEXT: [[TMP3:%.*]] = icmp eq i16 [[TMP0]], 0
88+
; CHECK-NEXT: [[TMP4:%.*]] = and i16 [[TMP0]], 15
9389
; CHECK-NEXT: [[TMP5:%.*]] = select i1 [[TMP3]], i16 0, i16 [[TMP4]]
9490
; CHECK-NEXT: store i16 [[TMP5]], ptr [[DST]], align 2, !alias.scope [[META7:![0-9]+]], !noalias [[META4]]
9591
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4

0 commit comments

Comments
 (0)