Skip to content

Commit c2d4c7c

Browse files
authored
[VPlan] Permit more users in narrowToSingleScalars (#166559)
narrowToSingleScalarRecipes can permit users that are WidenStore, or a VPInstruction that has a suitable opcode. This is a generalization and extension of the existing code.
1 parent 8a83700 commit c2d4c7c

File tree

8 files changed

+149
-83
lines changed

8 files changed

+149
-83
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 20 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1420,10 +1420,26 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
14201420
// broadcasts.
14211421
if (!vputils::isSingleScalar(RepOrWidenR) ||
14221422
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
1423-
return U->usesScalars(RepOrWidenR) ||
1424-
match(cast<VPRecipeBase>(U),
1425-
m_CombineOr(m_ExtractLastElement(m_VPValue()),
1426-
m_ExtractLastLanePerPart(m_VPValue())));
1423+
if (auto *Store = dyn_cast<VPWidenStoreRecipe>(U)) {
1424+
// VPWidenStore doesn't have users, and stores are always
1425+
// profitable to widen: hence, permitting single-scalar stored
1426+
// values is an important leaf condition. The assert must hold as
1427+
// we checked the RepOrWidenR operand against
1428+
// vputils::isSingleScalar.
1429+
assert(RepOrWidenR == Store->getAddr() ||
1430+
vputils::isSingleScalar(Store->getStoredValue()));
1431+
return true;
1432+
}
1433+
1434+
if (auto *VPI = dyn_cast<VPInstruction>(U)) {
1435+
unsigned Opcode = VPI->getOpcode();
1436+
if (Opcode == VPInstruction::ExtractLastElement ||
1437+
Opcode == VPInstruction::ExtractLastLanePerPart ||
1438+
Opcode == VPInstruction::ExtractPenultimateElement)
1439+
return true;
1440+
}
1441+
1442+
return U->usesScalars(RepOrWidenR);
14271443
}))
14281444
continue;
14291445

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 23 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -429,48 +429,36 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
429429
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
430430
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
431431
; DEFAULT: [[VECTOR_BODY]]:
432-
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ]
433-
; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
434-
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0
435-
; DEFAULT-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> poison, <4 x i32> zeroinitializer
436-
; DEFAULT-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
437-
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP19]], i64 0
438-
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
439-
; DEFAULT-NEXT: [[TMP6:%.*]] = or <4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]]
440-
; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
441-
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
442-
; DEFAULT-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT30]], <4 x i32> poison, <4 x i32> zeroinitializer
443-
; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT31]], [[TMP6]]
432+
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE33:.*]] ]
433+
; DEFAULT-NEXT: [[TMP3:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
434+
; DEFAULT-NEXT: [[TMP4:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
435+
; DEFAULT-NEXT: [[TMP5:%.*]] = or i32 [[TMP4]], [[TMP3]]
436+
; DEFAULT-NEXT: [[TMP6:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
437+
; DEFAULT-NEXT: [[TMP7:%.*]] = icmp ugt i32 [[TMP6]], [[TMP5]]
438+
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[TMP7]], i64 0
439+
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT]], <4 x i1> poison, <4 x i32> zeroinitializer
444440
; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]]
445-
; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
446-
; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
441+
; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
447442
; DEFAULT: [[PRED_STORE_IF]]:
448-
; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
449-
; DEFAULT-NEXT: store i32 [[TMP11]], ptr [[E]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]]
443+
; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]]
450444
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]]
451445
; DEFAULT: [[PRED_STORE_CONTINUE]]:
452-
; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
453-
; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
446+
; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF28:.*]], label %[[PRED_STORE_CONTINUE29:.*]]
447+
; DEFAULT: [[PRED_STORE_IF28]]:
448+
; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
449+
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE29]]
450+
; DEFAULT: [[PRED_STORE_CONTINUE29]]:
451+
; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]]
452+
; DEFAULT: [[PRED_STORE_IF30]]:
453+
; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
454+
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE31]]
455+
; DEFAULT: [[PRED_STORE_CONTINUE31]]:
456+
; DEFAULT-NEXT: br i1 [[TMP7]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33]]
454457
; DEFAULT: [[PRED_STORE_IF32]]:
455-
; DEFAULT-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
456-
; DEFAULT-NEXT: store i32 [[TMP13]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
458+
; DEFAULT-NEXT: store i32 [[TMP5]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
457459
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]]
458460
; DEFAULT: [[PRED_STORE_CONTINUE33]]:
459-
; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
460-
; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]]
461-
; DEFAULT: [[PRED_STORE_IF34]]:
462-
; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
463-
; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
464-
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]]
465-
; DEFAULT: [[PRED_STORE_CONTINUE35]]:
466-
; DEFAULT-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
467-
; DEFAULT-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF36:.*]], label %[[PRED_STORE_CONTINUE37]]
468-
; DEFAULT: [[PRED_STORE_IF36]]:
469-
; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
470-
; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
471-
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE37]]
472-
; DEFAULT: [[PRED_STORE_CONTINUE37]]:
473-
; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[TMP16]], <4 x i1> [[TMP8]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]]
461+
; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[TMP16]], <4 x i1> [[BROADCAST_SPLAT]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]]
474462
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
475463
; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
476464
; DEFAULT-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
; CM: vector.ph:
1717
; CM: CLONE ir<%a> = extractvalue ir<%sv>
1818
; CM: CLONE ir<%b> = extractvalue ir<%sv>
19-
; CM: WIDEN ir<%add> = add ir<%a>, ir<%b>
19+
; CM: CLONE ir<%add> = add ir<%a>, ir<%b>
2020
; CM: Successor(s): vector loop
2121

2222
; CM: LV: Scalar loop costs: 5.
@@ -30,23 +30,22 @@ define void @test1(ptr %dst, {i64, i64} %sv) {
3030
; FORCED-NEXT: br label %[[VECTOR_PH:.*]]
3131
; FORCED: [[VECTOR_PH]]:
3232
; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
33-
; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
34-
; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
3533
; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { i64, i64 } [[SV]], 1
36-
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
34+
; FORCED-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], [[TMP4]]
35+
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
3736
; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
38-
; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
3937
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
4038
; FORCED: [[VECTOR_BODY]]:
4139
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
4240
; FORCED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
43-
; FORCED-NEXT: store <2 x i64> [[TMP1]], ptr [[TMP2]], align 4
41+
; FORCED-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP2]], align 4
4442
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4543
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
4644
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4745
; FORCED: [[MIDDLE_BLOCK]]:
48-
; FORCED-NEXT: br [[EXIT:label %.*]]
49-
; FORCED: [[SCALAR_PH:.*:]]
46+
; FORCED-NEXT: br label %[[EXIT:.*]]
47+
; FORCED: [[EXIT]]:
48+
; FORCED-NEXT: ret void
5049
;
5150
entry:
5251
br label %loop.body
@@ -99,10 +98,11 @@ define void @test_getVectorCallCost(ptr %dst, {float, float} %sv) {
9998
; FORCED-NEXT: store <2 x float> [[TMP2]], ptr [[TMP1]], align 4
10099
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
101100
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
102-
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
101+
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
103102
; FORCED: [[MIDDLE_BLOCK]]:
104-
; FORCED-NEXT: br [[EXIT:label %.*]]
105-
; FORCED: [[SCALAR_PH:.*:]]
103+
; FORCED-NEXT: br label %[[EXIT:.*]]
104+
; FORCED: [[EXIT]]:
105+
; FORCED-NEXT: ret void
106106
;
107107
entry:
108108
br label %loop.body

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,15 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
1717
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]]
1818
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]]
1919
; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
20-
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
21-
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
2220
; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i64, i64 } [[SV]], 1
23-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0
21+
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[EXTRACT0]], [[TMP10]]
22+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
2423
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
25-
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT2]], [[BROADCAST_SPLAT2]]
2624
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2725
; CHECK: [[VECTOR_BODY]]:
2826
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
2927
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
30-
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
28+
; CHECK-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP8]], align 8
3129
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
3230
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3331
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]

llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -293,9 +293,9 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) {
293293
; CHECK-NEXT: [[ENTRY:.*:]]
294294
; CHECK-NEXT: br label %[[VECTOR_PH:.*]]
295295
; CHECK: [[VECTOR_PH]]:
296-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A]], i64 0
296+
; CHECK-NEXT: [[TMP0:%.*]] = xor i32 [[A]], -1
297+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0
297298
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
298-
; CHECK-NEXT: [[TMP19:%.*]] = xor <vscale x 4 x i32> [[BROADCAST_SPLAT]], splat (i32 -1)
299299
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
300300
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 9)
301301
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP7]]
@@ -309,7 +309,7 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) {
309309
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
310310
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
311311
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 4 x i64> [[VEC_IND]]
312-
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP19]], <vscale x 4 x ptr> align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP8]])
312+
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP8]])
313313
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP5]]
314314
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]
315315
; CHECK-NEXT: [[TMP18:%.*]] = icmp eq i64 [[AVL_NEXT]], 0

llvm/test/Transforms/LoopVectorize/first-order-recurrence-with-uniform-ops.ll

Lines changed: 6 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -134,22 +134,18 @@ define i16 @for_phi_removed(ptr %src) {
134134
; UNROLL-NO-IC: [[VECTOR_BODY]]:
135135
; UNROLL-NO-IC-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
136136
; UNROLL-NO-IC-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
137-
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
138-
; UNROLL-NO-IC-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
139-
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer
140-
; UNROLL-NO-IC-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
141-
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select i1 [[TMP4]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer
137+
; UNROLL-NO-IC-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
138+
; UNROLL-NO-IC-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0
142139
; UNROLL-NO-IC-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
143140
; UNROLL-NO-IC-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 104
144141
; UNROLL-NO-IC-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
145142
; UNROLL-NO-IC: [[MIDDLE_BLOCK]]:
146-
; UNROLL-NO-IC-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
147143
; UNROLL-NO-IC-NEXT: br label %[[SCALAR_PH:.*]]
148144
; UNROLL-NO-IC: [[SCALAR_PH]]:
149145
; UNROLL-NO-IC-NEXT: br label %[[LOOP:.*]]
150146
; UNROLL-NO-IC: [[LOOP]]:
151147
; UNROLL-NO-IC-NEXT: [[IV:%.*]] = phi i16 [ 104, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
152-
; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
148+
; UNROLL-NO-IC-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
153149
; UNROLL-NO-IC-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4
154150
; UNROLL-NO-IC-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
155151
; UNROLL-NO-IC-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0
@@ -200,22 +196,18 @@ define i16 @for_phi_removed(ptr %src) {
200196
; SINK-AFTER: [[VECTOR_BODY]]:
201197
; SINK-AFTER-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
202198
; SINK-AFTER-NEXT: [[TMP0:%.*]] = load i32, ptr [[SRC]], align 4
203-
; SINK-AFTER-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
204-
; SINK-AFTER-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
205-
; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq <4 x i32> [[BROADCAST_SPLAT]], zeroinitializer
206-
; SINK-AFTER-NEXT: [[TMP4:%.*]] = extractelement <4 x i1> [[TMP1]], i32 0
207-
; SINK-AFTER-NEXT: [[TMP2:%.*]] = select i1 [[TMP4]], <4 x i16> splat (i16 1), <4 x i16> zeroinitializer
199+
; SINK-AFTER-NEXT: [[TMP1:%.*]] = icmp eq i32 [[TMP0]], 0
200+
; SINK-AFTER-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], i16 1, i16 0
208201
; SINK-AFTER-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
209202
; SINK-AFTER-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 108
210203
; SINK-AFTER-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
211204
; SINK-AFTER: [[MIDDLE_BLOCK]]:
212-
; SINK-AFTER-NEXT: [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP2]], i32 3
213205
; SINK-AFTER-NEXT: br label %[[SCALAR_PH:.*]]
214206
; SINK-AFTER: [[SCALAR_PH]]:
215207
; SINK-AFTER-NEXT: br label %[[LOOP:.*]]
216208
; SINK-AFTER: [[LOOP]]:
217209
; SINK-AFTER-NEXT: [[IV:%.*]] = phi i16 [ 108, %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ]
218-
; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[VECTOR_RECUR_EXTRACT]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
210+
; SINK-AFTER-NEXT: [[P:%.*]] = phi i16 [ [[TMP2]], %[[SCALAR_PH]] ], [ [[SEL:%.*]], %[[LOOP]] ]
219211
; SINK-AFTER-NEXT: [[L:%.*]] = load i32, ptr [[SRC]], align 4
220212
; SINK-AFTER-NEXT: [[C:%.*]] = icmp eq i32 [[L]], 0
221213
; SINK-AFTER-NEXT: [[SEL]] = select i1 [[C]], i16 1, i16 0

0 commit comments

Comments
 (0)