Skip to content

Commit 02d9c71

Browse files
committed
[VPlan] Allow wide-stores in narrow to single scalar
Follow up on 132bacd ([VPlan] Also allow extracts as users when converting to single scalars.) to also allow WidenStore recipes as users when converting to single scalars. While at it, refine the vputils::isSingleScalar check to also check vputils::onlyFirstLaneUsed, and forbid zero users. The motivation for this patch is to ease consolidation of the logic to narrow to single scalar, and the consolidation will be tackled in a follow-up.
1 parent ead0e97 commit 02d9c71

File tree

6 files changed

+41
-46
lines changed

6 files changed

+41
-46
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1311,14 +1311,15 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
13111311
// Skip recipes that aren't single scalars or don't have only their
13121312
// scalar results used. In the latter case, we would introduce extra
13131313
// broadcasts.
1314-
if (!vputils::isSingleScalar(RepOrWidenR) ||
1314+
if ((!vputils::isSingleScalar(RepOrWidenR) &&
1315+
!vputils::onlyFirstLaneUsed(RepOrWidenR)) ||
1316+
RepOrWidenR->getNumUsers() == 0 ||
13151317
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
1316-
return U->usesScalars(RepOrWidenR) ||
1318+
return U->usesScalars(RepOrWidenR) || isa<VPWidenStoreRecipe>(U) ||
13171319
match(cast<VPRecipeBase>(U),
13181320
m_ExtractLastElement(m_VPValue()));
13191321
}))
13201322
continue;
1321-
13221323
auto *Clone = new VPReplicateRecipe(RepOrWidenR->getUnderlyingInstr(),
13231324
RepOrWidenR->operands(),
13241325
true /*IsSingleScalar*/);

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 23 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -428,48 +428,45 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
428428
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 [[TMP0]], [[N_MOD_VF]]
429429
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
430430
; DEFAULT: [[VECTOR_BODY]]:
431-
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ]
431+
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE35:.*]] ]
432432
; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
433433
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0
434434
; DEFAULT-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> poison, <4 x i32> zeroinitializer
435435
; DEFAULT-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
436436
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP19]], i64 0
437437
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
438438
; DEFAULT-NEXT: [[TMP6:%.*]] = or <4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]]
439-
; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
440-
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
441-
; DEFAULT-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT30]], <4 x i32> poison, <4 x i32> zeroinitializer
442-
; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT31]], [[TMP6]]
439+
; DEFAULT-NEXT: [[TMP10:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
440+
; DEFAULT-NEXT: [[TMP7:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
441+
; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt i32 [[TMP10]], [[TMP7]]
442+
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT36:%.*]] = insertelement <4 x i1> poison, i1 [[TMP8]], i64 0
443+
; DEFAULT-NEXT: [[BROADCAST_SPLAT37:%.*]] = shufflevector <4 x i1> [[BROADCAST_SPLATINSERT36]], <4 x i1> poison, <4 x i32> zeroinitializer
443444
; DEFAULT-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[D]], i64 [[INDEX]]
444-
; DEFAULT-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP8]], i32 0
445-
; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
445+
; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
446446
; DEFAULT: [[PRED_STORE_IF]]:
447447
; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
448448
; DEFAULT-NEXT: store i32 [[TMP11]], ptr [[E]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]]
449449
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]]
450450
; DEFAULT: [[PRED_STORE_CONTINUE]]:
451-
; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
452-
; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
453-
; DEFAULT: [[PRED_STORE_IF32]]:
451+
; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF30:.*]], label %[[PRED_STORE_CONTINUE31:.*]]
452+
; DEFAULT: [[PRED_STORE_IF30]]:
454453
; DEFAULT-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
455454
; DEFAULT-NEXT: store i32 [[TMP13]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
455+
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE31]]
456+
; DEFAULT: [[PRED_STORE_CONTINUE31]]:
457+
; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
458+
; DEFAULT: [[PRED_STORE_IF32]]:
459+
; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
460+
; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
456461
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]]
457462
; DEFAULT: [[PRED_STORE_CONTINUE33]]:
458-
; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
459-
; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]]
463+
; DEFAULT-NEXT: br i1 [[TMP8]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35]]
460464
; DEFAULT: [[PRED_STORE_IF34]]:
461-
; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
462-
; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
463-
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]]
464-
; DEFAULT: [[PRED_STORE_CONTINUE35]]:
465-
; DEFAULT-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
466-
; DEFAULT-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF36:.*]], label %[[PRED_STORE_CONTINUE37]]
467-
; DEFAULT: [[PRED_STORE_IF36]]:
468465
; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
469466
; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
470-
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE37]]
471-
; DEFAULT: [[PRED_STORE_CONTINUE37]]:
472-
; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <4 x i1> [[TMP8]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]]
467+
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]]
468+
; DEFAULT: [[PRED_STORE_CONTINUE35]]:
469+
; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr [[TMP16]], i32 4, <4 x i1> [[BROADCAST_SPLAT37]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]]
473470
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
474471
; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
475472
; DEFAULT-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
@@ -658,15 +655,15 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
658655
; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1
659656
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]]
660657
; COMMON: [[PRED_STORE_CONTINUE12]]:
661-
; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]]
658+
; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT1:.*]]
662659
; COMMON: [[PRED_STORE_IF13]]:
663660
; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7
664661
; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1
665-
; COMMON-NEXT: br label %[[EXIT]]
666-
; COMMON: [[EXIT]]:
662+
; COMMON-NEXT: br label %[[EXIT1]]
663+
; COMMON: [[EXIT1]]:
667664
; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]]
668665
; COMMON: [[MIDDLE_BLOCK]]:
669-
; COMMON-NEXT: br [[EXIT1:label %.*]]
666+
; COMMON-NEXT: br [[EXIT:label %.*]]
670667
; COMMON: [[SCALAR_PH]]:
671668
;
672669
entry:

llvm/test/Transforms/LoopVectorize/AArch64/extractvalue-no-scalarization-required.ll

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
; CM: vector.ph:
1717
; CM: CLONE ir<%a> = extractvalue ir<%sv>
1818
; CM: CLONE ir<%b> = extractvalue ir<%sv>
19-
; CM: WIDEN ir<%add> = add ir<%a>, ir<%b>
19+
; CM: CLONE ir<%add> = add ir<%a>, ir<%b>
2020
; CM: Successor(s): vector loop
2121

2222
; CM: LV: Scalar loop costs: 5.
@@ -30,17 +30,15 @@ define void @test1(ptr %dst, {i64, i64} %sv) {
3030
; FORCED-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
3131
; FORCED: [[VECTOR_PH]]:
3232
; FORCED-NEXT: [[TMP0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
33-
; FORCED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i64> poison, i64 [[TMP0]], i64 0
34-
; FORCED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT]], <2 x i64> poison, <2 x i32> zeroinitializer
3533
; FORCED-NEXT: [[TMP4:%.*]] = extractvalue { i64, i64 } [[SV]], 1
36-
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP4]], i64 0
34+
; FORCED-NEXT: [[TMP5:%.*]] = add i64 [[TMP0]], [[TMP4]]
35+
; FORCED-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i64> poison, i64 [[TMP5]], i64 0
3736
; FORCED-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i64> [[BROADCAST_SPLATINSERT1]], <2 x i64> poison, <2 x i32> zeroinitializer
38-
; FORCED-NEXT: [[TMP1:%.*]] = add <2 x i64> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT2]]
3937
; FORCED-NEXT: br label %[[VECTOR_BODY:.*]]
4038
; FORCED: [[VECTOR_BODY]]:
4139
; FORCED-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
4240
; FORCED-NEXT: [[TMP2:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
43-
; FORCED-NEXT: store <2 x i64> [[TMP1]], ptr [[TMP2]], align 4
41+
; FORCED-NEXT: store <2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP2]], align 4
4442
; FORCED-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4543
; FORCED-NEXT: [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1000
4644
; FORCED-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]

llvm/test/Transforms/LoopVectorize/AArch64/sve-widen-extractvalue.ll

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -17,17 +17,15 @@ define void @widen_extractvalue(ptr %dst, {i64, i64} %sv) #0 {
1717
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i32 1000, [[TMP3]]
1818
; CHECK-NEXT: [[N_VEC:%.*]] = sub i32 1000, [[N_MOD_VF]]
1919
; CHECK-NEXT: [[EXTRACT0:%.*]] = extractvalue { i64, i64 } [[SV]], 0
20-
; CHECK-NEXT: [[DOTSPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[EXTRACT0]], i64 0
21-
; CHECK-NEXT: [[DOTSPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[DOTSPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
2220
; CHECK-NEXT: [[TMP10:%.*]] = extractvalue { i64, i64 } [[SV]], 1
23-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP10]], i64 0
21+
; CHECK-NEXT: [[TMP6:%.*]] = add i64 [[EXTRACT0]], [[TMP10]]
22+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 2 x i64> poison, i64 [[TMP6]], i64 0
2423
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 2 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
25-
; CHECK-NEXT: [[TMP7:%.*]] = add <vscale x 2 x i64> [[DOTSPLAT2]], [[BROADCAST_SPLAT2]]
2624
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
2725
; CHECK: [[VECTOR_BODY]]:
2826
; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
2927
; CHECK-NEXT: [[TMP8:%.*]] = getelementptr i64, ptr [[DST]], i32 [[INDEX]]
30-
; CHECK-NEXT: store <vscale x 2 x i64> [[TMP7]], ptr [[TMP8]], align 8
28+
; CHECK-NEXT: store <vscale x 2 x i64> [[BROADCAST_SPLAT2]], ptr [[TMP8]], align 8
3129
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], [[TMP3]]
3230
; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3331
; CHECK-NEXT: br i1 [[TMP9]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]

llvm/test/Transforms/LoopVectorize/RISCV/dead-ops-cost.ll

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -304,9 +304,9 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) {
304304
; CHECK-NEXT: [[ENTRY:.*:]]
305305
; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
306306
; CHECK: [[VECTOR_PH]]:
307-
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[A]], i64 0
307+
; CHECK-NEXT: [[TMP0:%.*]] = xor i32 [[A]], -1
308+
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i32> poison, i32 [[TMP0]], i64 0
308309
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i32> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
309-
; CHECK-NEXT: [[TMP19:%.*]] = xor <vscale x 4 x i32> [[BROADCAST_SPLAT]], splat (i32 -1)
310310
; CHECK-NEXT: [[TMP6:%.*]] = call <vscale x 4 x i64> @llvm.stepvector.nxv4i64()
311311
; CHECK-NEXT: [[TMP7:%.*]] = mul <vscale x 4 x i64> [[TMP6]], splat (i64 9)
312312
; CHECK-NEXT: [[INDUCTION:%.*]] = add <vscale x 4 x i64> zeroinitializer, [[TMP7]]
@@ -320,7 +320,7 @@ define void @test_phi_in_latch_redundant(ptr %dst, i32 %a) {
320320
; CHECK-NEXT: [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <vscale x 4 x i64> poison, i64 [[TMP9]], i64 0
321321
; CHECK-NEXT: [[BROADCAST_SPLAT2:%.*]] = shufflevector <vscale x 4 x i64> [[BROADCAST_SPLATINSERT1]], <vscale x 4 x i64> poison, <vscale x 4 x i32> zeroinitializer
322322
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr i32, ptr [[DST]], <vscale x 4 x i64> [[VEC_IND]]
323-
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[TMP19]], <vscale x 4 x ptr> align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP8]])
323+
; CHECK-NEXT: call void @llvm.vp.scatter.nxv4i32.nxv4p0(<vscale x 4 x i32> [[BROADCAST_SPLAT]], <vscale x 4 x ptr> align 4 [[TMP16]], <vscale x 4 x i1> splat (i1 true), i32 [[TMP8]])
324324
; CHECK-NEXT: [[TMP17:%.*]] = zext i32 [[TMP8]] to i64
325325
; CHECK-NEXT: [[AVL_NEXT]] = sub nuw i64 [[AVL]], [[TMP17]]
326326
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <vscale x 4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT2]]

llvm/test/Transforms/LoopVectorize/pr50686.ll

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -26,12 +26,13 @@ define void @m(ptr nocapture %p, ptr nocapture %p2, i32 %q) {
2626
; CHECK-NEXT: [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
2727
; CHECK-NEXT: [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
2828
; CHECK-NEXT: [[TMP4:%.*]] = sub nsw <4 x i32> [[TMP2]], [[BROADCAST_SPLAT3]]
29-
; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0]]
29+
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX9_2]], align 4, !alias.scope [[META0]]
30+
; CHECK-NEXT: [[TMP9:%.*]] = extractelement <4 x i32> [[TMP4]], i32 0
31+
; CHECK-NEXT: [[TMP5:%.*]] = sub nsw i32 [[TMP9]], [[TMP6]]
3032
; CHECK-NEXT: [[BROADCAST_SPLATINSERT4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
3133
; CHECK-NEXT: [[BROADCAST_SPLAT5:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT4]], <4 x i32> poison, <4 x i32> zeroinitializer
32-
; CHECK-NEXT: [[TMP6:%.*]] = sub nsw <4 x i32> [[TMP4]], [[BROADCAST_SPLAT5]]
3334
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, ptr [[P]], i64 [[INDEX]]
34-
; CHECK-NEXT: store <4 x i32> [[TMP6]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
35+
; CHECK-NEXT: store <4 x i32> [[BROADCAST_SPLAT5]], ptr [[TMP7]], align 4, !alias.scope [[META3:![0-9]+]], !noalias [[META0]]
3536
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3637
; CHECK-NEXT: [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], 60
3738
; CHECK-NEXT: br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]

0 commit comments

Comments
 (0)