Skip to content

Commit 6ac6bc1

Browse files
committed
[VPlan] Narrow wide stores in narrowToSingleScalars
1 parent f9360e3 commit 6ac6bc1

File tree

82 files changed

+835
-1489
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

82 files changed

+835
-1489
lines changed

llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp

Lines changed: 30 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1390,33 +1390,52 @@ static void narrowToSingleScalarRecipes(VPlan &Plan) {
13901390
for (VPBasicBlock *VPBB : VPBlockUtils::blocksOnly<VPBasicBlock>(
13911391
vp_depth_first_shallow(Plan.getVectorLoopRegion()->getEntry()))) {
13921392
for (VPRecipeBase &R : make_early_inc_range(reverse(*VPBB))) {
1393-
if (!isa<VPWidenRecipe, VPWidenSelectRecipe, VPReplicateRecipe>(&R))
1393+
if (!isa<VPWidenRecipe, VPWidenSelectRecipe, VPWidenStoreRecipe,
1394+
VPReplicateRecipe>(&R))
13941395
continue;
1396+
13951397
auto *RepR = dyn_cast<VPReplicateRecipe>(&R);
13961398
if (RepR && (RepR->isSingleScalar() || RepR->isPredicated()))
13971399
continue;
1398-
1399-
auto *RepOrWidenR = cast<VPSingleDefRecipe>(&R);
1400-
if (RepR && isa<StoreInst>(RepR->getUnderlyingInstr()) &&
1401-
vputils::isSingleScalar(RepR->getOperand(1))) {
1402-
auto *Clone = new VPReplicateRecipe(
1403-
RepOrWidenR->getUnderlyingInstr(), RepOrWidenR->operands(),
1404-
true /*IsSingleScalar*/, nullptr /*Mask*/, *RepR /*Metadata*/);
1405-
Clone->insertBefore(RepOrWidenR);
1400+
auto *Store = dyn_cast<VPWidenStoreRecipe>(&R);
1401+
1402+
if ((RepR && isa<StoreInst>(RepR->getUnderlyingInstr()) &&
1403+
vputils::isSingleScalar(RepR->getOperand(1))) ||
1404+
(Store && !Store->isMasked() &&
1405+
vputils::isSingleScalar(Store->getStoredValue()))) {
1406+
StoreInst *UnderlyingInstr =
1407+
RepR ? cast<StoreInst>(RepR->getUnderlyingInstr())
1408+
: cast<StoreInst>(&Store->getIngredient());
1409+
SmallVector<VPValue *> Operands;
1410+
if (RepR)
1411+
append_range(Operands, RepR->operands());
1412+
else
1413+
Operands = {Store->getStoredValue(), Store->getAddr()};
1414+
VPValue *OperandToCheck =
1415+
RepR ? RepR->getOperand(1) : Store->getStoredValue();
1416+
VPIRMetadata Metadata =
1417+
RepR ? VPIRMetadata(*RepR) : VPIRMetadata(*Store);
1418+
auto *Clone = new VPReplicateRecipe(UnderlyingInstr, Operands,
1419+
true /*IsSingleScalar*/,
1420+
nullptr /*Mask*/, Metadata);
1421+
Clone->insertBefore(&R);
14061422
unsigned ExtractOpc =
1407-
vputils::isUniformAcrossVFsAndUFs(RepR->getOperand(1))
1423+
vputils::isUniformAcrossVFsAndUFs(OperandToCheck)
14081424
? VPInstruction::ExtractLastElement
14091425
: VPInstruction::ExtractLastLanePerPart;
14101426
auto *Ext = new VPInstruction(ExtractOpc, {Clone->getOperand(0)});
14111427
Ext->insertBefore(Clone);
14121428
Clone->setOperand(0, Ext);
1413-
RepR->eraseFromParent();
1429+
R.eraseFromParent();
14141430
continue;
14151431
}
14161432

14171433
// Skip recipes that aren't single scalars or don't have only their
14181434
// scalar results used. In the latter case, we would introduce extra
14191435
// broadcasts.
1436+
auto *RepOrWidenR = dyn_cast<VPSingleDefRecipe>(&R);
1437+
if (!RepOrWidenR)
1438+
continue;
14201439
if (!vputils::isSingleScalar(RepOrWidenR) ||
14211440
!all_of(RepOrWidenR->users(), [RepOrWidenR](const VPUser *U) {
14221441
return U->usesScalars(RepOrWidenR) ||

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 30 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -251,37 +251,23 @@ define void @latch_branch_cost(ptr %dst) {
251251
; DEFAULT-LABEL: define void @latch_branch_cost(
252252
; DEFAULT-SAME: ptr [[DST:%.*]]) {
253253
; DEFAULT-NEXT: [[ITER_CHECK:.*:]]
254-
; DEFAULT-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
255-
; DEFAULT: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
256-
; DEFAULT-NEXT: br i1 false, label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
254+
; DEFAULT-NEXT: br label %[[VECTOR_PH:.*]]
257255
; DEFAULT: [[VECTOR_PH]]:
258256
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
259257
; DEFAULT: [[VECTOR_BODY]]:
260-
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
258+
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VECTOR_BODY]] ]
259+
; DEFAULT-NEXT: [[INDEX1:%.*]] = add i64 [[INDEX]], 1
261260
; DEFAULT-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX]]
262-
; DEFAULT-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP2]], i32 16
263-
; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP2]], align 1
264-
; DEFAULT-NEXT: store <16 x i8> zeroinitializer, ptr [[TMP5]], align 1
265-
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 32
266-
; DEFAULT-NEXT: [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
267-
; DEFAULT-NEXT: br i1 [[TMP4]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
268-
; DEFAULT: [[MIDDLE_BLOCK]]:
269-
; DEFAULT-NEXT: br i1 false, [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
270-
; DEFAULT: [[VEC_EPILOG_ITER_CHECK]]:
271-
; DEFAULT-NEXT: br i1 false, label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF5:![0-9]+]]
272-
; DEFAULT: [[VEC_EPILOG_PH]]:
273-
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ 96, %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
274-
; DEFAULT-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
275-
; DEFAULT: [[VEC_EPILOG_VECTOR_BODY]]:
276-
; DEFAULT-NEXT: [[INDEX1:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT2:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
277261
; DEFAULT-NEXT: [[TMP8:%.*]] = getelementptr i8, ptr [[DST]], i64 [[INDEX1]]
278-
; DEFAULT-NEXT: store <4 x i8> zeroinitializer, ptr [[TMP8]], align 1
279-
; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX1]], 4
262+
; DEFAULT-NEXT: store i8 0, ptr [[TMP2]], align 1
263+
; DEFAULT-NEXT: store i8 0, ptr [[TMP8]], align 1
264+
; DEFAULT-NEXT: [[INDEX_NEXT2]] = add nuw i64 [[INDEX]], 2
280265
; DEFAULT-NEXT: [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT2]], 100
281-
; DEFAULT-NEXT: br i1 [[TMP10]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
282-
; DEFAULT: [[VEC_EPILOG_MIDDLE_BLOCK]]:
283-
; DEFAULT-NEXT: br i1 true, [[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
284-
; DEFAULT: [[VEC_EPILOG_SCALAR_PH]]:
266+
; DEFAULT-NEXT: br i1 [[TMP10]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
267+
; DEFAULT: [[MIDDLE_BLOCK]]:
268+
; DEFAULT-NEXT: br label %[[EXIT:.*]]
269+
; DEFAULT: [[EXIT]]:
270+
; DEFAULT-NEXT: ret void
285271
;
286272
; PRED-LABEL: define void @latch_branch_cost(
287273
; PRED-SAME: ptr [[DST:%.*]]) {
@@ -430,14 +416,14 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
430416
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
431417
; DEFAULT: [[VECTOR_BODY]]:
432418
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[PRED_STORE_CONTINUE37:.*]] ]
433-
; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META8:![0-9]+]]
419+
; DEFAULT-NEXT: [[TMP9:%.*]] = load i32, ptr [[A]], align 4, !alias.scope [[META5:![0-9]+]]
434420
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT28:%.*]] = insertelement <4 x i32> poison, i32 [[TMP9]], i64 0
435421
; DEFAULT-NEXT: [[BROADCAST_SPLAT29:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT28]], <4 x i32> poison, <4 x i32> zeroinitializer
436-
; DEFAULT-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META11:![0-9]+]]
422+
; DEFAULT-NEXT: [[TMP19:%.*]] = load i32, ptr [[B]], align 4, !alias.scope [[META8:![0-9]+]]
437423
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP19]], i64 0
438424
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
439425
; DEFAULT-NEXT: [[TMP6:%.*]] = or <4 x i32> [[BROADCAST_SPLAT]], [[BROADCAST_SPLAT29]]
440-
; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META13:![0-9]+]]
426+
; DEFAULT-NEXT: [[TMP7:%.*]] = load i32, ptr [[C]], align 4, !alias.scope [[META10:![0-9]+]]
441427
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
442428
; DEFAULT-NEXT: [[BROADCAST_SPLAT31:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT30]], <4 x i32> poison, <4 x i32> zeroinitializer
443429
; DEFAULT-NEXT: [[TMP8:%.*]] = icmp ugt <4 x i32> [[BROADCAST_SPLAT31]], [[TMP6]]
@@ -446,34 +432,34 @@ define i32 @header_mask_and_invariant_compare(ptr %A, ptr %B, ptr %C, ptr %D, pt
446432
; DEFAULT-NEXT: br i1 [[TMP20]], label %[[PRED_STORE_IF:.*]], label %[[PRED_STORE_CONTINUE:.*]]
447433
; DEFAULT: [[PRED_STORE_IF]]:
448434
; DEFAULT-NEXT: [[TMP11:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
449-
; DEFAULT-NEXT: store i32 [[TMP11]], ptr [[E]], align 4, !alias.scope [[META15:![0-9]+]], !noalias [[META17:![0-9]+]]
435+
; DEFAULT-NEXT: store i32 [[TMP11]], ptr [[E]], align 4, !alias.scope [[META12:![0-9]+]], !noalias [[META14:![0-9]+]]
450436
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE]]
451437
; DEFAULT: [[PRED_STORE_CONTINUE]]:
452438
; DEFAULT-NEXT: [[TMP12:%.*]] = extractelement <4 x i1> [[TMP8]], i32 1
453439
; DEFAULT-NEXT: br i1 [[TMP12]], label %[[PRED_STORE_IF32:.*]], label %[[PRED_STORE_CONTINUE33:.*]]
454440
; DEFAULT: [[PRED_STORE_IF32]]:
455441
; DEFAULT-NEXT: [[TMP13:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
456-
; DEFAULT-NEXT: store i32 [[TMP13]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
442+
; DEFAULT-NEXT: store i32 [[TMP13]], ptr [[E]], align 4, !alias.scope [[META12]], !noalias [[META14]]
457443
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE33]]
458444
; DEFAULT: [[PRED_STORE_CONTINUE33]]:
459445
; DEFAULT-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP8]], i32 2
460446
; DEFAULT-NEXT: br i1 [[TMP14]], label %[[PRED_STORE_IF34:.*]], label %[[PRED_STORE_CONTINUE35:.*]]
461447
; DEFAULT: [[PRED_STORE_IF34]]:
462448
; DEFAULT-NEXT: [[TMP15:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
463-
; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
449+
; DEFAULT-NEXT: store i32 [[TMP15]], ptr [[E]], align 4, !alias.scope [[META12]], !noalias [[META14]]
464450
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE35]]
465451
; DEFAULT: [[PRED_STORE_CONTINUE35]]:
466452
; DEFAULT-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP8]], i32 3
467453
; DEFAULT-NEXT: br i1 [[TMP21]], label %[[PRED_STORE_IF36:.*]], label %[[PRED_STORE_CONTINUE37]]
468454
; DEFAULT: [[PRED_STORE_IF36]]:
469455
; DEFAULT-NEXT: [[TMP22:%.*]] = extractelement <4 x i32> [[TMP6]], i32 0
470-
; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META15]], !noalias [[META17]]
456+
; DEFAULT-NEXT: store i32 [[TMP22]], ptr [[E]], align 4, !alias.scope [[META12]], !noalias [[META14]]
471457
; DEFAULT-NEXT: br label %[[PRED_STORE_CONTINUE37]]
472458
; DEFAULT: [[PRED_STORE_CONTINUE37]]:
473-
; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[TMP16]], <4 x i1> [[TMP8]]), !alias.scope [[META19:![0-9]+]], !noalias [[META20:![0-9]+]]
459+
; DEFAULT-NEXT: call void @llvm.masked.store.v4i32.p0(<4 x i32> zeroinitializer, ptr align 4 [[TMP16]], <4 x i1> [[TMP8]]), !alias.scope [[META16:![0-9]+]], !noalias [[META17:![0-9]+]]
474460
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
475461
; DEFAULT-NEXT: [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
476-
; DEFAULT-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
462+
; DEFAULT-NEXT: br i1 [[TMP18]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
477463
; DEFAULT: [[MIDDLE_BLOCK]]:
478464
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
479465
; DEFAULT-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
@@ -550,7 +536,7 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
550536
; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[NEXT_GEP]], align 8
551537
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
552538
; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
553-
; DEFAULT-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
539+
; DEFAULT-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
554540
; DEFAULT: [[MIDDLE_BLOCK]]:
555541
; DEFAULT-NEXT: br label %[[SCALAR_PH:.*]]
556542
; DEFAULT: [[SCALAR_PH]]:
@@ -660,16 +646,16 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
660646
; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1
661647
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]]
662648
; COMMON: [[PRED_STORE_CONTINUE12]]:
663-
; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]]
649+
; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
664650
; COMMON: [[PRED_STORE_IF13]]:
665651
; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7
666652
; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1
667-
; COMMON-NEXT: br label %[[EXIT]]
653+
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE14]]
654+
; COMMON: [[PRED_STORE_CONTINUE14]]:
655+
; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]]
656+
; COMMON: [[MIDDLE_BLOCK]]:
657+
; COMMON-NEXT: br label %[[EXIT:.*]]
668658
; COMMON: [[EXIT]]:
669-
; COMMON-NEXT: br label %[[SCALAR_PH:.*]]
670-
; COMMON: [[SCALAR_PH]]:
671-
; COMMON-NEXT: br label %[[EXIT1:.*]]
672-
; COMMON: [[EXIT1]]:
673659
; COMMON-NEXT: ret void
674660
;
675661
entry:
@@ -866,7 +852,7 @@ define void @test_conditional_interleave_group (ptr noalias %src.1, ptr noalias
866852
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
867853
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <8 x i64> [[VEC_IND]], splat (i64 8)
868854
; DEFAULT-NEXT: [[TMP80:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
869-
; DEFAULT-NEXT: br i1 [[TMP80]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
855+
; DEFAULT-NEXT: br i1 [[TMP80]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
870856
; DEFAULT: [[MIDDLE_BLOCK]]:
871857
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
872858
; DEFAULT-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
@@ -1112,7 +1098,7 @@ define void @redundant_branch_and_tail_folding(ptr %dst, i1 %c) {
11121098
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
11131099
; DEFAULT-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[STEP_ADD]], splat (i64 4)
11141100
; DEFAULT-NEXT: [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], 16
1115-
; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
1101+
; DEFAULT-NEXT: br i1 [[TMP3]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
11161102
; DEFAULT: [[MIDDLE_BLOCK]]:
11171103
; DEFAULT-NEXT: br label %[[SCALAR_PH:.*]]
11181104
; DEFAULT: [[SCALAR_PH]]:
@@ -1287,7 +1273,7 @@ define void @pred_udiv_select_cost(ptr %A, ptr %B, ptr %C, i64 %n, i8 %y) #1 {
12871273
; DEFAULT-NEXT: store <vscale x 4 x i8> [[TMP23]], ptr [[TMP24]], align 1
12881274
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
12891275
; DEFAULT-NEXT: [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1290-
; DEFAULT-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
1276+
; DEFAULT-NEXT: br i1 [[TMP25]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
12911277
; DEFAULT: [[MIDDLE_BLOCK]]:
12921278
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP0]], [[N_VEC]]
12931279
; DEFAULT-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]

llvm/test/Transforms/LoopVectorize/AArch64/divs-with-scalable-vfs.ll

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -7,10 +7,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
77
; CHECK-SAME: ptr [[DST:%.*]], i32 [[X:%.*]], i64 [[M:%.*]], i64 [[CONV6:%.*]], i64 [[N:%.*]]) #[[ATTR0:[0-9]+]] {
88
; CHECK-NEXT: [[ENTRY:.*]]:
99
; CHECK-NEXT: [[CONV61:%.*]] = zext i32 [[X]] to i64
10-
; CHECK-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
11-
; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i64 [[TMP10]], 2
12-
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.umax.i64(i64 [[TMP1]], i64 8)
13-
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], [[TMP2]]
10+
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N]], 2
1411
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[SCALAR_PH:.*]], label %[[VECTOR_SCEVCHECK:.*]]
1512
; CHECK: [[VECTOR_SCEVCHECK]]:
1613
; CHECK-NEXT: [[TMP3:%.*]] = add i64 [[N]], -1
@@ -20,9 +17,7 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
2017
; CHECK-NEXT: [[TMP7:%.*]] = or i1 [[TMP5]], [[TMP6]]
2118
; CHECK-NEXT: br i1 [[TMP7]], label %[[SCALAR_PH]], label %[[VECTOR_PH:.*]]
2219
; CHECK: [[VECTOR_PH]]:
23-
; CHECK-NEXT: [[TMP8:%.*]] = call i64 @llvm.vscale.i64()
24-
; CHECK-NEXT: [[TMP9:%.*]] = mul nuw i64 [[TMP8]], 4
25-
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP9]]
20+
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
2621
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
2722
; CHECK-NEXT: [[TMP18:%.*]] = sdiv i64 [[M]], [[CONV6]]
2823
; CHECK-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP18]] to i32
@@ -31,17 +26,20 @@ define void @sdiv_feeding_gep(ptr %dst, i32 %x, i64 %M, i64 %conv6, i64 %N) {
3126
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
3227
; CHECK: [[VECTOR_BODY]]:
3328
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
29+
; CHECK-NEXT: [[TMP9:%.*]] = add i64 [[INDEX]], 1
3430
; CHECK-NEXT: [[TMP24:%.*]] = sub i64 [[INDEX]], [[TMP22]]
31+
; CHECK-NEXT: [[TMP11:%.*]] = sub i64 [[TMP9]], [[TMP22]]
3532
; CHECK-NEXT: [[TMP26:%.*]] = trunc i64 [[TMP24]] to i32
33+
; CHECK-NEXT: [[TMP13:%.*]] = trunc i64 [[TMP11]] to i32
3634
; CHECK-NEXT: [[TMP30:%.*]] = add i32 [[TMP28]], [[TMP26]]
35+
; CHECK-NEXT: [[TMP15:%.*]] = add i32 [[TMP28]], [[TMP13]]
3736
; CHECK-NEXT: [[TMP32:%.*]] = sext i32 [[TMP30]] to i64
37+
; CHECK-NEXT: [[TMP17:%.*]] = sext i32 [[TMP15]] to i64
3838
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP32]]
39-
; CHECK-NEXT: [[TMP37:%.*]] = call i64 @llvm.vscale.i64()
40-
; CHECK-NEXT: [[TMP38:%.*]] = shl nuw i64 [[TMP37]], 1
41-
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr double, ptr [[TMP34]], i64 [[TMP38]]
42-
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP34]], align 8
43-
; CHECK-NEXT: store <vscale x 2 x double> zeroinitializer, ptr [[TMP39]], align 8
44-
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP9]]
39+
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr double, ptr [[DST]], i64 [[TMP17]]
40+
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP34]], align 8
41+
; CHECK-NEXT: store double 0.000000e+00, ptr [[TMP19]], align 8
42+
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
4543
; CHECK-NEXT: [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
4644
; CHECK-NEXT: br i1 [[TMP40]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
4745
; CHECK: [[MIDDLE_BLOCK]]:

0 commit comments

Comments
 (0)