Skip to content

Commit f1e22b1

Browse files
committed
Enable max bandwidth for all cores but the N2.
1 parent 0eb5fb6 commit f1e22b1

13 files changed

+1186
-919
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -371,14 +371,16 @@ AArch64TTIImpl::getInlineCallPenalty(const Function *F, const CallBase &Call,
371371
bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
372372
TargetTransformInfo::RegisterKind K) const {
373373
assert(K != TargetTransformInfo::RGK_Scalar);
374-
switch (ST->getProcFamily()) {
375-
case AArch64Subtarget::NeoverseV2:
376-
case AArch64Subtarget::NeoverseV1:
377-
case AArch64Subtarget::NeoverseN1:
374+
375+
if (K == TargetTransformInfo::RGK_FixedWidthVector && ST->isNeonAvailable())
378376
return true;
377+
378+
switch (ST->getProcFamily()) {
379+
case AArch64Subtarget::NeoverseN2:
380+
return false;
379381
default:
380-
return (K == TargetTransformInfo::RGK_FixedWidthVector &&
381-
ST->isNeonAvailable());
382+
return (K == TargetTransformInfo::RGK_ScalableVector &&
383+
ST->isSVEorStreamingSVEAvailable());
382384
}
383385
}
384386

llvm/test/Transforms/LoopVectorize/AArch64/conditional-branches-cost.ll

Lines changed: 85 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -534,55 +534,111 @@ define void @multiple_exit_conditions(ptr %src, ptr noalias %dst) #1 {
534534
; DEFAULT-LABEL: define void @multiple_exit_conditions(
535535
; DEFAULT-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR2:[0-9]+]] {
536536
; DEFAULT-NEXT: [[ENTRY:.*:]]
537-
; DEFAULT-NEXT: br label %[[VECTOR_PH:.*]]
537+
; DEFAULT-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
538+
; DEFAULT-NEXT: [[TMP6:%.*]] = shl nuw i64 [[TMP0]], 1
539+
; DEFAULT-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 257, [[TMP6]]
540+
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH:.*]], label %[[VECTOR_MAIN_LOOP_ITER_CHECK:.*]]
541+
; DEFAULT: [[VECTOR_MAIN_LOOP_ITER_CHECK]]:
542+
; DEFAULT-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
543+
; DEFAULT-NEXT: [[TMP3:%.*]] = shl nuw i64 [[TMP2]], 4
544+
; DEFAULT-NEXT: [[MIN_ITERS_CHECK1:%.*]] = icmp ult i64 257, [[TMP3]]
545+
; DEFAULT-NEXT: br i1 [[MIN_ITERS_CHECK1]], label %[[VEC_EPILOG_PH:.*]], label %[[VECTOR_PH:.*]]
538546
; DEFAULT: [[VECTOR_PH]]:
539-
; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 2048
547+
; DEFAULT-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
548+
; DEFAULT-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 16
549+
; DEFAULT-NEXT: [[N_MOD_VF:%.*]] = urem i64 257, [[TMP5]]
550+
; DEFAULT-NEXT: [[N_VEC:%.*]] = sub i64 257, [[N_MOD_VF]]
540551
; DEFAULT-NEXT: br label %[[VECTOR_BODY:.*]]
541552
; DEFAULT: [[VECTOR_BODY]]:
542553
; DEFAULT-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
543554
; DEFAULT-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
544555
; DEFAULT-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
545556
; DEFAULT-NEXT: [[TMP1:%.*]] = load i16, ptr [[SRC]], align 2
546-
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <8 x i16> poison, i16 [[TMP1]], i64 0
547-
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <8 x i16> [[BROADCAST_SPLATINSERT]], <8 x i16> poison, <8 x i32> zeroinitializer
548-
; DEFAULT-NEXT: [[TMP2:%.*]] = or <8 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
549-
; DEFAULT-NEXT: [[TMP3:%.*]] = uitofp <8 x i16> [[TMP2]] to <8 x double>
550-
; DEFAULT-NEXT: store <8 x double> [[TMP3]], ptr [[NEXT_GEP]], align 8
551-
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
552-
; DEFAULT-NEXT: [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 256
553-
; DEFAULT-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
557+
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[TMP1]], i64 0
558+
; DEFAULT-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
559+
; DEFAULT-NEXT: [[TMP8:%.*]] = or <vscale x 4 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
560+
; DEFAULT-NEXT: [[TMP9:%.*]] = uitofp <vscale x 4 x i16> [[TMP8]] to <vscale x 4 x double>
561+
; DEFAULT-NEXT: [[TMP10:%.*]] = call i64 @llvm.vscale.i64()
562+
; DEFAULT-NEXT: [[TMP11:%.*]] = shl nuw i64 [[TMP10]], 2
563+
; DEFAULT-NEXT: [[TMP12:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP11]]
564+
; DEFAULT-NEXT: [[TMP13:%.*]] = call i64 @llvm.vscale.i64()
565+
; DEFAULT-NEXT: [[TMP14:%.*]] = shl nuw i64 [[TMP13]], 3
566+
; DEFAULT-NEXT: [[TMP15:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP14]]
567+
; DEFAULT-NEXT: [[TMP16:%.*]] = call i64 @llvm.vscale.i64()
568+
; DEFAULT-NEXT: [[TMP17:%.*]] = mul nuw i64 [[TMP16]], 12
569+
; DEFAULT-NEXT: [[TMP18:%.*]] = getelementptr double, ptr [[NEXT_GEP]], i64 [[TMP17]]
570+
; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[NEXT_GEP]], align 8
571+
; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[TMP12]], align 8
572+
; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[TMP15]], align 8
573+
; DEFAULT-NEXT: store <vscale x 4 x double> [[TMP9]], ptr [[TMP18]], align 8
574+
; DEFAULT-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
575+
; DEFAULT-NEXT: [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
576+
; DEFAULT-NEXT: br i1 [[TMP19]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
554577
; DEFAULT: [[MIDDLE_BLOCK]]:
555-
; DEFAULT-NEXT: br label %[[SCALAR_PH:.*]]
556-
; DEFAULT: [[SCALAR_PH]]:
578+
; DEFAULT-NEXT: [[CMP_N:%.*]] = icmp eq i64 257, [[N_VEC]]
579+
; DEFAULT-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[VEC_EPILOG_ITER_CHECK:.*]]
580+
; DEFAULT: [[VEC_EPILOG_ITER_CHECK]]:
581+
; DEFAULT-NEXT: [[TMP20:%.*]] = mul i64 [[N_VEC]], 8
582+
; DEFAULT-NEXT: [[IND_END:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP20]]
583+
; DEFAULT-NEXT: [[IND_END11:%.*]] = mul i64 [[N_VEC]], 2
584+
; DEFAULT-NEXT: [[MIN_EPILOG_ITERS_CHECK:%.*]] = icmp ult i64 [[N_MOD_VF]], [[TMP6]]
585+
; DEFAULT-NEXT: br i1 [[MIN_EPILOG_ITERS_CHECK]], label %[[VEC_EPILOG_SCALAR_PH]], label %[[VEC_EPILOG_PH]], !prof [[PROF24:![0-9]+]]
586+
; DEFAULT: [[VEC_EPILOG_PH]]:
587+
; DEFAULT-NEXT: [[VEC_EPILOG_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], %[[VEC_EPILOG_ITER_CHECK]] ], [ 0, %[[VECTOR_MAIN_LOOP_ITER_CHECK]] ]
588+
; DEFAULT-NEXT: [[TMP21:%.*]] = call i64 @llvm.vscale.i64()
589+
; DEFAULT-NEXT: [[TMP22:%.*]] = mul nuw i64 [[TMP21]], 2
590+
; DEFAULT-NEXT: [[N_MOD_VF2:%.*]] = urem i64 257, [[TMP22]]
591+
; DEFAULT-NEXT: [[N_VEC3:%.*]] = sub i64 257, [[N_MOD_VF2]]
592+
; DEFAULT-NEXT: [[TMP23:%.*]] = mul i64 [[N_VEC3]], 8
593+
; DEFAULT-NEXT: [[TMP24:%.*]] = getelementptr i8, ptr [[DST]], i64 [[TMP23]]
594+
; DEFAULT-NEXT: [[TMP25:%.*]] = mul i64 [[N_VEC3]], 2
595+
; DEFAULT-NEXT: br label %[[VEC_EPILOG_VECTOR_BODY:.*]]
596+
; DEFAULT: [[VEC_EPILOG_VECTOR_BODY]]:
597+
; DEFAULT-NEXT: [[INDEX4:%.*]] = phi i64 [ [[VEC_EPILOG_RESUME_VAL]], %[[VEC_EPILOG_PH]] ], [ [[INDEX_NEXT8:%.*]], %[[VEC_EPILOG_VECTOR_BODY]] ]
598+
; DEFAULT-NEXT: [[OFFSET_IDX1:%.*]] = mul i64 [[INDEX4]], 8
599+
; DEFAULT-NEXT: [[NEXT_GEP5:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX1]]
600+
; DEFAULT-NEXT: [[TMP26:%.*]] = load i16, ptr [[SRC]], align 2
601+
; DEFAULT-NEXT: [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[TMP26]], i64 0
602+
; DEFAULT-NEXT: [[BROADCAST_SPLAT7:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT6]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
603+
; DEFAULT-NEXT: [[TMP27:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT7]], splat (i16 1)
604+
; DEFAULT-NEXT: [[TMP28:%.*]] = uitofp <vscale x 2 x i16> [[TMP27]] to <vscale x 2 x double>
605+
; DEFAULT-NEXT: store <vscale x 2 x double> [[TMP28]], ptr [[NEXT_GEP5]], align 8
606+
; DEFAULT-NEXT: [[INDEX_NEXT8]] = add nuw i64 [[INDEX4]], [[TMP22]]
607+
; DEFAULT-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT8]], [[N_VEC3]]
608+
; DEFAULT-NEXT: br i1 [[TMP29]], label %[[VEC_EPILOG_MIDDLE_BLOCK:.*]], label %[[VEC_EPILOG_VECTOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
609+
; DEFAULT: [[VEC_EPILOG_MIDDLE_BLOCK]]:
610+
; DEFAULT-NEXT: [[CMP_N9:%.*]] = icmp eq i64 257, [[N_VEC3]]
611+
; DEFAULT-NEXT: br i1 [[CMP_N9]], [[EXIT]], label %[[VEC_EPILOG_SCALAR_PH]]
612+
; DEFAULT: [[VEC_EPILOG_SCALAR_PH]]:
557613
;
558614
; PRED-LABEL: define void @multiple_exit_conditions(
559615
; PRED-SAME: ptr [[SRC:%.*]], ptr noalias [[DST:%.*]]) #[[ATTR2:[0-9]+]] {
560616
; PRED-NEXT: [[ENTRY:.*:]]
561617
; PRED-NEXT: br label %[[VECTOR_PH:.*]]
562618
; PRED: [[VECTOR_PH]]:
563619
; PRED-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
564-
; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 2
620+
; PRED-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 4
565621
; PRED-NEXT: [[TMP6:%.*]] = call i64 @llvm.vscale.i64()
566-
; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 1
622+
; PRED-NEXT: [[TMP7:%.*]] = shl nuw i64 [[TMP6]], 2
567623
; PRED-NEXT: [[TMP8:%.*]] = sub i64 257, [[TMP7]]
568624
; PRED-NEXT: [[TMP9:%.*]] = icmp ugt i64 257, [[TMP7]]
569625
; PRED-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], i64 [[TMP8]], i64 0
570-
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 0, i64 257)
626+
; PRED-NEXT: [[ACTIVE_LANE_MASK_ENTRY:%.*]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 0, i64 257)
571627
; PRED-NEXT: br label %[[VECTOR_BODY:.*]]
572628
; PRED: [[VECTOR_BODY]]:
573629
; PRED-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
574-
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 2 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
630+
; PRED-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 4 x i1> [ [[ACTIVE_LANE_MASK_ENTRY]], %[[VECTOR_PH]] ], [ [[ACTIVE_LANE_MASK_NEXT:%.*]], %[[VECTOR_BODY]] ]
575631
; PRED-NEXT: [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
576632
; PRED-NEXT: [[NEXT_GEP:%.*]] = getelementptr i8, ptr [[DST]], i64 [[OFFSET_IDX]]
577633
; PRED-NEXT: [[TMP12:%.*]] = load i16, ptr [[SRC]], align 2
578-
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 2 x i16> poison, i16 [[TMP12]], i64 0
579-
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 2 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 2 x i16> poison, <vscale x 2 x i32> zeroinitializer
580-
; PRED-NEXT: [[TMP13:%.*]] = or <vscale x 2 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
581-
; PRED-NEXT: [[TMP14:%.*]] = uitofp <vscale x 2 x i16> [[TMP13]] to <vscale x 2 x double>
582-
; PRED-NEXT: call void @llvm.masked.store.nxv2f64.p0(<vscale x 2 x double> [[TMP14]], ptr align 8 [[NEXT_GEP]], <vscale x 2 x i1> [[ACTIVE_LANE_MASK]])
634+
; PRED-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <vscale x 4 x i16> poison, i16 [[TMP12]], i64 0
635+
; PRED-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <vscale x 4 x i16> [[BROADCAST_SPLATINSERT]], <vscale x 4 x i16> poison, <vscale x 4 x i32> zeroinitializer
636+
; PRED-NEXT: [[TMP11:%.*]] = or <vscale x 4 x i16> [[BROADCAST_SPLAT]], splat (i16 1)
637+
; PRED-NEXT: [[TMP13:%.*]] = uitofp <vscale x 4 x i16> [[TMP11]] to <vscale x 4 x double>
638+
; PRED-NEXT: call void @llvm.masked.store.nxv4f64.p0(<vscale x 4 x double> [[TMP13]], ptr align 8 [[NEXT_GEP]], <vscale x 4 x i1> [[ACTIVE_LANE_MASK]])
583639
; PRED-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], [[TMP1]]
584-
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 2 x i1> @llvm.get.active.lane.mask.nxv2i1.i64(i64 [[INDEX]], i64 [[TMP10]])
585-
; PRED-NEXT: [[TMP15:%.*]] = extractelement <vscale x 2 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
640+
; PRED-NEXT: [[ACTIVE_LANE_MASK_NEXT]] = call <vscale x 4 x i1> @llvm.get.active.lane.mask.nxv4i1.i64(i64 [[INDEX]], i64 [[TMP10]])
641+
; PRED-NEXT: [[TMP15:%.*]] = extractelement <vscale x 4 x i1> [[ACTIVE_LANE_MASK_NEXT]], i32 0
586642
; PRED-NEXT: [[TMP16:%.*]] = xor i1 [[TMP15]], true
587643
; PRED-NEXT: br i1 [[TMP16]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
588644
; PRED: [[MIDDLE_BLOCK]]:
@@ -660,16 +716,16 @@ define void @low_trip_count_fold_tail_scalarized_store(ptr %dst) {
660716
; COMMON-NEXT: store i8 6, ptr [[TMP6]], align 1
661717
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE12]]
662718
; COMMON: [[PRED_STORE_CONTINUE12]]:
663-
; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[EXIT:.*]]
719+
; COMMON-NEXT: br i1 false, label %[[PRED_STORE_IF13:.*]], label %[[PRED_STORE_CONTINUE14:.*]]
664720
; COMMON: [[PRED_STORE_IF13]]:
665721
; COMMON-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[DST]], i64 7
666722
; COMMON-NEXT: store i8 7, ptr [[TMP7]], align 1
667-
; COMMON-NEXT: br label %[[EXIT]]
723+
; COMMON-NEXT: br label %[[PRED_STORE_CONTINUE14]]
724+
; COMMON: [[PRED_STORE_CONTINUE14]]:
725+
; COMMON-NEXT: br label %[[MIDDLE_BLOCK:.*]]
726+
; COMMON: [[MIDDLE_BLOCK]]:
727+
; COMMON-NEXT: br label %[[EXIT:.*]]
668728
; COMMON: [[EXIT]]:
669-
; COMMON-NEXT: br label %[[SCALAR_PH:.*]]
670-
; COMMON: [[SCALAR_PH]]:
671-
; COMMON-NEXT: br label %[[EXIT1:.*]]
672-
; COMMON: [[EXIT1]]:
673729
; COMMON-NEXT: ret void
674730
;
675731
entry:

0 commit comments

Comments
 (0)