Skip to content

Commit 5ebdc01

Browse files
committed
Add shouldMaximizeScalableVectorBandwidth to AArch64Subtarget
1 parent 68a526b commit 5ebdc01

File tree

4 files changed

+28
-21
lines changed

4 files changed

+28
-21
lines changed

llvm/lib/Target/AArch64/AArch64Subtarget.cpp

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -267,21 +267,28 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
267267
PrefLoopAlignment = Align(32);
268268
MaxBytesForLoopAlignment = 16;
269269
break;
270+
case NeoverseN2:
271+
ShouldMaximizeScalableVectorBandwidth = false;
272+
PrefFunctionAlignment = Align(16);
273+
PrefLoopAlignment = Align(32);
274+
MaxBytesForLoopAlignment = 16;
275+
VScaleForTuning = 1;
276+
break;
270277
case NeoverseV2:
271278
case NeoverseV3:
272279
CacheLineSize = 64;
273280
EpilogueVectorizationMinVF = 8;
274281
MaxInterleaveFactor = 4;
275282
ScatterOverhead = 13;
276283
[[fallthrough]];
277-
case NeoverseN2:
278284
case NeoverseN3:
279285
PrefFunctionAlignment = Align(16);
280286
PrefLoopAlignment = Align(32);
281287
MaxBytesForLoopAlignment = 16;
282288
VScaleForTuning = 1;
283289
break;
284290
case NeoverseV1:
291+
ShouldMaximizeScalableVectorBandwidth = false;
285292
PrefFunctionAlignment = Align(16);
286293
PrefLoopAlignment = Align(32);
287294
MaxBytesForLoopAlignment = 16;

llvm/lib/Target/AArch64/AArch64Subtarget.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
5757
#include "AArch64GenSubtargetInfo.inc"
5858

5959
unsigned EpilogueVectorizationMinVF = 16;
60+
bool ShouldMaximizeScalableVectorBandwidth = true;
6061
uint8_t MaxInterleaveFactor = 2;
6162
uint8_t VectorInsertExtractBaseCost = 2;
6263
uint16_t CacheLineSize = 0;
@@ -251,6 +252,9 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
251252
unsigned getEpilogueVectorizationMinVF() const {
252253
return EpilogueVectorizationMinVF;
253254
}
255+
/// Returns the ShouldMaximizeScalableVectorBandwidth tuning flag.
/// The flag defaults to true and is cleared per CPU in
/// AArch64Subtarget::initializeProperties (NeoverseN2 and NeoverseV1 in this
/// change). AArch64TTIImpl::shouldMaximizeVectorBandwidth consults it, together
/// with SVE availability, for TargetTransformInfo::RGK_ScalableVector queries.
bool shouldMaximizeScalableVectorBandwidth() const {
256+
return ShouldMaximizeScalableVectorBandwidth;
257+
}
254258
unsigned getMaxInterleaveFactor() const { return MaxInterleaveFactor; }
255259
unsigned getVectorInsertExtractBaseCost() const;
256260
unsigned getCacheLineSize() const override { return CacheLineSize; }

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -379,13 +379,9 @@ bool AArch64TTIImpl::shouldMaximizeVectorBandwidth(
379379
if (K == TargetTransformInfo::RGK_FixedWidthVector && ST->isNeonAvailable())
380380
return true;
381381

382-
switch (ST->getProcFamily()) {
383-
case AArch64Subtarget::NeoverseN2:
384-
return false;
385-
default:
386-
return K == TargetTransformInfo::RGK_ScalableVector &&
387-
ST->isSVEorStreamingSVEAvailable();
388-
}
382+
return K == TargetTransformInfo::RGK_ScalableVector &&
383+
ST->isSVEorStreamingSVEAvailable() &&
384+
ST->shouldMaximizeScalableVectorBandwidth();
389385
}
390386

391387
/// Calculate the cost of materializing a 64-bit value. This helper

llvm/test/Transforms/LoopVectorize/AArch64/scalable-vectorization.ll

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,19 @@
11
; REQUIRES: asserts
22
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
33
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
4-
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -vectorizer-maximize-bandwidth -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_MAXBW
4+
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -passes=loop-vectorize -S -debug-only=loop-vectorize --disable-output -vectorizer-maximize-bandwidth=false -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON_NOMAXBW
55

66
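; Test that the MaxVF for the following loop, which has no dependence distances,
77
; is calculated as vscale x 4 (max legal SVE vector size, expected when run with
88
; -vectorizer-maximize-bandwidth=false) or vscale x 16 (maximized bandwidth for
; i8 in the loop, expected otherwise when scalable vectorization is on).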
99
define void @test0(ptr %a, ptr %b, ptr %c) #0 {
1010
; CHECK: LV: Checking a loop in 'test0'
11-
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
12-
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
11+
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 16
12+
; CHECK_SCALABLE_ON: LV: Selecting VF: vscale x 16
1313
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
1414
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
15-
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 16
16-
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: vscale x 16
15+
; CHECK_SCALABLE_ON_NOMAXBW: LV: Found feasible scalable VF = vscale x 4
16+
; CHECK_SCALABLE_ON_NOMAXBW: LV: Selecting VF: vscale x 4
1717
entry:
1818
br label %loop
1919

@@ -43,8 +43,8 @@ define void @test1(ptr %a, ptr %b) #0 {
4343
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
4444
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
4545
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
46-
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 4
47-
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
46+
; CHECK_SCALABLE_ON_NOMAXBW: LV: Found feasible scalable VF = vscale x 4
47+
; CHECK_SCALABLE_ON_NOMAXBW: LV: Selecting VF: vscale x 4
4848
entry:
4949
br label %loop
5050

@@ -75,8 +75,8 @@ define void @test2(ptr %a, ptr %b) #0 {
7575
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
7676
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
7777
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
78-
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 2
79-
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
78+
; CHECK_SCALABLE_ON_NOMAXBW: LV: Found feasible scalable VF = vscale x 2
79+
; CHECK_SCALABLE_ON_NOMAXBW: LV: Selecting VF: 4
8080
entry:
8181
br label %loop
8282

@@ -107,8 +107,8 @@ define void @test3(ptr %a, ptr %b) #0 {
107107
; CHECK_SCALABLE_ON: LV: Selecting VF: 16
108108
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
109109
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 16
110-
; CHECK_SCALABLE_ON_MAXBW: LV: Found feasible scalable VF = vscale x 1
111-
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 16
110+
; CHECK_SCALABLE_ON_NOMAXBW: LV: Found feasible scalable VF = vscale x 1
111+
; CHECK_SCALABLE_ON_NOMAXBW: LV: Selecting VF: 4
112112
entry:
113113
br label %loop
114114

@@ -140,8 +140,8 @@ define void @test4(ptr %a, ptr %b) #0 {
140140
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
141141
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
142142
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
143-
; CHECK_SCALABLE_ON_MAXBW-NOT: LV: Found feasible scalable VF
144-
; CHECK_SCALABLE_ON_MAXBW: LV: Selecting VF: 4
143+
; CHECK_SCALABLE_ON_NOMAXBW-NOT: LV: Found feasible scalable VF
144+
; CHECK_SCALABLE_ON_NOMAXBW: LV: Selecting VF: 4
145145
entry:
146146
br label %loop
147147

0 commit comments

Comments
 (0)