Skip to content

Commit 26e42c7

Browse files
authored
[CostModel][AArch64] Remove promotion cost for SVE bfloat arith supported with +sve-b16b16 (#167717)
The resulting costs are the same as the standard SVE costs for `half` types.
1 parent 42a1184 commit 26e42c7

File tree

3 files changed

+45
-22
lines changed

3 files changed

+45
-22
lines changed

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.cpp

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4167,12 +4167,15 @@ InstructionCost AArch64TTIImpl::getScalarizationOverhead(
41674167

41684168
std::optional<InstructionCost> AArch64TTIImpl::getFP16BF16PromoteCost(
41694169
Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
4170-
TTI::OperandValueInfo Op2Info, bool IncludeTrunc,
4170+
TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE,
41714171
std::function<InstructionCost(Type *)> InstCost) const {
41724172
if (!Ty->getScalarType()->isHalfTy() && !Ty->getScalarType()->isBFloatTy())
41734173
return std::nullopt;
41744174
if (Ty->getScalarType()->isHalfTy() && ST->hasFullFP16())
41754175
return std::nullopt;
4176+
if (CanUseSVE && Ty->isScalableTy() && ST->hasSVEB16B16() &&
4177+
ST->isNonStreamingSVEorSME2Available())
4178+
return std::nullopt;
41764179

41774180
Type *PromotedTy = Ty->getWithNewType(Type::getFloatTy(Ty->getContext()));
41784181
InstructionCost Cost = getCastInstrCost(Instruction::FPExt, PromotedTy, Ty,
@@ -4214,6 +4217,8 @@ InstructionCost AArch64TTIImpl::getArithmeticInstrCost(
42144217
ISD == ISD::FDIV || ISD == ISD::FREM)
42154218
if (auto PromotedCost = getFP16BF16PromoteCost(
42164219
Ty, CostKind, Op1Info, Op2Info, /*IncludeTrunc=*/true,
4220+
// There is no native support for fdiv/frem even with +sve-b16b16.
4221+
/*CanUseSVE=*/ISD != ISD::FDIV && ISD != ISD::FREM,
42174222
[&](Type *PromotedTy) {
42184223
return getArithmeticInstrCost(Opcode, PromotedTy, CostKind,
42194224
Op1Info, Op2Info);
@@ -4628,7 +4633,8 @@ InstructionCost AArch64TTIImpl::getCmpSelInstrCost(
46284633
if (Opcode == Instruction::FCmp) {
46294634
if (auto PromotedCost = getFP16BF16PromoteCost(
46304635
ValTy, CostKind, Op1Info, Op2Info, /*IncludeTrunc=*/false,
4631-
[&](Type *PromotedTy) {
4636+
// TODO: Consider costing SVE FCMPs.
4637+
/*CanUseSVE=*/false, [&](Type *PromotedTy) {
46324638
InstructionCost Cost =
46334639
getCmpSelInstrCost(Opcode, PromotedTy, CondTy, VecPred,
46344640
CostKind, Op1Info, Op2Info);

llvm/lib/Target/AArch64/AArch64TargetTransformInfo.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -456,11 +456,10 @@ class AArch64TTIImpl final : public BasicTTIImplBase<AArch64TTIImpl> {
456456

457457
/// FP16 and BF16 operations are lowered to fptrunc(op(fpext, fpext)) if the
458458
/// architecture features are not present.
459-
std::optional<InstructionCost>
460-
getFP16BF16PromoteCost(Type *Ty, TTI::TargetCostKind CostKind,
461-
TTI::OperandValueInfo Op1Info,
462-
TTI::OperandValueInfo Op2Info, bool IncludeTrunc,
463-
std::function<InstructionCost(Type *)> InstCost) const;
459+
std::optional<InstructionCost> getFP16BF16PromoteCost(
460+
Type *Ty, TTI::TargetCostKind CostKind, TTI::OperandValueInfo Op1Info,
461+
TTI::OperandValueInfo Op2Info, bool IncludeTrunc, bool CanUseSVE,
462+
std::function<InstructionCost(Type *)> InstCost) const;
464463

465464
InstructionCost
466465
getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,

llvm/test/Analysis/CostModel/AArch64/sve-arith-fp.ll

Lines changed: 33 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -33,11 +33,17 @@ define void @fadd() {
3333
}
3434

3535
define void @fadd_bf16() {
36-
; CHECK-LABEL: 'fadd_bf16'
37-
; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
38-
; CHECK-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
39-
; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
40-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
36+
; CHECK-BASE-LABEL: 'fadd_bf16'
37+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
38+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
39+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
40+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
41+
;
42+
; CHECK-BF16-LABEL: 'fadd_bf16'
43+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
44+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
45+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fadd <vscale x 16 x bfloat> poison, poison
46+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
4147
;
4248
%NXV4BF16 = fadd <vscale x 4 x bfloat> poison, poison
4349
%NXV8BF16 = fadd <vscale x 8 x bfloat> poison, poison
@@ -76,11 +82,17 @@ define void @fsub() {
7682
}
7783

7884
define void @fsub_bf16() {
79-
; CHECK-LABEL: 'fsub_bf16'
80-
; CHECK-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
81-
; CHECK-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
82-
; CHECK-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
83-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
85+
; CHECK-BASE-LABEL: 'fsub_bf16'
86+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:11 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
87+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:27 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
88+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:54 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
89+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
90+
;
91+
; CHECK-BF16-LABEL: 'fsub_bf16'
92+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
93+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:1 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
94+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fsub <vscale x 16 x bfloat> poison, poison
95+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
8496
;
8597
%NXV4BF16 = fsub <vscale x 4 x bfloat> poison, poison
8698
%NXV8BF16 = fsub <vscale x 8 x bfloat> poison, poison
@@ -160,11 +172,17 @@ define void @fmul() {
160172
}
161173

162174
define void @fmul_bf16() {
163-
; CHECK-LABEL: 'fmul_bf16'
164-
; CHECK-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
165-
; CHECK-NEXT: Cost Model: Found costs of RThru:29 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
166-
; CHECK-NEXT: Cost Model: Found costs of RThru:58 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
167-
; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
175+
; CHECK-BASE-LABEL: 'fmul_bf16'
176+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:12 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
177+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:29 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
178+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:58 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
179+
; CHECK-BASE-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
180+
;
181+
; CHECK-BF16-LABEL: 'fmul_bf16'
182+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
183+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:2 CodeSize:1 Lat:3 SizeLat:1 for: %NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison
184+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:1 Lat:3 SizeLat:1 for: %NXV16BF16 = fmul <vscale x 16 x bfloat> poison, poison
185+
; CHECK-BF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
168186
;
169187
%NXV4BF16 = fmul <vscale x 4 x bfloat> poison, poison
170188
%NXV8BF16 = fmul <vscale x 8 x bfloat> poison, poison

0 commit comments

Comments
 (0)