Skip to content

Commit 658d9e5

Browse files
davemgreen authored and tru committed
[AArch64] Add some basic handling for bf16 constants.
This adds some basic handling for bf16 constants, treating them much like fp16 constants where possible. Zero immediates are lowered to FMOVH0; other constants use FMOVHi when their bit pattern is encodable as an fp16 immediate, and are otherwise lowered to FMOVWHr(MOVi32imm). Without fullfp16 they get expanded.

This may not always be optimal, but it fixes a gap in our lowering. See llvm/test/CodeGen/AArch64/f16-imm.ll for the equivalent fp16 test.

Differential Revision: https://reviews.llvm.org/D156649

(cherry picked from commit 778fa4e)
1 parent e5f9e16 commit 658d9e5

File tree

5 files changed: +143 −4 lines changed

llvm/lib/CodeGen/TargetLoweringBase.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -921,7 +921,7 @@ void TargetLoweringBase::initActions() {
921921
// Legal, in which case all fp constants are legal, or use isFPImmLegal()
922922
// to optimize expansions for certain constants.
923923
setOperationAction(ISD::ConstantFP,
924-
{MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
924+
{MVT::bf16, MVT::f16, MVT::f32, MVT::f64, MVT::f80, MVT::f128},
925925
Expand);
926926

927927
// These library functions default to expand.

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1091,6 +1091,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
10911091

10921092
if (Subtarget->hasFullFP16()) {
10931093
setOperationAction(ISD::ConstantFP, MVT::f16, Legal);
1094+
setOperationAction(ISD::ConstantFP, MVT::bf16, Legal);
10941095

10951096
setOperationAction(ISD::SINT_TO_FP, MVT::v8i8, Custom);
10961097
setOperationAction(ISD::UINT_TO_FP, MVT::v8i8, Custom);
@@ -9757,7 +9758,7 @@ bool AArch64TargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT,
97579758
IsLegal = AArch64_AM::getFP64Imm(ImmInt) != -1 || Imm.isPosZero();
97589759
else if (VT == MVT::f32)
97599760
IsLegal = AArch64_AM::getFP32Imm(ImmInt) != -1 || Imm.isPosZero();
9760-
else if (VT == MVT::f16)
9761+
else if (VT == MVT::f16 || VT == MVT::bf16)
97619762
IsLegal =
97629763
(Subtarget->hasFullFP16() && AArch64_AM::getFP16Imm(ImmInt) != -1) ||
97639764
Imm.isPosZero();

llvm/lib/Target/AArch64/AArch64InstrFormats.td

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1306,6 +1306,11 @@ def fpimm16 : Operand<f16>,
13061306
let PrintMethod = "printFPImmOperand";
13071307
}
13081308

1309+
def fpimmbf16 : Operand<bf16>,
1310+
FPImmLeaf<bf16, [{
1311+
return AArch64_AM::getFP16Imm(Imm) != -1;
1312+
}], fpimm16XForm>;
1313+
13091314
def fpimm32 : Operand<f32>,
13101315
FPImmLeaf<f32, [{
13111316
return AArch64_AM::getFP32Imm(Imm) != -1;

llvm/lib/Target/AArch64/AArch64InstrInfo.td

Lines changed: 14 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -4355,16 +4355,23 @@ def FMOVS0 : Pseudo<(outs FPR32:$Rd), (ins), [(set f32:$Rd, (fpimm0))]>,
43554355
def FMOVD0 : Pseudo<(outs FPR64:$Rd), (ins), [(set f64:$Rd, (fpimm0))]>,
43564356
Sched<[WriteF]>;
43574357
}
4358+
43584359
// Similarly add aliases
43594360
def : InstAlias<"fmov $Rd, #0.0", (FMOVWHr FPR16:$Rd, WZR), 0>,
43604361
Requires<[HasFullFP16]>;
43614362
def : InstAlias<"fmov $Rd, #0.0", (FMOVWSr FPR32:$Rd, WZR), 0>;
43624363
def : InstAlias<"fmov $Rd, #0.0", (FMOVXDr FPR64:$Rd, XZR), 0>;
43634364

4364-
// Pattern for FP16 immediates
4365+
def : Pat<(bf16 fpimm0),
4366+
(FMOVH0)>;
4367+
4368+
// Pattern for FP16 and BF16 immediates
43654369
let Predicates = [HasFullFP16] in {
43664370
def : Pat<(f16 fpimm:$in),
4367-
(FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>;
4371+
(FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 f16:$in)))>;
4372+
4373+
def : Pat<(bf16 fpimm:$in),
4374+
(FMOVWHr (MOVi32imm (bitcast_fpimm_to_i32 bf16:$in)))>;
43684375
}
43694376

43704377
//===----------------------------------------------------------------------===//
@@ -4617,6 +4624,11 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1 in {
46174624
defm FMOV : FPMoveImmediate<"fmov">;
46184625
}
46194626

4627+
let Predicates = [HasFullFP16] in {
4628+
def : Pat<(bf16 fpimmbf16:$in),
4629+
(FMOVHi (fpimm16XForm bf16:$in))>;
4630+
}
4631+
46204632
//===----------------------------------------------------------------------===//
46214633
// Advanced SIMD two vector instructions.
46224634
//===----------------------------------------------------------------------===//

llvm/test/CodeGen/AArch64/bf16-imm.ll

Lines changed: 121 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,121 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc < %s -mtriple=aarch64 -mattr=+fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-FP16
3+
; RUN: llc < %s -mtriple=aarch64 -mattr=-fullfp16 | FileCheck %s --check-prefixes=CHECK,CHECK-NOFP16
4+
5+
define bfloat @Const0() {
6+
; CHECK-LABEL: Const0:
7+
; CHECK: // %bb.0: // %entry
8+
; CHECK-NEXT: movi d0, #0000000000000000
9+
; CHECK-NEXT: ret
10+
entry:
11+
ret bfloat 0xR0000
12+
}
13+
14+
define bfloat @Const1() {
15+
; CHECK-FP16-LABEL: Const1:
16+
; CHECK-FP16: // %bb.0: // %entry
17+
; CHECK-FP16-NEXT: fmov h0, #1.00000000
18+
; CHECK-FP16-NEXT: ret
19+
;
20+
; CHECK-NOFP16-LABEL: Const1:
21+
; CHECK-NOFP16: // %bb.0: // %entry
22+
; CHECK-NOFP16-NEXT: adrp x8, .LCPI1_0
23+
; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI1_0]
24+
; CHECK-NOFP16-NEXT: ret
25+
entry:
26+
ret bfloat 0xR3C00
27+
}
28+
29+
define bfloat @Const2() {
30+
; CHECK-FP16-LABEL: Const2:
31+
; CHECK-FP16: // %bb.0: // %entry
32+
; CHECK-FP16-NEXT: fmov h0, #0.12500000
33+
; CHECK-FP16-NEXT: ret
34+
;
35+
; CHECK-NOFP16-LABEL: Const2:
36+
; CHECK-NOFP16: // %bb.0: // %entry
37+
; CHECK-NOFP16-NEXT: adrp x8, .LCPI2_0
38+
; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI2_0]
39+
; CHECK-NOFP16-NEXT: ret
40+
entry:
41+
ret bfloat 0xR3000
42+
}
43+
44+
define bfloat @Const3() {
45+
; CHECK-FP16-LABEL: Const3:
46+
; CHECK-FP16: // %bb.0: // %entry
47+
; CHECK-FP16-NEXT: fmov h0, #30.00000000
48+
; CHECK-FP16-NEXT: ret
49+
;
50+
; CHECK-NOFP16-LABEL: Const3:
51+
; CHECK-NOFP16: // %bb.0: // %entry
52+
; CHECK-NOFP16-NEXT: adrp x8, .LCPI3_0
53+
; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI3_0]
54+
; CHECK-NOFP16-NEXT: ret
55+
entry:
56+
ret bfloat 0xR4F80
57+
}
58+
59+
define bfloat @Const4() {
60+
; CHECK-FP16-LABEL: Const4:
61+
; CHECK-FP16: // %bb.0: // %entry
62+
; CHECK-FP16-NEXT: fmov h0, #31.00000000
63+
; CHECK-FP16-NEXT: ret
64+
;
65+
; CHECK-NOFP16-LABEL: Const4:
66+
; CHECK-NOFP16: // %bb.0: // %entry
67+
; CHECK-NOFP16-NEXT: adrp x8, .LCPI4_0
68+
; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI4_0]
69+
; CHECK-NOFP16-NEXT: ret
70+
entry:
71+
ret bfloat 0xR4FC0
72+
}
73+
74+
define bfloat @Const5() {
75+
; CHECK-FP16-LABEL: Const5:
76+
; CHECK-FP16: // %bb.0: // %entry
77+
; CHECK-FP16-NEXT: mov w8, #12272 // =0x2ff0
78+
; CHECK-FP16-NEXT: fmov h0, w8
79+
; CHECK-FP16-NEXT: ret
80+
;
81+
; CHECK-NOFP16-LABEL: Const5:
82+
; CHECK-NOFP16: // %bb.0: // %entry
83+
; CHECK-NOFP16-NEXT: adrp x8, .LCPI5_0
84+
; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI5_0]
85+
; CHECK-NOFP16-NEXT: ret
86+
entry:
87+
ret bfloat 0xR2FF0
88+
}
89+
90+
define bfloat @Const6() {
91+
; CHECK-FP16-LABEL: Const6:
92+
; CHECK-FP16: // %bb.0: // %entry
93+
; CHECK-FP16-NEXT: mov w8, #20417 // =0x4fc1
94+
; CHECK-FP16-NEXT: fmov h0, w8
95+
; CHECK-FP16-NEXT: ret
96+
;
97+
; CHECK-NOFP16-LABEL: Const6:
98+
; CHECK-NOFP16: // %bb.0: // %entry
99+
; CHECK-NOFP16-NEXT: adrp x8, .LCPI6_0
100+
; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI6_0]
101+
; CHECK-NOFP16-NEXT: ret
102+
entry:
103+
ret bfloat 0xR4FC1
104+
}
105+
106+
define bfloat @Const7() {
107+
; CHECK-FP16-LABEL: Const7:
108+
; CHECK-FP16: // %bb.0: // %entry
109+
; CHECK-FP16-NEXT: mov w8, #20480 // =0x5000
110+
; CHECK-FP16-NEXT: fmov h0, w8
111+
; CHECK-FP16-NEXT: ret
112+
;
113+
; CHECK-NOFP16-LABEL: Const7:
114+
; CHECK-NOFP16: // %bb.0: // %entry
115+
; CHECK-NOFP16-NEXT: adrp x8, .LCPI7_0
116+
; CHECK-NOFP16-NEXT: ldr h0, [x8, :lo12:.LCPI7_0]
117+
; CHECK-NOFP16-NEXT: ret
118+
entry:
119+
ret bfloat 0xR5000
120+
}
121+

0 commit comments

Comments
 (0)