Skip to content

Commit cbcf531

Browse files
authored
[RISCV] Expand bf16 FNEG/FABS/FCOPYSIGN (#108245)
The motivation for this is to start promoting bf16 ops to f32 so that we can mark bf16 as a supported type in RISCVTTIImpl::isElementTypeLegalForScalableVector and vectorize it with scalable vectors. This starts by expanding the nodes that can't be promoted to f32 because the promotion would canonicalize NaNs, similar to what was done for f16 in #106652.
1 parent 2e18f63 commit cbcf531

File tree

4 files changed

+236
-0
lines changed

4 files changed

+236
-0
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1118,6 +1118,11 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
11181118
if (Subtarget.hasStdExtZfbfmin())
11191119
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
11201120
setOperationAction({ISD::LOAD, ISD::STORE}, VT, Custom);
1121+
1122+
setOperationAction(ISD::FNEG, VT, Expand);
1123+
setOperationAction(ISD::FABS, VT, Expand);
1124+
setOperationAction(ISD::FCOPYSIGN, VT, Expand);
1125+
11211126
// TODO: Promote to fp32.
11221127
}
11231128
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
4+
5+
; fabs on <vscale x 1 x bfloat>: the checked lowering is integer-only, a single
; vand.vx with 0x7fff (lui 8 -> 0x8000, addi -1) at e16/mf4 that clears the sign bit.
define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %v) {
6+
; CHECK-LABEL: nxv1bf16:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: lui a0, 8
9+
; CHECK-NEXT: addi a0, a0, -1
10+
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
11+
; CHECK-NEXT: vand.vx v8, v8, a0
12+
; CHECK-NEXT: ret
13+
%r = call <vscale x 1 x bfloat> @llvm.fabs.nxv1bf16(<vscale x 1 x bfloat> %v)
14+
ret <vscale x 1 x bfloat> %r
15+
}
16+
17+
; fabs on <vscale x 2 x bfloat>: the checked lowering is integer-only, a single
; vand.vx with 0x7fff (lui 8 -> 0x8000, addi -1) at e16/mf2 that clears the sign bit.
define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %v) {
18+
; CHECK-LABEL: nxv2bf16:
19+
; CHECK: # %bb.0:
20+
; CHECK-NEXT: lui a0, 8
21+
; CHECK-NEXT: addi a0, a0, -1
22+
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
23+
; CHECK-NEXT: vand.vx v8, v8, a0
24+
; CHECK-NEXT: ret
25+
%r = call <vscale x 2 x bfloat> @llvm.fabs.nxv2bf16(<vscale x 2 x bfloat> %v)
26+
ret <vscale x 2 x bfloat> %r
27+
}
28+
29+
; fabs on <vscale x 4 x bfloat>: the checked lowering is integer-only, a single
; vand.vx with 0x7fff (lui 8 -> 0x8000, addi -1) at e16/m1 that clears the sign bit.
define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %v) {
30+
; CHECK-LABEL: nxv4bf16:
31+
; CHECK: # %bb.0:
32+
; CHECK-NEXT: lui a0, 8
33+
; CHECK-NEXT: addi a0, a0, -1
34+
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
35+
; CHECK-NEXT: vand.vx v8, v8, a0
36+
; CHECK-NEXT: ret
37+
%r = call <vscale x 4 x bfloat> @llvm.fabs.nxv4bf16(<vscale x 4 x bfloat> %v)
38+
ret <vscale x 4 x bfloat> %r
39+
}
40+
41+
; fabs on <vscale x 8 x bfloat>: the checked lowering is integer-only, a single
; vand.vx with 0x7fff (lui 8 -> 0x8000, addi -1) at e16/m2 that clears the sign bit.
define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %v) {
42+
; CHECK-LABEL: nxv8bf16:
43+
; CHECK: # %bb.0:
44+
; CHECK-NEXT: lui a0, 8
45+
; CHECK-NEXT: addi a0, a0, -1
46+
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
47+
; CHECK-NEXT: vand.vx v8, v8, a0
48+
; CHECK-NEXT: ret
49+
%r = call <vscale x 8 x bfloat> @llvm.fabs.nxv8bf16(<vscale x 8 x bfloat> %v)
50+
ret <vscale x 8 x bfloat> %r
51+
}
52+
53+
; fabs on <vscale x 16 x bfloat>: the checked lowering is integer-only, a single
; vand.vx with 0x7fff (lui 8 -> 0x8000, addi -1) at e16/m4 that clears the sign bit.
define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %v) {
54+
; CHECK-LABEL: nxv16bf16:
55+
; CHECK: # %bb.0:
56+
; CHECK-NEXT: lui a0, 8
57+
; CHECK-NEXT: addi a0, a0, -1
58+
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
59+
; CHECK-NEXT: vand.vx v8, v8, a0
60+
; CHECK-NEXT: ret
61+
%r = call <vscale x 16 x bfloat> @llvm.fabs.nxv16bf16(<vscale x 16 x bfloat> %v)
62+
ret <vscale x 16 x bfloat> %r
63+
}
64+
65+
; fabs on <vscale x 32 x bfloat>: the checked lowering is integer-only, a single
; vand.vx with 0x7fff (lui 8 -> 0x8000, addi -1) at e16/m8 that clears the sign bit.
define <vscale x 32 x bfloat> @nxv32bf16(<vscale x 32 x bfloat> %v) {
66+
; CHECK-LABEL: nxv32bf16:
67+
; CHECK: # %bb.0:
68+
; CHECK-NEXT: lui a0, 8
69+
; CHECK-NEXT: addi a0, a0, -1
70+
; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
71+
; CHECK-NEXT: vand.vx v8, v8, a0
72+
; CHECK-NEXT: ret
73+
%r = call <vscale x 32 x bfloat> @llvm.fabs.nxv32bf16(<vscale x 32 x bfloat> %v)
74+
ret <vscale x 32 x bfloat> %r
75+
}
Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
4+
5+
; copysign on <vscale x 1 x bfloat>: the checked lowering is integer-only at
; e16/mf4 - v9 & 0x8000 (lui 8) keeps only the sign bit, v8 & 0x7fff (addi -1)
; keeps every other bit, and vor.vv merges the two.
define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %vm, <vscale x 1 x bfloat> %vs) {
6+
; CHECK-LABEL: nxv1bf16:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: lui a0, 8
9+
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
10+
; CHECK-NEXT: vand.vx v9, v9, a0
11+
; CHECK-NEXT: addi a0, a0, -1
12+
; CHECK-NEXT: vand.vx v8, v8, a0
13+
; CHECK-NEXT: vor.vv v8, v8, v9
14+
; CHECK-NEXT: ret
15+
%r = call <vscale x 1 x bfloat> @llvm.copysign.nxv1bf16(<vscale x 1 x bfloat> %vm, <vscale x 1 x bfloat> %vs)
16+
ret <vscale x 1 x bfloat> %r
17+
}
18+
19+
; copysign on <vscale x 2 x bfloat>: the checked lowering is integer-only at
; e16/mf2 - v9 & 0x8000 (lui 8) keeps only the sign bit, v8 & 0x7fff (addi -1)
; keeps every other bit, and vor.vv merges the two.
define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %vm, <vscale x 2 x bfloat> %vs) {
20+
; CHECK-LABEL: nxv2bf16:
21+
; CHECK: # %bb.0:
22+
; CHECK-NEXT: lui a0, 8
23+
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
24+
; CHECK-NEXT: vand.vx v9, v9, a0
25+
; CHECK-NEXT: addi a0, a0, -1
26+
; CHECK-NEXT: vand.vx v8, v8, a0
27+
; CHECK-NEXT: vor.vv v8, v8, v9
28+
; CHECK-NEXT: ret
29+
%r = call <vscale x 2 x bfloat> @llvm.copysign.nxv2bf16(<vscale x 2 x bfloat> %vm, <vscale x 2 x bfloat> %vs)
30+
ret <vscale x 2 x bfloat> %r
31+
}
32+
33+
; copysign on <vscale x 4 x bfloat>: the checked lowering is integer-only at
; e16/m1 - v9 & 0x8000 (lui 8) keeps only the sign bit, v8 & 0x7fff (addi -1)
; keeps every other bit, and vor.vv merges the two.
define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %vm, <vscale x 4 x bfloat> %vs) {
34+
; CHECK-LABEL: nxv4bf16:
35+
; CHECK: # %bb.0:
36+
; CHECK-NEXT: lui a0, 8
37+
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
38+
; CHECK-NEXT: vand.vx v9, v9, a0
39+
; CHECK-NEXT: addi a0, a0, -1
40+
; CHECK-NEXT: vand.vx v8, v8, a0
41+
; CHECK-NEXT: vor.vv v8, v8, v9
42+
; CHECK-NEXT: ret
43+
%r = call <vscale x 4 x bfloat> @llvm.copysign.nxv4bf16(<vscale x 4 x bfloat> %vm, <vscale x 4 x bfloat> %vs)
44+
ret <vscale x 4 x bfloat> %r
45+
}
46+
47+
; copysign on <vscale x 8 x bfloat>: the checked lowering is integer-only at
; e16/m2 - v10 & 0x8000 (lui 8) keeps only the sign bit, v8 & 0x7fff (addi -1)
; keeps every other bit, and vor.vv merges the two.
define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %vm, <vscale x 8 x bfloat> %vs) {
48+
; CHECK-LABEL: nxv8bf16:
49+
; CHECK: # %bb.0:
50+
; CHECK-NEXT: lui a0, 8
51+
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
52+
; CHECK-NEXT: vand.vx v10, v10, a0
53+
; CHECK-NEXT: addi a0, a0, -1
54+
; CHECK-NEXT: vand.vx v8, v8, a0
55+
; CHECK-NEXT: vor.vv v8, v8, v10
56+
; CHECK-NEXT: ret
57+
%r = call <vscale x 8 x bfloat> @llvm.copysign.nxv8bf16(<vscale x 8 x bfloat> %vm, <vscale x 8 x bfloat> %vs)
58+
ret <vscale x 8 x bfloat> %r
59+
}
60+
61+
; copysign on <vscale x 16 x bfloat>: the checked lowering is integer-only at
; e16/m4 - v12 & 0x8000 (lui 8) keeps only the sign bit, v8 & 0x7fff (addi -1)
; keeps every other bit, and vor.vv merges the two.
define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %vm, <vscale x 16 x bfloat> %vs) {
62+
; CHECK-LABEL: nxv16bf16:
63+
; CHECK: # %bb.0:
64+
; CHECK-NEXT: lui a0, 8
65+
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
66+
; CHECK-NEXT: vand.vx v12, v12, a0
67+
; CHECK-NEXT: addi a0, a0, -1
68+
; CHECK-NEXT: vand.vx v8, v8, a0
69+
; CHECK-NEXT: vor.vv v8, v8, v12
70+
; CHECK-NEXT: ret
71+
%r = call <vscale x 16 x bfloat> @llvm.copysign.nxv16bf16(<vscale x 16 x bfloat> %vm, <vscale x 16 x bfloat> %vs)
72+
ret <vscale x 16 x bfloat> %r
73+
}
74+
75+
; copysign on <vscale x 32 x bfloat>: the checked lowering is integer-only at
; e16/m8 - v16 & 0x8000 (lui 8) keeps only the sign bit, v8 & 0x7fff (addi -1)
; keeps every other bit, and vor.vv merges the two.
; NOTE(review): the function is named nxv32bf32 while the element type is
; bfloat (the other tests use a bf16 suffix); the name is kept so the label is
; stable. The intrinsic callee below uses the correctly mangled nxv32bf16 suffix.
define <vscale x 32 x bfloat> @nxv32bf32(<vscale x 32 x bfloat> %vm, <vscale x 32 x bfloat> %vs) {
76+
; CHECK-LABEL: nxv32bf32:
77+
; CHECK: # %bb.0:
78+
; CHECK-NEXT: lui a0, 8
79+
; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
80+
; CHECK-NEXT: vand.vx v16, v16, a0
81+
; CHECK-NEXT: addi a0, a0, -1
82+
; CHECK-NEXT: vand.vx v8, v8, a0
83+
; CHECK-NEXT: vor.vv v8, v8, v16
84+
; CHECK-NEXT: ret
85+
%r = call <vscale x 32 x bfloat> @llvm.copysign.nxv32bf16(<vscale x 32 x bfloat> %vm, <vscale x 32 x bfloat> %vs)
86+
ret <vscale x 32 x bfloat> %r
87+
}
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: llc < %s -mtriple=riscv32 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
3+
; RUN: llc < %s -mtriple=riscv64 -mattr=+v,+zvfbfmin -verify-machineinstrs | FileCheck %s
4+
5+
; fneg on <vscale x 1 x bfloat>: the checked lowering is a single vxor.vx with
; 0x8000 (lui 8) at e16/mf4, i.e. the sign bit is flipped with an integer op.
define <vscale x 1 x bfloat> @nxv1bf16(<vscale x 1 x bfloat> %va) {
6+
; CHECK-LABEL: nxv1bf16:
7+
; CHECK: # %bb.0:
8+
; CHECK-NEXT: lui a0, 8
9+
; CHECK-NEXT: vsetvli a1, zero, e16, mf4, ta, ma
10+
; CHECK-NEXT: vxor.vx v8, v8, a0
11+
; CHECK-NEXT: ret
12+
%vb = fneg <vscale x 1 x bfloat> %va
13+
ret <vscale x 1 x bfloat> %vb
14+
}
15+
16+
; fneg on <vscale x 2 x bfloat>: the checked lowering is a single vxor.vx with
; 0x8000 (lui 8) at e16/mf2, i.e. the sign bit is flipped with an integer op.
define <vscale x 2 x bfloat> @nxv2bf16(<vscale x 2 x bfloat> %va) {
17+
; CHECK-LABEL: nxv2bf16:
18+
; CHECK: # %bb.0:
19+
; CHECK-NEXT: lui a0, 8
20+
; CHECK-NEXT: vsetvli a1, zero, e16, mf2, ta, ma
21+
; CHECK-NEXT: vxor.vx v8, v8, a0
22+
; CHECK-NEXT: ret
23+
%vb = fneg <vscale x 2 x bfloat> %va
24+
ret <vscale x 2 x bfloat> %vb
25+
}
26+
27+
; fneg on <vscale x 4 x bfloat>: the checked lowering is a single vxor.vx with
; 0x8000 (lui 8) at e16/m1, i.e. the sign bit is flipped with an integer op.
define <vscale x 4 x bfloat> @nxv4bf16(<vscale x 4 x bfloat> %va) {
28+
; CHECK-LABEL: nxv4bf16:
29+
; CHECK: # %bb.0:
30+
; CHECK-NEXT: lui a0, 8
31+
; CHECK-NEXT: vsetvli a1, zero, e16, m1, ta, ma
32+
; CHECK-NEXT: vxor.vx v8, v8, a0
33+
; CHECK-NEXT: ret
34+
%vb = fneg <vscale x 4 x bfloat> %va
35+
ret <vscale x 4 x bfloat> %vb
36+
}
37+
38+
; fneg on <vscale x 8 x bfloat>: the checked lowering is a single vxor.vx with
; 0x8000 (lui 8) at e16/m2, i.e. the sign bit is flipped with an integer op.
define <vscale x 8 x bfloat> @nxv8bf16(<vscale x 8 x bfloat> %va) {
39+
; CHECK-LABEL: nxv8bf16:
40+
; CHECK: # %bb.0:
41+
; CHECK-NEXT: lui a0, 8
42+
; CHECK-NEXT: vsetvli a1, zero, e16, m2, ta, ma
43+
; CHECK-NEXT: vxor.vx v8, v8, a0
44+
; CHECK-NEXT: ret
45+
%vb = fneg <vscale x 8 x bfloat> %va
46+
ret <vscale x 8 x bfloat> %vb
47+
}
48+
49+
; fneg on <vscale x 16 x bfloat>: the checked lowering is a single vxor.vx with
; 0x8000 (lui 8) at e16/m4, i.e. the sign bit is flipped with an integer op.
define <vscale x 16 x bfloat> @nxv16bf16(<vscale x 16 x bfloat> %va) {
50+
; CHECK-LABEL: nxv16bf16:
51+
; CHECK: # %bb.0:
52+
; CHECK-NEXT: lui a0, 8
53+
; CHECK-NEXT: vsetvli a1, zero, e16, m4, ta, ma
54+
; CHECK-NEXT: vxor.vx v8, v8, a0
55+
; CHECK-NEXT: ret
56+
%vb = fneg <vscale x 16 x bfloat> %va
57+
ret <vscale x 16 x bfloat> %vb
58+
}
59+
60+
; fneg on <vscale x 32 x bfloat>: the checked lowering is a single vxor.vx with
; 0x8000 (lui 8) at e16/m8, i.e. the sign bit is flipped with an integer op.
define <vscale x 32 x bfloat> @nxv32bf16(<vscale x 32 x bfloat> %va) {
61+
; CHECK-LABEL: nxv32bf16:
62+
; CHECK: # %bb.0:
63+
; CHECK-NEXT: lui a0, 8
64+
; CHECK-NEXT: vsetvli a1, zero, e16, m8, ta, ma
65+
; CHECK-NEXT: vxor.vx v8, v8, a0
66+
; CHECK-NEXT: ret
67+
%vb = fneg <vscale x 32 x bfloat> %va
68+
ret <vscale x 32 x bfloat> %vb
69+
}

0 commit comments

Comments
 (0)