Skip to content

Commit 2319dde

Browse files
committed
Add tests for bf16/zvfbfmin
1 parent e43532f commit 2319dde

File tree

2 files changed

+303
-0
lines changed

2 files changed

+303
-0
lines changed
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
3+
; RUN: -verify-machineinstrs < %s | FileCheck %s
4+
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
5+
; RUN: -verify-machineinstrs < %s | FileCheck %s
6+
7+
; fmin reduction of a <vscale x 4 x bfloat> vector to a scalar bfloat.
; Expected lowering: widen the bf16 elements to f32 (vfwcvtbf16.f.f.v),
; reduce at e32/m2 with vfredmin.vs, move the scalar out (vfmv.f.s) and
; narrow it back with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vreduce_fmin_nxv4f16(<vscale x 4 x bfloat> %val) {
; CHECK-LABEL: vreduce_fmin_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v10, v10
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded operand type (nxv4bf16).
  %s = call bfloat @llvm.vector.reduce.fmin.nxv4bf16(<vscale x 4 x bfloat> %val)
  ret bfloat %s
}
20+
21+
; fmax reduction of a <vscale x 4 x bfloat> vector to a scalar bfloat.
; Expected lowering: widen the bf16 elements to f32 (vfwcvtbf16.f.f.v),
; reduce at e32/m2 with vfredmax.vs, move the scalar out (vfmv.f.s) and
; narrow it back with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vreduce_fmax_nxv4f16(<vscale x 4 x bfloat> %val) {
; CHECK-LABEL: vreduce_fmax_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v10, v10
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded operand type (nxv4bf16).
  %s = call bfloat @llvm.vector.reduce.fmax.nxv4bf16(<vscale x 4 x bfloat> %val)
  ret bfloat %s
}
34+
35+
; fmin reduction of <vscale x 4 x bfloat> with the `nnan` fast-math flag on
; the call. Expected lowering: widen to f32 (vfwcvtbf16.f.f.v), reduce with
; vfredmin.vs at e32/m2, then narrow the scalar result with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vreduce_fmin_nnan_nxv4f16(<vscale x 4 x bfloat> %val) {
; CHECK-LABEL: vreduce_fmin_nnan_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v10, v10
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded operand type (nxv4bf16).
  %s = call nnan bfloat @llvm.vector.reduce.fmin.nxv4bf16(<vscale x 4 x bfloat> %val)
  ret bfloat %s
}
48+
49+
; fmax reduction of <vscale x 4 x bfloat> with the `nnan` fast-math flag on
; the call. Expected lowering: widen to f32 (vfwcvtbf16.f.f.v), reduce with
; vfredmax.vs at e32/m2, then narrow the scalar result with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vreduce_fmax_nnan_nxv4f16(<vscale x 4 x bfloat> %val) {
; CHECK-LABEL: vreduce_fmax_nnan_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v10, v10
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded operand type (nxv4bf16).
  %s = call nnan bfloat @llvm.vector.reduce.fmax.nxv4bf16(<vscale x 4 x bfloat> %val)
  ret bfloat %s
}
62+
63+
; fminimum reduction of a <vscale x 4 x bfloat> vector (no fast-math flags).
; Expected lowering: widen to f32, then test for NaN elements by comparing
; the vector with itself (vmfne.vv) and counting the set mask bits (vcpop.m).
;   * count != 0: return the constant built by `lui a0, 523264`
;     (bit pattern 0x7fc00000) converted to bf16.
;   * count == 0: reduce with vfredmin.vs and narrow with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vreduce_fminimum_nxv4f16(<vscale x 4 x bfloat> %val) {
; CHECK-LABEL: vreduce_fminimum_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmfne.vv v8, v10, v10
; CHECK-NEXT:    vcpop.m a0, v8
; CHECK-NEXT:    beqz a0, .LBB4_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    lui a0, 523264
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB4_2:
; CHECK-NEXT:    vfredmin.vs v8, v10, v10
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded operand type (nxv4bf16).
  %s = call bfloat @llvm.vector.reduce.fminimum.nxv4bf16(<vscale x 4 x bfloat> %val)
  ret bfloat %s
}
85+
86+
; fmaximum reduction of a <vscale x 4 x bfloat> vector (no fast-math flags).
; Expected lowering: widen to f32, then test for NaN elements by comparing
; the vector with itself (vmfne.vv) and counting the set mask bits (vcpop.m).
;   * count != 0: return the constant built by `lui a0, 523264`
;     (bit pattern 0x7fc00000) converted to bf16.
;   * count == 0: reduce with vfredmax.vs and narrow with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vreduce_fmaximum_nxv4f16(<vscale x 4 x bfloat> %val) {
; CHECK-LABEL: vreduce_fmaximum_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmfne.vv v8, v10, v10
; CHECK-NEXT:    vcpop.m a0, v8
; CHECK-NEXT:    beqz a0, .LBB5_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    lui a0, 523264
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB5_2:
; CHECK-NEXT:    vfredmax.vs v8, v10, v10
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded operand type (nxv4bf16).
  %s = call bfloat @llvm.vector.reduce.fmaximum.nxv4bf16(<vscale x 4 x bfloat> %val)
  ret bfloat %s
}
108+
109+
; fminimum reduction of <vscale x 4 x bfloat> with the `nnan` fast-math flag.
; Expected lowering is branch-free: widen to f32 (vfwcvtbf16.f.f.v), reduce
; with vfredmin.vs at e32/m2, then narrow the scalar with fcvt.bf16.s; no
; vmfne.vv/vcpop.m NaN test is expected in the output.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vreduce_fminimum_nnan_nxv4f16(<vscale x 4 x bfloat> %val) {
; CHECK-LABEL: vreduce_fminimum_nnan_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v10, v10
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded operand type (nxv4bf16).
  %s = call nnan bfloat @llvm.vector.reduce.fminimum.nxv4bf16(<vscale x 4 x bfloat> %val)
  ret bfloat %s
}
122+
123+
; fmaximum reduction of <vscale x 4 x bfloat> with the `nnan` fast-math flag.
; Expected lowering is branch-free: widen to f32 (vfwcvtbf16.f.f.v), reduce
; with vfredmax.vs at e32/m2, then narrow the scalar with fcvt.bf16.s; no
; vmfne.vv/vcpop.m NaN test is expected in the output.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vreduce_fmaximum_nnan_nxv4f16(<vscale x 4 x bfloat> %val) {
; CHECK-LABEL: vreduce_fmaximum_nnan_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli a0, zero, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v10, v10
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded operand type (nxv4bf16).
  %s = call nnan bfloat @llvm.vector.reduce.fmaximum.nxv4bf16(<vscale x 4 x bfloat> %val)
  ret bfloat %s
}
136+
Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
3+
; RUN: -verify-machineinstrs < %s | FileCheck %s
4+
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
5+
; RUN: -verify-machineinstrs < %s | FileCheck %s
6+
7+
; VP (masked, explicit-vector-length) fmin reduction of <vscale x 4 x bfloat>
; with a bfloat start value.
; Expected lowering: widen the vector to f32 under VL = a0, convert the start
; value with fcvt.s.bf16 and place it in element 0 of v8 (vfmv.s.f at VL = 1),
; run the masked vfredmin.vs (v0.t) at e32/m2 with VL = a0, then narrow the
; scalar result with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vpreduce_fmin_nxv4f16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fmin_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa5
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v10, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded vector operand type (nxv4bf16).
  %s = call bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret bfloat %s
}
23+
24+
; VP (masked, explicit-vector-length) fmax reduction of <vscale x 4 x bfloat>
; with a bfloat start value.
; Expected lowering: widen the vector to f32 under VL = a0, convert the start
; value with fcvt.s.bf16 and place it in element 0 of v8 (vfmv.s.f at VL = 1),
; run the masked vfredmax.vs (v0.t) at e32/m2 with VL = a0, then narrow the
; scalar result with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vpreduce_fmax_nxv4f16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fmax_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa5
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v10, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded vector operand type (nxv4bf16).
  %s = call bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret bfloat %s
}
40+
41+
; VP fmin reduction of <vscale x 4 x bfloat> with a bfloat start value and
; the `nnan` fast-math flag on the call.
; Expected lowering: widen the vector to f32 under VL = a0, convert the start
; value with fcvt.s.bf16 and insert it with vfmv.s.f at VL = 1, run the masked
; vfredmin.vs (v0.t) with VL = a0, then narrow the result with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vpreduce_fmin_nnan_nxv4f16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fmin_nnan_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa5
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v10, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded vector operand type (nxv4bf16).
  %s = call nnan bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret bfloat %s
}
57+
58+
; VP fmax reduction of <vscale x 4 x bfloat> with a bfloat start value and
; the `nnan` fast-math flag on the call.
; Expected lowering: widen the vector to f32 under VL = a0, convert the start
; value with fcvt.s.bf16 and insert it with vfmv.s.f at VL = 1, run the masked
; vfredmax.vs (v0.t) with VL = a0, then narrow the result with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vpreduce_fmax_nnan_nxv4f16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fmax_nnan_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa5
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v10, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded vector operand type (nxv4bf16).
  %s = call nnan bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret bfloat %s
}
74+
75+
; VP fminimum reduction of <vscale x 4 x bfloat> with a bfloat start value
; (no fast-math flags).
; Expected lowering: widen the vector to f32 and convert the start value with
; fcvt.s.bf16, then build a NaN indicator in a1 from two sources:
;   * vector: masked self-compare (vmfne.vv ..., v0.t) counted by the masked
;     vcpop.m into a2;
;   * start value: feq.s of fa5 with itself, inverted by xori.
; If the OR of both is non-zero, return the constant built by
; `lui a0, 523264` (bit pattern 0x7fc00000) converted to bf16. Otherwise
; insert the start value with vfmv.s.f at VL = 1, run the masked vfredmin.vs
; with VL = a0 and narrow the result with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vpreduce_fminimum_nxv4f16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fminimum_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmfne.vv v8, v10, v10, v0.t
; CHECK-NEXT:    feq.s a1, fa5, fa5
; CHECK-NEXT:    vcpop.m a2, v8, v0.t
; CHECK-NEXT:    xori a1, a1, 1
; CHECK-NEXT:    or a1, a2, a1
; CHECK-NEXT:    beqz a1, .LBB4_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    lui a0, 523264
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB4_2:
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa5
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v10, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded vector operand type (nxv4bf16).
  %s = call bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret bfloat %s
}
104+
105+
; VP fmaximum reduction of <vscale x 4 x bfloat> with a bfloat start value
; (no fast-math flags).
; Expected lowering: widen the vector to f32 and convert the start value with
; fcvt.s.bf16, then build a NaN indicator in a1 from two sources:
;   * vector: masked self-compare (vmfne.vv ..., v0.t) counted by the masked
;     vcpop.m into a2;
;   * start value: feq.s of fa5 with itself, inverted by xori.
; If the OR of both is non-zero, return the constant built by
; `lui a0, 523264` (bit pattern 0x7fc00000) converted to bf16. Otherwise
; insert the start value with vfmv.s.f at VL = 1, run the masked vfredmax.vs
; with VL = a0 and narrow the result with fcvt.bf16.s.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vpreduce_fmaximum_nxv4f16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fmaximum_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
; CHECK-NEXT:    vsetvli zero, zero, e32, m2, ta, ma
; CHECK-NEXT:    vmfne.vv v8, v10, v10, v0.t
; CHECK-NEXT:    feq.s a1, fa5, fa5
; CHECK-NEXT:    vcpop.m a2, v8, v0.t
; CHECK-NEXT:    xori a1, a1, 1
; CHECK-NEXT:    or a1, a2, a1
; CHECK-NEXT:    beqz a1, .LBB5_2
; CHECK-NEXT:  # %bb.1:
; CHECK-NEXT:    lui a0, 523264
; CHECK-NEXT:    fmv.w.x fa5, a0
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
; CHECK-NEXT:  .LBB5_2:
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa5
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v10, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded vector operand type (nxv4bf16).
  %s = call bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret bfloat %s
}
134+
135+
; VP fminimum reduction of <vscale x 4 x bfloat> with a bfloat start value
; and the `nnan` fast-math flag on the call.
; Expected lowering is branch-free: widen the vector to f32 under VL = a0,
; convert the start value with fcvt.s.bf16 and insert it with vfmv.s.f at
; VL = 1, run the masked vfredmin.vs (v0.t) with VL = a0, then narrow the
; result with fcvt.bf16.s. No vmfne.vv/feq.s NaN test is expected.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vpreduce_fminimum_nnan_nxv4f16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fminimum_nnan_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa5
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfredmin.vs v8, v10, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded vector operand type (nxv4bf16).
  %s = call nnan bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret bfloat %s
}
151+
152+
; VP fmaximum reduction of <vscale x 4 x bfloat> with a bfloat start value
; and the `nnan` fast-math flag on the call.
; Expected lowering is branch-free: widen the vector to f32 under VL = a0,
; convert the start value with fcvt.s.bf16 and insert it with vfmv.s.f at
; VL = 1, run the masked vfredmax.vs (v0.t) with VL = a0, then narrow the
; result with fcvt.bf16.s. No vmfne.vv/feq.s NaN test is expected.
; Note: the test name says f16, but the element type exercised is bfloat.
define bfloat @vpreduce_fmaximum_nnan_nxv4f16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 zeroext %evl) {
; CHECK-LABEL: vpreduce_fmaximum_nnan_nxv4f16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsetvli zero, a0, e16, m1, ta, ma
; CHECK-NEXT:    vfwcvtbf16.f.f.v v10, v8
; CHECK-NEXT:    fcvt.s.bf16 fa5, fa0
; CHECK-NEXT:    vsetivli zero, 1, e32, m2, ta, ma
; CHECK-NEXT:    vfmv.s.f v8, fa5
; CHECK-NEXT:    vsetvli zero, a0, e32, m2, ta, ma
; CHECK-NEXT:    vfredmax.vs v8, v10, v8, v0.t
; CHECK-NEXT:    vfmv.f.s fa5, v8
; CHECK-NEXT:    fcvt.bf16.s fa0, fa5
; CHECK-NEXT:    ret
  ; The intrinsic suffix encodes the overloaded vector operand type (nxv4bf16).
  %s = call nnan bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, <vscale x 4 x bfloat> %val, <vscale x 4 x i1> %m, i32 %evl)
  ret bfloat %s
}

0 commit comments

Comments
 (0)