Skip to content

Commit 5e4cdd6

Browse files
authored
[VP][RISCV] Enable promotion on fixed-length vp intrinsics with zvfbfmin (#167541)
Following #112393, this change promotes fixed-length VP intrinsics on bf16 vectors to fp32 when zvfbfmin is available without zvfbfa.
1 parent b7a673c commit 5e4cdd6

File tree

2 files changed

+168
-1
lines changed

2 files changed

+168
-1
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1618,7 +1618,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
16181618
if (!isTypeLegal(F32VecVT))
16191619
continue;
16201620
setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1621-
// TODO: Promote VP ops to fp32.
1621+
setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
16221622
continue;
16231623
}
16241624

Lines changed: 167 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,167 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=ilp32d \
3+
; RUN: -verify-machineinstrs < %s | FileCheck %s
4+
; RUN: llc -mtriple=riscv64 -mattr=+d,+zfbfmin,+zvfbfmin,+v -target-abi=lp64d \
5+
; RUN: -verify-machineinstrs < %s | FileCheck %s
6+
7+
; Masked llvm.vp.reduce.fmin over a fixed-length <4 x bfloat> vector.
; The expected code performs the reduction in f32: the vector operand is
; widened with vfwcvtbf16.f.f.v, the start value with fcvt.s.bf16, the
; reduction is vfredmin.vs at e32 under mask v0.t with a0 (%evl) as the vector
; length, and the scalar result is narrowed back with fcvt.bf16.s.
; NOTE(review): the function name and intrinsic suffix say nxv4bf16 although
; the operand type is the fixed-length <4 x bfloat> -- presumably v4bf16 was
; intended; confirm before renaming.
define bfloat @vpreduce_fmin_nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 zeroext %evl) {
8+
; CHECK-LABEL: vpreduce_fmin_nxv4bf16:
9+
; CHECK: # %bb.0:
10+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
11+
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
12+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
13+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
14+
; CHECK-NEXT: vfmv.s.f v8, fa5
15+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
16+
; CHECK-NEXT: vfredmin.vs v8, v9, v8, v0.t
17+
; CHECK-NEXT: vfmv.f.s fa5, v8
18+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
19+
; CHECK-NEXT: ret
20+
%s = call bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 %evl)
21+
ret bfloat %s
22+
}
23+
24+
; Masked llvm.vp.reduce.fmax over a fixed-length <4 x bfloat> vector.
; The expected code performs the reduction in f32: the vector operand is
; widened with vfwcvtbf16.f.f.v, the start value with fcvt.s.bf16, the
; reduction is vfredmax.vs at e32 under mask v0.t with a0 (%evl) as the vector
; length, and the scalar result is narrowed back with fcvt.bf16.s.
define bfloat @vpreduce_fmax_nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 zeroext %evl) {
25+
; CHECK-LABEL: vpreduce_fmax_nxv4bf16:
26+
; CHECK: # %bb.0:
27+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
28+
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
29+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
30+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
31+
; CHECK-NEXT: vfmv.s.f v8, fa5
32+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
33+
; CHECK-NEXT: vfredmax.vs v8, v9, v8, v0.t
34+
; CHECK-NEXT: vfmv.f.s fa5, v8
35+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
36+
; CHECK-NEXT: ret
37+
%s = call bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 %evl)
38+
ret bfloat %s
39+
}
40+
41+
; llvm.vp.reduce.fmin over <4 x bfloat> with the nnan fast-math flag on the
; call. The expected sequence is the promoted-to-f32 reduction: widen with
; vfwcvtbf16.f.f.v / fcvt.s.bf16, reduce with a masked vfredmin.vs at e32,
; then narrow the scalar result with fcvt.bf16.s. No NaN handling code is
; expected in the output.
define bfloat @vpreduce_fmin_nnan_nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 zeroext %evl) {
42+
; CHECK-LABEL: vpreduce_fmin_nnan_nxv4bf16:
43+
; CHECK: # %bb.0:
44+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
45+
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
46+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
47+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
48+
; CHECK-NEXT: vfmv.s.f v8, fa5
49+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
50+
; CHECK-NEXT: vfredmin.vs v8, v9, v8, v0.t
51+
; CHECK-NEXT: vfmv.f.s fa5, v8
52+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
53+
; CHECK-NEXT: ret
54+
%s = call nnan bfloat @llvm.vp.reduce.fmin.nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 %evl)
55+
ret bfloat %s
56+
}
57+
58+
; llvm.vp.reduce.fmax over <4 x bfloat> with the nnan fast-math flag on the
; call. The expected sequence is the promoted-to-f32 reduction: widen with
; vfwcvtbf16.f.f.v / fcvt.s.bf16, reduce with a masked vfredmax.vs at e32,
; then narrow the scalar result with fcvt.bf16.s. No NaN handling code is
; expected in the output.
define bfloat @vpreduce_fmax_nnan_nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 zeroext %evl) {
59+
; CHECK-LABEL: vpreduce_fmax_nnan_nxv4bf16:
60+
; CHECK: # %bb.0:
61+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
62+
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
63+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
64+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
65+
; CHECK-NEXT: vfmv.s.f v8, fa5
66+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
67+
; CHECK-NEXT: vfredmax.vs v8, v9, v8, v0.t
68+
; CHECK-NEXT: vfmv.f.s fa5, v8
69+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
70+
; CHECK-NEXT: ret
71+
%s = call nnan bfloat @llvm.vp.reduce.fmax.nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 %evl)
72+
ret bfloat %s
73+
}
74+
75+
; llvm.vp.reduce.fminimum over <4 x bfloat> without nnan.
; After widening both operands to f32, the expected code first tests for NaNs:
; vmfne.vv compares the widened vector with itself under the mask and vcpop.m
; counts the hits, while feq.s/xori does the same self-comparison on the start
; value. If either test fires, the function returns the constant built by
; `lui a0, 523264` (523264 << 12 = 0x7fc00000) narrowed to bf16. Otherwise
; (.LBB4_2) it performs the masked vfredmin.vs at e32 and narrows the result
; with fcvt.bf16.s.
define bfloat @vpreduce_fminimum_nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 zeroext %evl) {
76+
; CHECK-LABEL: vpreduce_fminimum_nxv4bf16:
77+
; CHECK: # %bb.0:
78+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
79+
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
80+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
81+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
82+
; CHECK-NEXT: vmfne.vv v8, v9, v9, v0.t
83+
; CHECK-NEXT: feq.s a1, fa5, fa5
84+
; CHECK-NEXT: vcpop.m a2, v8, v0.t
85+
; CHECK-NEXT: xori a1, a1, 1
86+
; CHECK-NEXT: or a1, a2, a1
87+
; CHECK-NEXT: beqz a1, .LBB4_2
88+
; CHECK-NEXT: # %bb.1:
89+
; CHECK-NEXT: lui a0, 523264
90+
; CHECK-NEXT: fmv.w.x fa5, a0
91+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
92+
; CHECK-NEXT: ret
93+
; CHECK-NEXT: .LBB4_2:
94+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
95+
; CHECK-NEXT: vfmv.s.f v8, fa5
96+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
97+
; CHECK-NEXT: vfredmin.vs v8, v9, v8, v0.t
98+
; CHECK-NEXT: vfmv.f.s fa5, v8
99+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
100+
; CHECK-NEXT: ret
101+
%s = call bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 %evl)
102+
ret bfloat %s
103+
}
104+
105+
; llvm.vp.reduce.fmaximum over <4 x bfloat> without nnan.
; After widening both operands to f32, the expected code first tests for NaNs:
; vmfne.vv compares the widened vector with itself under the mask and vcpop.m
; counts the hits, while feq.s/xori does the same self-comparison on the start
; value. If either test fires, the function returns the constant built by
; `lui a0, 523264` (523264 << 12 = 0x7fc00000) narrowed to bf16. Otherwise
; (.LBB5_2) it performs the masked vfredmax.vs at e32 and narrows the result
; with fcvt.bf16.s.
define bfloat @vpreduce_fmaximum_nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 zeroext %evl) {
106+
; CHECK-LABEL: vpreduce_fmaximum_nxv4bf16:
107+
; CHECK: # %bb.0:
108+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
109+
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
110+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
111+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
112+
; CHECK-NEXT: vmfne.vv v8, v9, v9, v0.t
113+
; CHECK-NEXT: feq.s a1, fa5, fa5
114+
; CHECK-NEXT: vcpop.m a2, v8, v0.t
115+
; CHECK-NEXT: xori a1, a1, 1
116+
; CHECK-NEXT: or a1, a2, a1
117+
; CHECK-NEXT: beqz a1, .LBB5_2
118+
; CHECK-NEXT: # %bb.1:
119+
; CHECK-NEXT: lui a0, 523264
120+
; CHECK-NEXT: fmv.w.x fa5, a0
121+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
122+
; CHECK-NEXT: ret
123+
; CHECK-NEXT: .LBB5_2:
124+
; CHECK-NEXT: vsetivli zero, 1, e32, m1, ta, ma
125+
; CHECK-NEXT: vfmv.s.f v8, fa5
126+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
127+
; CHECK-NEXT: vfredmax.vs v8, v9, v8, v0.t
128+
; CHECK-NEXT: vfmv.f.s fa5, v8
129+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
130+
; CHECK-NEXT: ret
131+
%s = call bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 %evl)
132+
ret bfloat %s
133+
}
134+
135+
; llvm.vp.reduce.fminimum over <4 x bfloat> with the nnan flag on the call.
; The expected output contains no NaN pre-check or branch: the operands are
; widened to f32 (vfwcvtbf16.f.f.v / fcvt.s.bf16), reduced directly with a
; masked vfredmin.vs at e32, and the result is narrowed with fcvt.bf16.s.
define bfloat @vpreduce_fminimum_nnan_nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 zeroext %evl) {
136+
; CHECK-LABEL: vpreduce_fminimum_nnan_nxv4bf16:
137+
; CHECK: # %bb.0:
138+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
139+
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
140+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
141+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
142+
; CHECK-NEXT: vfmv.s.f v8, fa5
143+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
144+
; CHECK-NEXT: vfredmin.vs v8, v9, v8, v0.t
145+
; CHECK-NEXT: vfmv.f.s fa5, v8
146+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
147+
; CHECK-NEXT: ret
148+
%s = call nnan bfloat @llvm.vp.reduce.fminimum.nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 %evl)
149+
ret bfloat %s
150+
}
151+
152+
; llvm.vp.reduce.fmaximum over <4 x bfloat> with the nnan flag on the call.
; The expected output contains no NaN pre-check or branch: the operands are
; widened to f32 (vfwcvtbf16.f.f.v / fcvt.s.bf16), reduced directly with a
; masked vfredmax.vs at e32, and the result is narrowed with fcvt.bf16.s.
define bfloat @vpreduce_fmaximum_nnan_nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 zeroext %evl) {
153+
; CHECK-LABEL: vpreduce_fmaximum_nnan_nxv4bf16:
154+
; CHECK: # %bb.0:
155+
; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
156+
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8
157+
; CHECK-NEXT: fcvt.s.bf16 fa5, fa0
158+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
159+
; CHECK-NEXT: vfmv.s.f v8, fa5
160+
; CHECK-NEXT: vsetvli zero, a0, e32, m1, ta, ma
161+
; CHECK-NEXT: vfredmax.vs v8, v9, v8, v0.t
162+
; CHECK-NEXT: vfmv.f.s fa5, v8
163+
; CHECK-NEXT: fcvt.bf16.s fa0, fa5
164+
; CHECK-NEXT: ret
165+
%s = call nnan bfloat @llvm.vp.reduce.fmaximum.nxv4bf16(bfloat %start, <4 x bfloat> %val, <4 x i1> %m, i32 %evl)
166+
ret bfloat %s
167+
}

0 commit comments

Comments
 (0)