Skip to content

Commit c1f7897

Browse files
committed
[VP][RISCV] Enable promotion on fixed-length vp intrinsics with zvfbfmin
Following #112393, this change promotes VP intrinsics to fp32 for zvfbfmin (without zvfbfa).
1 parent 8218055 commit c1f7897

19 files changed

+4378
-479
lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1621,7 +1621,7 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
16211621
if (!isTypeLegal(F32VecVT))
16221622
continue;
16231623
setOperationPromotedToType(ZvfhminZvfbfminPromoteOps, VT, F32VecVT);
1624-
// TODO: Promote VP ops to fp32.
1624+
setOperationPromotedToType(ZvfhminZvfbfminPromoteVPOps, VT, F32VecVT);
16251625
continue;
16261626
}
16271627

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-ceil-vp.ll

Lines changed: 276 additions & 68 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-floor-vp.ll

Lines changed: 276 additions & 68 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-fmaximum-vp.ll

Lines changed: 184 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,189 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2-
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+v,+m -target-abi=ilp32d \
2+
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfh,+zvfbfmin,+v,+m -target-abi=ilp32d \
33
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
4-
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+v,+m -target-abi=lp64d \
4+
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfh,+zvfbfmin,+v,+m -target-abi=lp64d \
55
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFH
6-
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+v,+m -target-abi=ilp32d \
6+
; RUN: llc -mtriple=riscv32 -mattr=+d,+zvfhmin,+zvfbfmin,+v,+m -target-abi=ilp32d \
77
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
8-
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+v,+m -target-abi=lp64d \
8+
; RUN: llc -mtriple=riscv64 -mattr=+d,+zvfhmin,+zvfbfmin,+v,+m -target-abi=lp64d \
99
; RUN: -verify-machineinstrs < %s | FileCheck %s --check-prefixes=CHECK,ZVFHMIN
1010

11+
12+
define <2 x bfloat> @vmaximum_vv_v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 zeroext %evl) {
13+
; CHECK-LABEL: vmaximum_vv_v2bf16:
14+
; CHECK: # %bb.0:
15+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
16+
; CHECK-NEXT: vmv1r.v v10, v0
17+
; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9, v0.t
18+
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t
19+
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
20+
; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t
21+
; CHECK-NEXT: vmerge.vvm v8, v9, v11, v0
22+
; CHECK-NEXT: vmv1r.v v0, v10
23+
; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t
24+
; CHECK-NEXT: vmerge.vvm v9, v11, v9, v0
25+
; CHECK-NEXT: vmv1r.v v0, v10
26+
; CHECK-NEXT: vfmax.vv v9, v9, v8, v0.t
27+
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
28+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
29+
; CHECK-NEXT: ret
30+
%v = call <2 x bfloat> @llvm.vp.maximum.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> %m, i32 %evl)
31+
ret <2 x bfloat> %v
32+
}
33+
34+
define <2 x bfloat> @vmaximum_vv_v2bf16_unmasked(<2 x bfloat> %va, <2 x bfloat> %vb, i32 zeroext %evl) {
35+
; CHECK-LABEL: vmaximum_vv_v2bf16_unmasked:
36+
; CHECK: # %bb.0:
37+
; CHECK-NEXT: vsetvli zero, a0, e16, mf4, ta, ma
38+
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
39+
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
40+
; CHECK-NEXT: vmfeq.vv v0, v10, v10
41+
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
42+
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
43+
; CHECK-NEXT: vsetvli zero, zero, e32, mf2, ta, ma
44+
; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0
45+
; CHECK-NEXT: vmfeq.vv v0, v8, v8
46+
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
47+
; CHECK-NEXT: vfmax.vv v9, v8, v9
48+
; CHECK-NEXT: vsetvli zero, zero, e16, mf4, ta, ma
49+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
50+
; CHECK-NEXT: ret
51+
%v = call <2 x bfloat> @llvm.vp.maximum.v2bf16(<2 x bfloat> %va, <2 x bfloat> %vb, <2 x i1> splat (i1 true), i32 %evl)
52+
ret <2 x bfloat> %v
53+
}
54+
55+
define <4 x bfloat> @vmaximum_vv_v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 zeroext %evl) {
56+
; CHECK-LABEL: vmaximum_vv_v4bf16:
57+
; CHECK: # %bb.0:
58+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
59+
; CHECK-NEXT: vmv1r.v v10, v0
60+
; CHECK-NEXT: vfwcvtbf16.f.f.v v11, v9, v0.t
61+
; CHECK-NEXT: vfwcvtbf16.f.f.v v9, v8, v0.t
62+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
63+
; CHECK-NEXT: vmfeq.vv v0, v9, v9, v0.t
64+
; CHECK-NEXT: vmerge.vvm v8, v9, v11, v0
65+
; CHECK-NEXT: vmv1r.v v0, v10
66+
; CHECK-NEXT: vmfeq.vv v0, v11, v11, v0.t
67+
; CHECK-NEXT: vmerge.vvm v9, v11, v9, v0
68+
; CHECK-NEXT: vmv1r.v v0, v10
69+
; CHECK-NEXT: vfmax.vv v9, v9, v8, v0.t
70+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
71+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9, v0.t
72+
; CHECK-NEXT: ret
73+
%v = call <4 x bfloat> @llvm.vp.maximum.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> %m, i32 %evl)
74+
ret <4 x bfloat> %v
75+
}
76+
77+
define <4 x bfloat> @vmaximum_vv_v4bf16_unmasked(<4 x bfloat> %va, <4 x bfloat> %vb, i32 zeroext %evl) {
78+
; CHECK-LABEL: vmaximum_vv_v4bf16_unmasked:
79+
; CHECK: # %bb.0:
80+
; CHECK-NEXT: vsetvli zero, a0, e16, mf2, ta, ma
81+
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
82+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
83+
; CHECK-NEXT: vmfeq.vv v0, v10, v10
84+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
85+
; CHECK-NEXT: vfwcvtbf16.f.f.v v8, v9
86+
; CHECK-NEXT: vsetvli zero, zero, e32, m1, ta, ma
87+
; CHECK-NEXT: vmerge.vvm v9, v10, v8, v0
88+
; CHECK-NEXT: vmfeq.vv v0, v8, v8
89+
; CHECK-NEXT: vmerge.vvm v8, v8, v10, v0
90+
; CHECK-NEXT: vfmax.vv v9, v8, v9
91+
; CHECK-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
92+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v9
93+
; CHECK-NEXT: ret
94+
%v = call <4 x bfloat> @llvm.vp.maximum.v4bf16(<4 x bfloat> %va, <4 x bfloat> %vb, <4 x i1> splat (i1 true), i32 %evl)
95+
ret <4 x bfloat> %v
96+
}
97+
98+
define <8 x bfloat> @vmaximum_vv_v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 zeroext %evl) {
99+
; CHECK-LABEL: vmaximum_vv_v8bf16:
100+
; CHECK: # %bb.0:
101+
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
102+
; CHECK-NEXT: vmv1r.v v10, v0
103+
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9, v0.t
104+
; CHECK-NEXT: vfwcvtbf16.f.f.v v14, v8, v0.t
105+
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
106+
; CHECK-NEXT: vmfeq.vv v8, v14, v14, v0.t
107+
; CHECK-NEXT: vmv1r.v v0, v8
108+
; CHECK-NEXT: vmerge.vvm v16, v14, v12, v0
109+
; CHECK-NEXT: vmv1r.v v0, v10
110+
; CHECK-NEXT: vmfeq.vv v8, v12, v12, v0.t
111+
; CHECK-NEXT: vmv1r.v v0, v8
112+
; CHECK-NEXT: vmerge.vvm v8, v12, v14, v0
113+
; CHECK-NEXT: vmv1r.v v0, v10
114+
; CHECK-NEXT: vfmax.vv v12, v8, v16, v0.t
115+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
116+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12, v0.t
117+
; CHECK-NEXT: ret
118+
%v = call <8 x bfloat> @llvm.vp.maximum.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> %m, i32 %evl)
119+
ret <8 x bfloat> %v
120+
}
121+
122+
define <8 x bfloat> @vmaximum_vv_v8bf16_unmasked(<8 x bfloat> %va, <8 x bfloat> %vb, i32 zeroext %evl) {
123+
; CHECK-LABEL: vmaximum_vv_v8bf16_unmasked:
124+
; CHECK: # %bb.0:
125+
; CHECK-NEXT: vsetvli zero, a0, e16, m1, ta, ma
126+
; CHECK-NEXT: vfwcvtbf16.f.f.v v10, v8
127+
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
128+
; CHECK-NEXT: vmfeq.vv v0, v10, v10
129+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
130+
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v9
131+
; CHECK-NEXT: vsetvli zero, zero, e32, m2, ta, ma
132+
; CHECK-NEXT: vmerge.vvm v8, v10, v12, v0
133+
; CHECK-NEXT: vmfeq.vv v0, v12, v12
134+
; CHECK-NEXT: vmerge.vvm v10, v12, v10, v0
135+
; CHECK-NEXT: vfmax.vv v10, v10, v8
136+
; CHECK-NEXT: vsetvli zero, zero, e16, m1, ta, ma
137+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v10
138+
; CHECK-NEXT: ret
139+
%v = call <8 x bfloat> @llvm.vp.maximum.v8bf16(<8 x bfloat> %va, <8 x bfloat> %vb, <8 x i1> splat (i1 true), i32 %evl)
140+
ret <8 x bfloat> %v
141+
}
142+
143+
define <16 x bfloat> @vmaximum_vv_v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 zeroext %evl) {
144+
; CHECK-LABEL: vmaximum_vv_v16bf16:
145+
; CHECK: # %bb.0:
146+
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
147+
; CHECK-NEXT: vmv1r.v v12, v0
148+
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10, v0.t
149+
; CHECK-NEXT: vfwcvtbf16.f.f.v v20, v8, v0.t
150+
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
151+
; CHECK-NEXT: vmfeq.vv v8, v20, v20, v0.t
152+
; CHECK-NEXT: vmv1r.v v0, v8
153+
; CHECK-NEXT: vmerge.vvm v24, v20, v16, v0
154+
; CHECK-NEXT: vmv1r.v v0, v12
155+
; CHECK-NEXT: vmfeq.vv v8, v16, v16, v0.t
156+
; CHECK-NEXT: vmv1r.v v0, v8
157+
; CHECK-NEXT: vmerge.vvm v8, v16, v20, v0
158+
; CHECK-NEXT: vmv1r.v v0, v12
159+
; CHECK-NEXT: vfmax.vv v16, v8, v24, v0.t
160+
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
161+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v16, v0.t
162+
; CHECK-NEXT: ret
163+
%v = call <16 x bfloat> @llvm.vp.maximum.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> %m, i32 %evl)
164+
ret <16 x bfloat> %v
165+
}
166+
167+
define <16 x bfloat> @vmaximum_vv_v16bf16_unmasked(<16 x bfloat> %va, <16 x bfloat> %vb, i32 zeroext %evl) {
168+
; CHECK-LABEL: vmaximum_vv_v16bf16_unmasked:
169+
; CHECK: # %bb.0:
170+
; CHECK-NEXT: vsetvli zero, a0, e16, m2, ta, ma
171+
; CHECK-NEXT: vfwcvtbf16.f.f.v v12, v8
172+
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
173+
; CHECK-NEXT: vmfeq.vv v0, v12, v12
174+
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
175+
; CHECK-NEXT: vfwcvtbf16.f.f.v v16, v10
176+
; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma
177+
; CHECK-NEXT: vmerge.vvm v8, v12, v16, v0
178+
; CHECK-NEXT: vmfeq.vv v0, v16, v16
179+
; CHECK-NEXT: vmerge.vvm v12, v16, v12, v0
180+
; CHECK-NEXT: vfmax.vv v12, v12, v8
181+
; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma
182+
; CHECK-NEXT: vfncvtbf16.f.f.w v8, v12
183+
; CHECK-NEXT: ret
184+
%v = call <16 x bfloat> @llvm.vp.maximum.v16bf16(<16 x bfloat> %va, <16 x bfloat> %vb, <16 x i1> splat (i1 true), i32 %evl)
185+
ret <16 x bfloat> %v
186+
}
11187
declare <2 x half> @llvm.vp.maximum.v2f16(<2 x half>, <2 x half>, <2 x i1>, i32)
12188

13189
define <2 x half> @vfmax_vv_v2f16(<2 x half> %va, <2 x half> %vb, <2 x i1> %m, i32 zeroext %evl) {
@@ -607,10 +783,10 @@ define <32 x double> @vfmax_vv_v32f64(<32 x double> %va, <32 x double> %vb, <32
607783
; CHECK-NEXT: mv a0, a2
608784
; CHECK-NEXT: vsetivli zero, 2, e8, mf4, ta, ma
609785
; CHECK-NEXT: vslidedown.vi v7, v0, 2
610-
; CHECK-NEXT: bltu a2, a1, .LBB24_2
786+
; CHECK-NEXT: bltu a2, a1, .LBB32_2
611787
; CHECK-NEXT: # %bb.1:
612788
; CHECK-NEXT: li a0, 16
613-
; CHECK-NEXT: .LBB24_2:
789+
; CHECK-NEXT: .LBB32_2:
614790
; CHECK-NEXT: vmv1r.v v0, v6
615791
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
616792
; CHECK-NEXT: vmfeq.vv v26, v8, v8, v0.t
@@ -708,10 +884,10 @@ define <32 x double> @vfmax_vv_v32f64_unmasked(<32 x double> %va, <32 x double>
708884
; CHECK-NEXT: vle64.v v24, (a0)
709885
; CHECK-NEXT: li a1, 16
710886
; CHECK-NEXT: mv a0, a2
711-
; CHECK-NEXT: bltu a2, a1, .LBB25_2
887+
; CHECK-NEXT: bltu a2, a1, .LBB33_2
712888
; CHECK-NEXT: # %bb.1:
713889
; CHECK-NEXT: li a0, 16
714-
; CHECK-NEXT: .LBB25_2:
890+
; CHECK-NEXT: .LBB33_2:
715891
; CHECK-NEXT: vsetvli zero, a0, e64, m8, ta, ma
716892
; CHECK-NEXT: vmfeq.vv v0, v8, v8
717893
; CHECK-NEXT: vmfeq.vv v7, v24, v24

0 commit comments

Comments
 (0)