Skip to content

Commit cb61724

Browse files
mshockwave, jaidTw, and topperc
authored and committed
[RISCV] Add LLVM IR intrinsics and codegen for XSfvfexp* and XSfvfexpa* (llvm#164499)
This patch adds LLVM IR intrinsics and basic codegen support for the XSfvfexp* and XSfvfexpa* extensions.

Co-authored-by: Jesse Huang <[email protected]>
Co-authored-by: Craig Topper <[email protected]>
1 parent 21f54f8 commit cb61724

File tree

10 files changed

+1111
-2
lines changed

10 files changed

+1111
-2
lines changed

llvm/include/llvm/IR/IntrinsicsRISCVXsf.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,10 @@ let TargetPrefix = "riscv" in {
162162
defm "" : RISCVSFCustomVC_XVV<["x", "i", "v", "f"]>;
163163
defm "" : RISCVSFCustomVC_XVW<["x", "i", "v", "f"]>;
164164

165+
// XSfvfexp* and XSfvfexpa*
166+
defm sf_vfexp : RISCVUnaryAA;
167+
defm sf_vfexpa : RISCVUnaryAA;
168+
165169
// XSfvqmaccdod
166170
def int_riscv_sf_vqmaccu_2x8x2 : RISCVSFCustomVMACC;
167171
def int_riscv_sf_vqmacc_2x8x2 : RISCVSFCustomVMACC;

llvm/lib/Target/RISCV/RISCVInstrInfoXSf.td

Lines changed: 79 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -218,11 +218,13 @@ let Predicates = [HasVendorXSfvcp], mayLoad = 0, mayStore = 0,
218218
}
219219

220220
let Predicates = [HasVendorXSfvfexpAny], DecoderNamespace = "XSfvector" in {
221-
def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">;
221+
def SF_VFEXP_V : VALUVs2<0b010011, 0b00111, OPFVV, "sf.vfexp.v">,
222+
SchedUnaryMC<"WriteSF_VFExp", "ReadSF_VFExp">;
222223
}
223224

224225
let Predicates = [HasVendorXSfvfexpa], DecoderNamespace = "XSfvector" in {
225-
def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">;
226+
def SF_VFEXPA_V : VALUVs2<0b010011, 0b00110, OPFVV, "sf.vfexpa.v">,
227+
SchedUnaryMC<"WriteSF_VFExpa", "ReadSF_VFExpa">;
226228
}
227229

228230
let Predicates = [HasVendorXSfvqmaccdod], DecoderNamespace = "XSfvector",
@@ -487,6 +489,48 @@ let Predicates = [HasVendorXSfvfnrclipxfqf], AltFmtType = IS_NOT_ALTFMT in {
487489
defm SF_VFNRCLIP_X_F_QF : VPseudoSiFiveVFNRCLIP;
488490
}
489491

492+
// Computes the list of SEW values (`val`) for which sf.vfexp / sf.vfexpa
// pseudos are instantiated at LMUL `mx`. The starting point is
// SchedSEWSet<mx, isF=1>.val, which is then filtered:
//   - IsBF16 set:               32 and 64 are removed;
//   - IsBF16 clear, IsApprox set: the base set is used unchanged;
//   - both clear:               64 is removed.
// IsBF16 is tested first, so it takes precedence when both bits are set.
class VFExpSchedSEWSet<string mx, bit IsBF16, bit IsApprox> {
493+
defvar BaseSet = SchedSEWSet<mx, isF=1>.val;
494+
list<int> val = !if(IsBF16, !listremove(BaseSet, [32, 64]),
495+
!if(IsApprox, BaseSet, !listremove(BaseSet, [64])));
496+
}
497+
// Defines the unary pseudo instructions shared by sf.vfexp and sf.vfexpa.
// For every LMUL in MxListF and every SEW returned by
// VFExpSchedSEWSet<mx, IsBF16, IsApprox>, two records are emitted:
//   <prefix>_V_<mx>_E<sew>        (unmasked)
//   <prefix>_V_<mx>_E<sew>_MASK   (masked)
// IsApprox also switches the scheduling class names from
// WriteSF_VFExp/ReadSF_VFExp to WriteSF_VFExpa/ReadSF_VFExpa.
multiclass VPseudoVFExp_V<bit IsBF16 = false, bit IsApprox = false> {
498+
// Suffix appended to "WriteSF_" / "ReadSF_" below.
defvar SchedSuffix = !if(IsApprox, "VFExpa", "VFExp");
499+
500+
foreach m = MxListF in {
501+
defvar mx = m.MX;
502+
foreach e = VFExpSchedSEWSet<mx, IsBF16, IsApprox>.val in {
503+
let VLMul = m.value in {
504+
// Unmasked form: source and destination use the same register class.
def "_V_" # mx # "_E" # e
505+
: VPseudoUnaryNoMask<m.vrclass, m.vrclass>,
506+
SchedUnary<"WriteSF_" # SchedSuffix, "ReadSF_" # SchedSuffix,
507+
mx, e, forcePassthruRead=true>;
508+
// Masked form. NOTE(review): MaskIdx = 2 presumably matches the operand
// order of VPseudoUnaryMask (passthru, source, mask) -- confirm there.
def "_V_" # mx # "_E" # e # "_MASK"
509+
: VPseudoUnaryMask<m.vrclass, m.vrclass>,
510+
RISCVMaskedPseudo<MaskIdx = 2>,
511+
SchedUnary<"WriteSF_" # SchedSuffix, "ReadSF_" # SchedSuffix,
512+
mx, e, forcePassthruRead=true>;
513+
}
514+
}
515+
}
516+
}
518+
// BF16 flavour of the sf.vfexp pseudos: instantiated with IsBF16=true and
// tagged AltFmtType = IS_ALTFMT, gated on HasVendorXSfvfbfexp16e.
let Predicates = [HasVendorXSfvfbfexp16e], hasSideEffects = 0 in {
519+
let AltFmtType = IS_ALTFMT in {
520+
defm PseudoSF_VFEXP_ALT : VPseudoVFExp_V<IsBF16=true>;
521+
}
522+
}
523+
524+
// Non-BF16 sf.vfexp pseudos: instantiated with the multiclass defaults
// (IsBF16=false, IsApprox=false) and tagged AltFmtType = IS_NOT_ALTFMT,
// gated on HasVendorXSfvfexpAnyFloat.
let Predicates = [HasVendorXSfvfexpAnyFloat], hasSideEffects = 0 in {
525+
let AltFmtType = IS_NOT_ALTFMT in {
526+
defm PseudoSF_VFEXP : VPseudoVFExp_V;
527+
}
528+
}
529+
530+
// sf.vfexpa pseudos: instantiated with IsApprox=true and tagged
// AltFmtType = IS_NOT_ALTFMT, gated on HasVendorXSfvfexpa.
// NOTE(review): unlike the PseudoSF_VFEXP_ALT and PseudoSF_VFEXP blocks, this
// one does not set hasSideEffects = 0 here -- confirm the VPseudoUnary* base
// classes already clear it.
let Predicates = [HasVendorXSfvfexpa], AltFmtType = IS_NOT_ALTFMT in {
531+
defm PseudoSF_VFEXPA : VPseudoVFExp_V<IsApprox=true>;
532+
}
533+
490534
// SDNode
491535
def SDT_SF_VC_V_X : SDTypeProfile<1, 4, [SDTCisVec<0>,
492536
SDTCisVT<1, XLenVT>,
@@ -893,3 +937,36 @@ let Predicates = [HasVendorXSfcease] in {
893937
let rs2 = 0b00101;
894938
}
895939
}
940+
941+
// Selection patterns mapping int_riscv_sf_vfexp onto the SEW-aware pseudos.
// Each element type has its own predicate; no F64 vector types are listed
// for this intrinsic.

// BF16 vectors select the PseudoSF_VFEXP_ALT pseudos.
let Predicates = [HasVendorXSfvfbfexp16e] in {
942+
defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP_ALT",
943+
AllBF16Vectors,
944+
isSEWAware=1>;
945+
}
946+
947+
// F16 vectors (MF4 through M8) select the PseudoSF_VFEXP pseudos.
let Predicates = [HasVendorXSfvfexp16e] in {
948+
defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP",
949+
[VF16MF4, VF16MF2, VF16M1, VF16M2, VF16M4, VF16M8],
950+
isSEWAware=1>;
951+
}
952+
953+
// F32 vectors (MF2 through M8) select the PseudoSF_VFEXP pseudos.
let Predicates = [HasVendorXSfvfexp32e] in {
954+
defm : VPatUnaryV_V<"int_riscv_sf_vfexp", "PseudoSF_VFEXP",
955+
[VF32MF2, VF32M1, VF32M2, VF32M4, VF32M8], isSEWAware=1>;
956+
}
957+
958+
// Selection patterns mapping int_riscv_sf_vfexpa onto the SEW-aware
// PseudoSF_VFEXPA pseudos, split by element type.

// F32 vectors (MF2 through M8).
// NOTE(review): only HasVendorXSfvfexpa is required here, whereas the F16
// block below additionally requires HasVInstructionsF16 -- presumably F32
// vector support is implied by the extension; confirm.
let Predicates = [HasVendorXSfvfexpa] in {
959+
defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA",
960+
[VF32MF2, VF32M1, VF32M2, VF32M4, VF32M8], isSEWAware=1>;
961+
}
962+
963+
// F16 vectors (MF4 through M8); also requires HasVInstructionsF16.
let Predicates = [HasVendorXSfvfexpa, HasVInstructionsF16] in {
964+
defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA",
965+
[VF16MF4, VF16MF2, VF16M1, VF16M2, VF16M4, VF16M8],
966+
isSEWAware=1>;
967+
}
968+
969+
// F64 vectors (M1 through M8) are gated on the separate
// HasVendorXSfvfexpa64e predicate.
let Predicates = [HasVendorXSfvfexpa64e] in {
970+
defm : VPatUnaryV_V<"int_riscv_sf_vfexpa", "PseudoSF_VFEXPA",
971+
[VF64M1, VF64M2, VF64M4, VF64M8], isSEWAware=1>;
972+
}

llvm/lib/Target/RISCV/RISCVSchedSiFive7.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1588,6 +1588,10 @@ multiclass SiFive7SchedResources<int vlen, bit dualVALU,
15881588
//===----------------------------------------------------------------------===//
15891589
// Unsupported extensions
15901590
defm : UnsupportedSchedQ;
1591+
// TODO: scheduling info of XSfvfexp* and XSfvfexpa*
1592+
// for SiFive7 will be added in follow-up patches.
1593+
defm : UnsupportedSchedXSfvfexp;
1594+
defm : UnsupportedSchedXSfvfexpa;
15911595
defm : UnsupportedSchedZabha;
15921596
defm : UnsupportedSchedZbc;
15931597
defm : UnsupportedSchedZbkb;

llvm/lib/Target/RISCV/RISCVSchedule.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -523,6 +523,8 @@ include "RISCVScheduleZvk.td"
523523
// Vendor Extensions
524524
multiclass UnsupportedSchedXsf {
525525
defm : UnsupportedSchedXsfvcp;
526+
defm : UnsupportedSchedXSfvfexp;
527+
defm : UnsupportedSchedXSfvfexpa;
526528
defm : UnsupportedSchedXSfvfnrclipxfqf;
527529
defm : UnsupportedSchedXSfvfwmaccqqq;
528530
defm : UnsupportedSchedXSfvqmaccdod;

llvm/lib/Target/RISCV/RISCVScheduleXSf.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,3 +99,23 @@ defm : LMULWriteRes<"WriteSF_VFWMACC_QQQ", []>;
9999
defm : LMULReadAdvance<"ReadSF_VFWMACC_QQQ", 0>;
100100
} // Unsupported = true
101101
}
102+
103+
// Scheduling classes for sf.vfexp: declares the WriteSF_VFExp / ReadSF_VFExp
// families (presumably one SchedWrite/SchedRead per LMUL and SEW -- see
// LMULSEWSchedWritesF / LMULSEWSchedReadsF for the exact expansion).
defm "" : LMULSEWSchedWritesF<"WriteSF_VFExp">;
104+
defm "" : LMULSEWSchedReadsF<"ReadSF_VFExp">;
105+
106+
// For scheduling models that do not model XSfvfexp: binds the write classes
// to an empty resource list and the read classes to a read advance of 0,
// all under Unsupported = true.
multiclass UnsupportedSchedXSfvfexp {
107+
let Unsupported = true in {
108+
defm : LMULSEWWriteResF<"WriteSF_VFExp", []>;
109+
defm : LMULSEWReadAdvanceF<"ReadSF_VFExp", 0>;
110+
} // Unsupported = true
111+
}
112+
113+
// Scheduling classes for sf.vfexpa: declares the WriteSF_VFExpa /
// ReadSF_VFExpa families (presumably one SchedWrite/SchedRead per LMUL and
// SEW -- see LMULSEWSchedWritesF / LMULSEWSchedReadsF for the exact expansion).
defm "" : LMULSEWSchedWritesF<"WriteSF_VFExpa">;
114+
defm "" : LMULSEWSchedReadsF<"ReadSF_VFExpa">;
115+
116+
// For scheduling models that do not model XSfvfexpa: binds the write classes
// to an empty resource list and the read classes to a read advance of 0,
// all under Unsupported = true.
multiclass UnsupportedSchedXSfvfexpa {
117+
let Unsupported = true in {
118+
defm : LMULSEWWriteResF<"WriteSF_VFExpa", []>;
119+
defm : LMULSEWReadAdvanceF<"ReadSF_VFExpa", 0>;
120+
} // Unsupported = true
121+
}
Lines changed: 191 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,191 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
2+
; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+v,+zvfbfmin,+xsfvfbfexp16e \
3+
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
4+
; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+v,+zvfbfmin,+xsfvfbfexp16e \
5+
; RUN: -verify-machineinstrs | FileCheck %s --check-prefixes=CHECK
6+
7+
; Unmasked llvm.riscv.sf.vfexp on <vscale x 1 x bfloat>. The first intrinsic
; operand is poison (presumably the passthru -- confirm against RISCVUnaryAA).
; Expected output: a ta/ma vsetvli at e16alt/mf4, then sf.vfexp.v v8, v8.
define <vscale x 1 x bfloat> @intrinsic_sf_vfexp_v_nxv1bf16(<vscale x 1 x bfloat> %0, iXLen %1) nounwind {
8+
; CHECK-LABEL: intrinsic_sf_vfexp_v_nxv1bf16:
9+
; CHECK: # %bb.0: # %entry
10+
; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, ta, ma
11+
; CHECK-NEXT: sf.vfexp.v v8, v8
12+
; CHECK-NEXT: ret
13+
entry:
14+
%a = call <vscale x 1 x bfloat> @llvm.riscv.sf.vfexp.nxv1bf16(
15+
<vscale x 1 x bfloat> poison,
16+
<vscale x 1 x bfloat> %0,
17+
iXLen %1)
18+
19+
ret <vscale x 1 x bfloat> %a
20+
}
21+
22+
; Unmasked llvm.riscv.sf.vfexp on <vscale x 2 x bfloat>. The first intrinsic
; operand is poison (presumably the passthru -- confirm against RISCVUnaryAA).
; Expected output: a ta/ma vsetvli at e16alt/mf2, then sf.vfexp.v v8, v8.
define <vscale x 2 x bfloat> @intrinsic_sf_vfexp_v_nxv2bf16(<vscale x 2 x bfloat> %0, iXLen %1) nounwind {
23+
; CHECK-LABEL: intrinsic_sf_vfexp_v_nxv2bf16:
24+
; CHECK: # %bb.0: # %entry
25+
; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, ta, ma
26+
; CHECK-NEXT: sf.vfexp.v v8, v8
27+
; CHECK-NEXT: ret
28+
entry:
29+
%a = call <vscale x 2 x bfloat> @llvm.riscv.sf.vfexp.nxv2bf16(
30+
<vscale x 2 x bfloat> poison,
31+
<vscale x 2 x bfloat> %0,
32+
iXLen %1)
33+
34+
ret <vscale x 2 x bfloat> %a
35+
}
36+
37+
; Unmasked llvm.riscv.sf.vfexp on <vscale x 4 x bfloat>. The first intrinsic
; operand is poison (presumably the passthru -- confirm against RISCVUnaryAA).
; Expected output: a ta/ma vsetvli at e16alt/m1, then sf.vfexp.v v8, v8.
define <vscale x 4 x bfloat> @intrinsic_sf_vfexp_v_nxv4bf16(<vscale x 4 x bfloat> %0, iXLen %1) nounwind {
38+
; CHECK-LABEL: intrinsic_sf_vfexp_v_nxv4bf16:
39+
; CHECK: # %bb.0: # %entry
40+
; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, ta, ma
41+
; CHECK-NEXT: sf.vfexp.v v8, v8
42+
; CHECK-NEXT: ret
43+
entry:
44+
%a = call <vscale x 4 x bfloat> @llvm.riscv.sf.vfexp.nxv4bf16(
45+
<vscale x 4 x bfloat> poison,
46+
<vscale x 4 x bfloat> %0,
47+
iXLen %1)
48+
49+
ret <vscale x 4 x bfloat> %a
50+
}
51+
52+
; Unmasked llvm.riscv.sf.vfexp on <vscale x 8 x bfloat>. The first intrinsic
; operand is poison (presumably the passthru -- confirm against RISCVUnaryAA).
; Expected output: a ta/ma vsetvli at e16alt/m2, then sf.vfexp.v v8, v8.
define <vscale x 8 x bfloat> @intrinsic_sf_vfexp_v_nxv8bf16(<vscale x 8 x bfloat> %0, iXLen %1) nounwind {
53+
; CHECK-LABEL: intrinsic_sf_vfexp_v_nxv8bf16:
54+
; CHECK: # %bb.0: # %entry
55+
; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, ta, ma
56+
; CHECK-NEXT: sf.vfexp.v v8, v8
57+
; CHECK-NEXT: ret
58+
entry:
59+
%a = call <vscale x 8 x bfloat> @llvm.riscv.sf.vfexp.nxv8bf16(
60+
<vscale x 8 x bfloat> poison,
61+
<vscale x 8 x bfloat> %0,
62+
iXLen %1)
63+
64+
ret <vscale x 8 x bfloat> %a
65+
}
66+
67+
; Unmasked llvm.riscv.sf.vfexp on <vscale x 16 x bfloat>. The first intrinsic
; operand is poison (presumably the passthru -- confirm against RISCVUnaryAA).
; Expected output: a ta/ma vsetvli at e16alt/m4, then sf.vfexp.v v8, v8.
define <vscale x 16 x bfloat> @intrinsic_sf_vfexp_v_nxv16bf16(<vscale x 16 x bfloat> %0, iXLen %1) nounwind {
68+
; CHECK-LABEL: intrinsic_sf_vfexp_v_nxv16bf16:
69+
; CHECK: # %bb.0: # %entry
70+
; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, ta, ma
71+
; CHECK-NEXT: sf.vfexp.v v8, v8
72+
; CHECK-NEXT: ret
73+
entry:
74+
%a = call <vscale x 16 x bfloat> @llvm.riscv.sf.vfexp.nxv16bf16(
75+
<vscale x 16 x bfloat> poison,
76+
<vscale x 16 x bfloat> %0,
77+
iXLen %1)
78+
79+
ret <vscale x 16 x bfloat> %a
80+
}
81+
82+
; Unmasked llvm.riscv.sf.vfexp on <vscale x 32 x bfloat>. The first intrinsic
; operand is poison (presumably the passthru -- confirm against RISCVUnaryAA).
; Expected output: a ta/ma vsetvli at e16alt/m8, then sf.vfexp.v v8, v8.
define <vscale x 32 x bfloat> @intrinsic_sf_vfexp_v_nxv32bf16(<vscale x 32 x bfloat> %0, iXLen %1) nounwind {
83+
; CHECK-LABEL: intrinsic_sf_vfexp_v_nxv32bf16:
84+
; CHECK: # %bb.0: # %entry
85+
; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, ta, ma
86+
; CHECK-NEXT: sf.vfexp.v v8, v8
87+
; CHECK-NEXT: ret
88+
entry:
89+
%a = call <vscale x 32 x bfloat> @llvm.riscv.sf.vfexp.nxv32bf16(
90+
<vscale x 32 x bfloat> poison,
91+
<vscale x 32 x bfloat> %0,
92+
iXLen %1)
93+
94+
ret <vscale x 32 x bfloat> %a
95+
}
96+
97+
; Masked llvm.riscv.sf.vfexp.mask on <vscale x 1 x bfloat>. The trailing
; iXLen 0 operand is presumably the policy (confirm against the intrinsic
; definition). Expected output: a tu/mu vsetvli at e16alt/mf4, then the
; v0.t-masked form sf.vfexp.v v8, v9, v0.t.
define <vscale x 1 x bfloat> @intrinsic_sf_vfexp_mask_v_nxv1bf16(<vscale x 1 x bfloat> %0, <vscale x 1 x bfloat> %1, <vscale x 1 x i1> %m, iXLen %2) nounwind {
98+
; CHECK-LABEL: intrinsic_sf_vfexp_mask_v_nxv1bf16:
99+
; CHECK: # %bb.0: # %entry
100+
; CHECK-NEXT: vsetvli zero, a0, e16alt, mf4, tu, mu
101+
; CHECK-NEXT: sf.vfexp.v v8, v9, v0.t
102+
; CHECK-NEXT: ret
103+
entry:
104+
%a = call <vscale x 1 x bfloat> @llvm.riscv.sf.vfexp.mask.nxv1bf16(
105+
<vscale x 1 x bfloat> %0,
106+
<vscale x 1 x bfloat> %1,
107+
<vscale x 1 x i1> %m,
108+
iXLen %2, iXLen 0)
109+
110+
ret <vscale x 1 x bfloat> %a
111+
}
112+
113+
; Masked llvm.riscv.sf.vfexp.mask on <vscale x 2 x bfloat>. The trailing
; iXLen 0 operand is presumably the policy (confirm against the intrinsic
; definition). Expected output: a tu/mu vsetvli at e16alt/mf2, then the
; v0.t-masked form sf.vfexp.v v8, v9, v0.t.
define <vscale x 2 x bfloat> @intrinsic_sf_vfexp_mask_v_nxv2bf16(<vscale x 2 x bfloat> %0, <vscale x 2 x bfloat> %1, <vscale x 2 x i1> %m, iXLen %2) nounwind {
114+
; CHECK-LABEL: intrinsic_sf_vfexp_mask_v_nxv2bf16:
115+
; CHECK: # %bb.0: # %entry
116+
; CHECK-NEXT: vsetvli zero, a0, e16alt, mf2, tu, mu
117+
; CHECK-NEXT: sf.vfexp.v v8, v9, v0.t
118+
; CHECK-NEXT: ret
119+
entry:
120+
%a = call <vscale x 2 x bfloat> @llvm.riscv.sf.vfexp.mask.nxv2bf16(
121+
<vscale x 2 x bfloat> %0,
122+
<vscale x 2 x bfloat> %1,
123+
<vscale x 2 x i1> %m,
124+
iXLen %2, iXLen 0)
125+
126+
ret <vscale x 2 x bfloat> %a
127+
}
128+
129+
; Masked llvm.riscv.sf.vfexp.mask on <vscale x 4 x bfloat>. The trailing
; iXLen 0 operand is presumably the policy (confirm against the intrinsic
; definition). Expected output: a tu/mu vsetvli at e16alt/m1, then the
; v0.t-masked form sf.vfexp.v v8, v9, v0.t.
define <vscale x 4 x bfloat> @intrinsic_sf_vfexp_mask_v_nxv4bf16(<vscale x 4 x bfloat> %0, <vscale x 4 x bfloat> %1, <vscale x 4 x i1> %m, iXLen %2) nounwind {
130+
; CHECK-LABEL: intrinsic_sf_vfexp_mask_v_nxv4bf16:
131+
; CHECK: # %bb.0: # %entry
132+
; CHECK-NEXT: vsetvli zero, a0, e16alt, m1, tu, mu
133+
; CHECK-NEXT: sf.vfexp.v v8, v9, v0.t
134+
; CHECK-NEXT: ret
135+
entry:
136+
%a = call <vscale x 4 x bfloat> @llvm.riscv.sf.vfexp.mask.nxv4bf16(
137+
<vscale x 4 x bfloat> %0,
138+
<vscale x 4 x bfloat> %1,
139+
<vscale x 4 x i1> %m,
140+
iXLen %2, iXLen 0)
141+
142+
ret <vscale x 4 x bfloat> %a
143+
}
144+
145+
; Masked llvm.riscv.sf.vfexp.mask on <vscale x 8 x bfloat>. The trailing
; iXLen 0 operand is presumably the policy (confirm against the intrinsic
; definition). Expected output: a tu/mu vsetvli at e16alt/m2, then the
; v0.t-masked form sf.vfexp.v v8, v10, v0.t.
define <vscale x 8 x bfloat> @intrinsic_sf_vfexp_mask_v_nxv8bf16(<vscale x 8 x bfloat> %0, <vscale x 8 x bfloat> %1, <vscale x 8 x i1> %m, iXLen %2) nounwind {
146+
; CHECK-LABEL: intrinsic_sf_vfexp_mask_v_nxv8bf16:
147+
; CHECK: # %bb.0: # %entry
148+
; CHECK-NEXT: vsetvli zero, a0, e16alt, m2, tu, mu
149+
; CHECK-NEXT: sf.vfexp.v v8, v10, v0.t
150+
; CHECK-NEXT: ret
151+
entry:
152+
%a = call <vscale x 8 x bfloat> @llvm.riscv.sf.vfexp.mask.nxv8bf16(
153+
<vscale x 8 x bfloat> %0,
154+
<vscale x 8 x bfloat> %1,
155+
<vscale x 8 x i1> %m,
156+
iXLen %2, iXLen 0)
157+
158+
ret <vscale x 8 x bfloat> %a
159+
}
160+
161+
; Masked llvm.riscv.sf.vfexp.mask on <vscale x 16 x bfloat>. The trailing
; iXLen 0 operand is presumably the policy (confirm against the intrinsic
; definition). Expected output: a tu/mu vsetvli at e16alt/m4, then the
; v0.t-masked form sf.vfexp.v v8, v12, v0.t.
define <vscale x 16 x bfloat> @intrinsic_sf_vfexp_mask_v_nxv16bf16(<vscale x 16 x bfloat> %0, <vscale x 16 x bfloat> %1, <vscale x 16 x i1> %m, iXLen %2) nounwind {
162+
; CHECK-LABEL: intrinsic_sf_vfexp_mask_v_nxv16bf16:
163+
; CHECK: # %bb.0: # %entry
164+
; CHECK-NEXT: vsetvli zero, a0, e16alt, m4, tu, mu
165+
; CHECK-NEXT: sf.vfexp.v v8, v12, v0.t
166+
; CHECK-NEXT: ret
167+
entry:
168+
%a = call <vscale x 16 x bfloat> @llvm.riscv.sf.vfexp.mask.nxv16bf16(
169+
<vscale x 16 x bfloat> %0,
170+
<vscale x 16 x bfloat> %1,
171+
<vscale x 16 x i1> %m,
172+
iXLen %2, iXLen 0)
173+
174+
ret <vscale x 16 x bfloat> %a
175+
}
176+
177+
; Masked llvm.riscv.sf.vfexp.mask on <vscale x 32 x bfloat>. The trailing
; iXLen 0 operand is presumably the policy (confirm against the intrinsic
; definition). Expected output: a tu/mu vsetvli at e16alt/m8, then the
; v0.t-masked form sf.vfexp.v v8, v16, v0.t.
define <vscale x 32 x bfloat> @intrinsic_sf_vfexp_mask_v_nxv32bf16(<vscale x 32 x bfloat> %0, <vscale x 32 x bfloat> %1, <vscale x 32 x i1> %m, iXLen %2) nounwind {
178+
; CHECK-LABEL: intrinsic_sf_vfexp_mask_v_nxv32bf16:
179+
; CHECK: # %bb.0: # %entry
180+
; CHECK-NEXT: vsetvli zero, a0, e16alt, m8, tu, mu
181+
; CHECK-NEXT: sf.vfexp.v v8, v16, v0.t
182+
; CHECK-NEXT: ret
183+
entry:
184+
%a = call <vscale x 32 x bfloat> @llvm.riscv.sf.vfexp.mask.nxv32bf16(
185+
<vscale x 32 x bfloat> %0,
186+
<vscale x 32 x bfloat> %1,
187+
<vscale x 32 x i1> %m,
188+
iXLen %2, iXLen 0)
189+
190+
ret <vscale x 32 x bfloat> %a
191+
}

0 commit comments

Comments
 (0)