Skip to content

Commit 82f4279

Browse files
committed
add support for tanh.approx.f16/f16x2
1 parent 9adcce1 commit 82f4279

File tree

1 file changed

+25
-3
lines changed

1 file changed

+25
-3
lines changed

llvm/lib/Target/NVPTX/NVPTXInstrInfo.td

Lines changed: 25 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@ class callSubtarget<string SubtargetMethod> : Predicate<"Subtarget->" # Subtarge
121121
// Subtarget capability predicates. Each def wraps a C++ boolean expression
// that calls the like-named query method on the Subtarget object.
def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">;
122122
def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">;
123123
def hasAtomSemantics : Predicate<"Subtarget->hasAtomSemantics()">;
124+
def hasMemoryOrdering : Predicate<"Subtarget->hasMemoryOrdering()">;
124125
def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">;
125126
def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">;
126127
def hasAtomSwap128 : Predicate<"Subtarget->hasAtomSwap128()">;
@@ -1226,11 +1227,32 @@ def COS_APPROX_f32 :
12261227
BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz),
12271228
"cos.approx$ftz.f32",
12281229
[(set f32:$dst, (UnaryOpAllowsApproxFn<fcos> f32:$src))]>;
1229-
def TANH_APPROX_f32 :
1230-
BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), "tanh.approx.f32",
1231-
[(set f32:$dst, (UnaryOpAllowsApproxFn<ftanh> f32:$src))]>,
1230+
1231+
// NOTE: tanh.approx doesn't support the FTZ flag for f16/f16x2
1232+
// Scalar f16 form: emits "tanh.approx.f16" with a B16 source and a B16
// destination. Selected for an ftanh node on f16 wrapped in
// UnaryOpAllowsApproxFn (defined outside this view; presumably it only
// matches when approximate functions are permitted -- confirm).
// Gated on the hasPTX<70> and hasSM<75> predicates.
def TANH_APPROX_f16 :
1233+
BasicNVPTXInst<(outs B16:$dst), (ins B16:$src), "tanh.approx.f16",
1234+
[(set f16:$dst, (UnaryOpAllowsApproxFn<ftanh> f16:$src))]>,
1235+
Requires<[hasPTX<70>, hasSM<75>]>;
1236+
1237+
// Packed form: emits "tanh.approx.f16x2" on a v2f16 value carried in B32
// operands. Selected for an ftanh node on v2f16 wrapped in
// UnaryOpAllowsApproxFn (defined outside this view).
// Gated on the hasPTX<70> and hasSM<75> predicates.
def TANH_APPROX_f16x2 :
1238+
BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), "tanh.approx.f16x2",
1239+
[(set v2f16:$dst, (UnaryOpAllowsApproxFn<ftanh> v2f16:$src))]>,
12321240
Requires<[hasPTX<70>, hasSM<75>]>;
12331241

1242+
// Scalar f32 form: emits "tanh.approx.f32" for an ftanh node on f32 wrapped in
// UnaryOpAllowsApproxFn. Gated on the hasPTX<70> and hasSM<75> predicates.
// Deliberately has no FTZ flag operand: the PTX ISA defines no .ftz modifier
// for tanh.approx in any type, so printing "$ftz" here could produce the
// invalid mnemonic "tanh.approx.ftz.f32".
def TANH_APPROX_f32 :
1243+
BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
1244+
"tanh.approx.f32",
1245+
[(set f32:$dst, (UnaryOpAllowsApproxFn<ftanh> f32:$src))]>,
1246+
Requires<[hasPTX<70>, hasSM<75>]>;
1247+
1248+
// Patterns for NVVM tanh intrinsics. Each intrinsic maps directly onto the
// TANH_APPROX_* instruction of the same type; the instructions take the source
// value as their only input, so no flag operand is passed.
1249+
def : Pat<(f16 (int_nvvm_tanh_approx_f16 f16:$a)),
1250+
(TANH_APPROX_f16 f16:$a)>;
1251+
def : Pat<(v2f16 (int_nvvm_tanh_approx_f16x2 v2f16:$a)),
1252+
(TANH_APPROX_f16x2 v2f16:$a)>;
1253+
def : Pat<(f32 (int_nvvm_tanh_approx_f f32:$a)),
1254+
(TANH_APPROX_f32 f32:$a)>;
1255+
12341256
//-----------------------------------
12351257
// Bitwise operations
12361258
//-----------------------------------

0 commit comments

Comments
 (0)