@@ -121,6 +121,7 @@ class callSubtarget<string SubtargetMethod> : Predicate<"Subtarget->" # Subtarge
// Each def below names a Predicate whose condition is the quoted C++ expression, a query on the current Subtarget.
121121def hasAtomAddF64 : Predicate<"Subtarget->hasAtomAddF64()">;
122122def hasAtomScope : Predicate<"Subtarget->hasAtomScope()">;
123123def hasAtomSemantics : Predicate<"Subtarget->hasAtomSemantics()">;
124+ def hasMemoryOrdering : Predicate<"Subtarget->hasMemoryOrdering()">;
124125def hasAtomBitwise64 : Predicate<"Subtarget->hasAtomBitwise64()">;
125126def hasAtomMinMax64 : Predicate<"Subtarget->hasAtomMinMax64()">;
126127def hasAtomSwap128 : Predicate<"Subtarget->hasAtomSwap128()">;
@@ -1226,11 +1227,32 @@ def COS_APPROX_f32 :
12261227 BasicFlagsNVPTXInst<(outs B32:$dst), (ins B32:$src), (ins FTZFlag:$ftz),
12271228 "cos.approx$ftz.f32",
12281229 [(set f32:$dst, (UnaryOpAllowsApproxFn<fcos> f32:$src))]>;
1229- def TANH_APPROX_f32 :
1230- BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), "tanh.approx.f32",
1231- [(set f32:$dst, (UnaryOpAllowsApproxFn<ftanh> f32:$src))]>,
1230+
1231+ // NOTE: PTX defines no .ftz modifier for tanh.approx (f16, f16x2 or f32), so none of these instructions takes an FTZFlag operand.
1232+ def TANH_APPROX_f16 :
1233+ BasicNVPTXInst<(outs B16:$dst), (ins B16:$src), "tanh.approx.f16",
1234+ [(set f16:$dst, (UnaryOpAllowsApproxFn<ftanh> f16:$src))]>,
1235+ Requires<[hasPTX<70>, hasSM<75>]>;
1236+
1237+ def TANH_APPROX_f16x2 :
1238+ BasicNVPTXInst<(outs B32:$dst), (ins B32:$src), "tanh.approx.f16x2", // v2f16 operands use the B32 register class
1239+ [(set v2f16:$dst, (UnaryOpAllowsApproxFn<ftanh> v2f16:$src))]>,
12321240 Requires<[hasPTX<70>, hasSM<75>]>;
12331241
1242+ def TANH_APPROX_f32 :
1243+ BasicNVPTXInst<(outs B32:$dst), (ins B32:$src),
1244+ "tanh.approx.f32", // plain mnemonic: "tanh.approx.ftz.f32" is not a valid PTX instruction
1245+ [(set f32:$dst, (UnaryOpAllowsApproxFn<ftanh> f32:$src))]>,
1246+ Requires<[hasPTX<70>, hasSM<75>]>;
1247+
1248+ // Patterns for NVVM tanh intrinsics. Anonymous Pats do not inherit the instruction's Requires, so the PTX/SM predicates are repeated on each one.
1249+ def : Pat<(f16 (int_nvvm_tanh_approx_f16 f16:$a)),
1250+ (TANH_APPROX_f16 f16:$a)>, Requires<[hasPTX<70>, hasSM<75>]>;
1251+ def : Pat<(v2f16 (int_nvvm_tanh_approx_f16x2 v2f16:$a)),
1252+ (TANH_APPROX_f16x2 v2f16:$a)>, Requires<[hasPTX<70>, hasSM<75>]>;
1253+ def : Pat<(f32 (int_nvvm_tanh_approx_f f32:$a)),
1254+ (TANH_APPROX_f32 f32:$a)>, Requires<[hasPTX<70>, hasSM<75>]>;
1255+
12341256//-----------------------------------
12351257// Bitwise operations
12361258//-----------------------------------
0 commit comments