1616
1717// ================================================================================
1818// this file has been auto-generated, do not modify its contents!
19- // date: 2024-11-26 13:52:06.286983
20- // git hash: c4c6ac09808d14b5407afb06ecdecd235cd50ed3
19+ // date: 2024-11-26 14:20:49.081641
20+ // git hash: 76c695a4cc5b13b3d5841ac5085574a5b47a299c
2121// ================================================================================
2222
2323#ifndef KERNEL_FLOAT_MACROS_H
@@ -824,6 +824,13 @@ using default_policy = KERNEL_FLOAT_POLICY;
824824
825825namespace detail {
826826
827+ template <typename F, typename Output, typename ... Args>  // F: callable type, Output: declared return type, Args: argument types
828+ struct invoke_impl {  // helper whose static `call` forwards to a callable of type F
829+ KERNEL_FLOAT_INLINE static Output call (F fun, Args... args) {  // `fun` and every argument are taken by value
830+ return fun (args...);  // invoke the callable; the result is returned as `Output`
831+ }
832+ };
833+
827834//
828835template <typename Policy, typename F, size_t N, typename Output, typename ... Args>
829836struct apply_fallback_impl {
@@ -853,13 +860,6 @@ template<int Level, typename F, size_t N, typename Output, typename... Args>
853860struct apply_fallback_impl <approx_level_policy<Level>, F, N, Output, Args...>:
854861 apply_impl<approx_policy, F, N, Output, Args...> {};
855862
856- template <typename F, typename Output, typename ... Args>
857- struct invoke_impl {
858- KERNEL_FLOAT_INLINE static Output call (F fun, Args... args) {
859- return fun (args...);
860- }
861- };
862-
863863// Only for `accurate_policy` do we implement `apply_impl`, the others will fall back to `apply_base_impl`.
864864template <typename F, size_t N, typename Output, typename ... Args>
865865struct apply_impl <accurate_policy, F, N, Output, Args...> {
@@ -1416,7 +1416,7 @@ KERNEL_FLOAT_DEFINE_UNARY_FUN_FAST(rsqrt)
14161416 }
14171417
14181418KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN (float , exp, __expf(input))
1419- KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float , exp2, __exp2f(input))
1419+ // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, exp2, __exp2f(input)) // Disabled: `__exp2f` seems to be missing; `exp2` is defined via the `ex2.approx.f32` PTX instruction below instead.
14201420KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float , exp10, __exp10f(input))
14211421
14221422KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float , log, __logf(input))
@@ -1442,19 +1442,21 @@ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN(float, tan, __tanf(input))
14421442KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX (double , rcp, " rcp.approx.ftz.f64" , " d" )
14431443KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(double , rsqrt, " rsqrt.approx.f64" , " d" )
14441444
1445+ KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , exp2, " ex2.approx.f32" , " f" )
14451446KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , sqrt, " sqrt.approx.f32" , " f" )
14461447KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , rcp, " rcp.approx.f32" , " f" )
14471448KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , rsqrt, " rsqrt.approx.f32" , " f" )
14481449KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float , tanh, " tanh.approx.f32" , " f" )
1450+
1451+ // The PTX variants below are no longer necessary: the KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_FUN definitions above already cover these functions.
1452+ // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, sin, "sin.approx.f32", "f")
1453+ // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, cos, "cos.approx.f32", "f")
1454+ // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, log2, "lg2.approx.f32", "f")
14491455#endif
14501456
14511457#define KERNEL_FLOAT_FAST_F32_MAP (F ) \
14521458 F (exp) F(exp2) F(exp10) F(log) F(log2) F(log10) F(sin) F(cos) F(tan) F(rcp) F(rsqrt) F(sqrt)
14531459
1454- // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, sin, "sin.approx.f32", "f")
1455- // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, cos, "cos.approx.f32", "f")
1456- // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, exp2, "ex2.approx.f32", "f")
1457- // KERNEL_FLOAT_DEFINE_UNARY_FAST_IMPL_PTX(float, log2, "lg2.approx.f32", "f")
14581460#else
14591461#define KERNEL_FLOAT_FAST_F32_MAP (F )
14601462#endif
0 commit comments