Skip to content

Commit 5719e4b

Browse files
SuperFluffygnzlbg
authored and committed
Implement tanh
This implements tanh for packed vectors. This is primarily interesting when using sleef-sys for its SIMD implementations of tanh. Since LLVM does not provide a tanh intrinsic, the libm implementation is used for primitives, and packed vectors are transmuted into fixed-size arrays before the libm tanh is applied to each of their elements.
1 parent beae1a0 commit 5719e4b

File tree

6 files changed

+153
-1
lines changed

6 files changed

+153
-1
lines changed

Cargo.toml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ maintenance = { status = "experimental" }
2323
[dependencies]
2424
cfg-if = "^0.1.6"
2525
core_arch = { version = "^0.1.4", optional = true }
26+
libm = "0.1.2"
2627

2728
[features]
2829
default = []
@@ -39,4 +40,4 @@ optional = true
3940

4041
[target.wasm32-unknown-unknown.dev-dependencies]
4142
wasm-bindgen = "=0.2.19"
42-
wasm-bindgen-test = "=0.2.19"
43+
wasm-bindgen-test = "=0.2.19"

src/api.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -213,6 +213,7 @@ macro_rules! impl_f {
213213
impl_math_float_sin!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
214214
impl_math_float_sqrt!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
215215
impl_math_float_sqrte!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
216+
impl_math_float_tanh!([$elem_ty; $elem_n]: $tuple_id | $test_tt);
216217
impl_cmp_vertical!(
217218
[$elem_ty; $elem_n]: $tuple_id, $mask_ty, false, (1., 0.)
218219
| $test_tt

src/api/math/float.rs

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,9 @@ mod sqrt;
3939
#[macro_use]
4040
mod sqrte;
4141

42+
#[macro_use]
43+
mod tanh;
44+
4245
macro_rules! impl_float_category {
4346
([$elem_ty:ident; $elem_count:expr]: $id:ident, $mask_ty:ident) => {
4447
impl $id {

src/api/math/float/tanh.rs

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//! Implements vertical (lane-wise) floating-point `tanh`.
2+
3+
macro_rules! impl_math_float_tanh {
4+
([$elem_ty:ident; $elem_count:expr]: $id:ident | $test_tt:tt) => {
5+
impl $id {
6+
/// Hyperbolic tangent, computed lane-wise.
7+
#[inline]
8+
pub fn tanh(self) -> Self {
9+
use crate::codegen::math::float::tanh::Tanh;
10+
Tanh::tanh(self)
11+
}
12+
}
13+
14+
test_if!{
15+
$test_tt:
16+
paste::item! {
17+
pub mod [<$id _math_tanh>] {
18+
use super::*;
19+
#[cfg_attr(not(target_arch = "wasm32"), test)] #[cfg_attr(target_arch = "wasm32", wasm_bindgen_test)]
20+
fn tanh() {
21+
let z = $id::splat(0 as $elem_ty);
22+
23+
assert_eq!(z, z.tanh());
24+
}
25+
}
26+
}
27+
}
28+
};
29+
}

src/codegen/math/float.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,3 +16,4 @@ crate mod sin_cos_pi;
1616
crate mod sin_pi;
1717
crate mod sqrt;
1818
crate mod sqrte;
19+
crate mod tanh;

src/codegen/math/float/tanh.rs

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
//! Vertical floating-point `tanh`
2+
#![allow(unused)]
3+
4+
// FIXME 64-bit 1 elem vectors tanh
5+
6+
use crate::*;
7+
8+
crate trait Tanh {
9+
fn tanh(self) -> Self;
10+
}
11+
12+
macro_rules! define_tanh {
13+
14+
($name:ident, $basetype:ty, $simdtype:ty, $lanes:expr, $trait:path) => {
15+
fn $name(x: $simdtype) -> $simdtype {
16+
use core::intrinsics::transmute;
17+
let mut buf: [$basetype; $lanes] = unsafe { transmute(x) };
18+
for elem in &mut buf {
19+
*elem = <$basetype as $trait>::tanh(*elem);
20+
}
21+
unsafe { transmute(buf) }
22+
}
23+
};
24+
25+
(f32 => $name:ident, $type:ty, $lanes:expr) => {
26+
define_tanh!($name, f32, $type, $lanes, libm::F32Ext);
27+
};
28+
29+
(f64 => $name:ident, $type:ty, $lanes:expr) => {
30+
define_tanh!($name, f64, $type, $lanes, libm::F64Ext);
31+
};
32+
}
33+
34+
// LLVM does not seem to expose intrinsics for the hyperbolic functions;
35+
// we thus call the scalar `libm` crate versions (`F32Ext`/`F64Ext`) on each lane.
36+
define_tanh!(f32 => tanh_v2f32, f32x2, 2);
37+
define_tanh!(f32 => tanh_v4f32, f32x4, 4);
38+
define_tanh!(f32 => tanh_v8f32, f32x8, 8);
39+
define_tanh!(f32 => tanh_v16f32, f32x16, 16);
40+
41+
define_tanh!(f64 => tanh_v2f64, f64x2, 2);
42+
define_tanh!(f64 => tanh_v4f64, f64x4, 4);
43+
define_tanh!(f64 => tanh_v8f64, f64x8, 8);
44+
45+
fn tanh_f32(x: f32) -> f32 {
46+
libm::F32Ext::tanh(x)
47+
}
48+
49+
fn tanh_f64(x: f64) -> f64 {
50+
libm::F64Ext::tanh(x)
51+
}
52+
53+
gen_unary_impl_table!(Tanh, tanh);
54+
55+
cfg_if! {
56+
if #[cfg(target_arch = "s390x")] {
57+
// FIXME: https://github.com/rust-lang-nursery/packed_simd/issues/14
58+
impl_unary!(f32x2[f32; 2]: tanh_f32);
59+
impl_unary!(f32x4[f32; 4]: tanh_f32);
60+
impl_unary!(f32x8[f32; 8]: tanh_f32);
61+
impl_unary!(f32x16[f32; 16]: tanh_f32);
62+
63+
impl_unary!(f64x2[f64; 2]: tanh_f64);
64+
impl_unary!(f64x4[f64; 4]: tanh_f64);
65+
impl_unary!(f64x8[f64; 8]: tanh_f64);
66+
} else if #[cfg(all(target_arch = "x86_64", feature = "sleef-sys"))] {
67+
use sleef_sys::*;
68+
cfg_if! {
69+
if #[cfg(target_feature = "avx2")] {
70+
impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10avx2128);
71+
impl_unary!(f32x16[h => f32x8]: Sleef_tanhf8_u10avx2);
72+
impl_unary!(f64x8[h => f64x4]: Sleef_tanhd4_u10avx2);
73+
74+
impl_unary!(f32x4: Sleef_tanhf4_u10avx2128);
75+
impl_unary!(f32x8: Sleef_tanhf8_u10avx2);
76+
impl_unary!(f64x2: Sleef_tanhd2_u10avx2128);
77+
impl_unary!(f64x4: Sleef_tanhd4_u10avx2);
78+
} else if #[cfg(target_feature = "avx")] {
79+
impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10sse4);
80+
impl_unary!(f32x16[h => f32x8]: Sleef_tanhf8_u10avx);
81+
impl_unary!(f64x8[h => f64x4]: Sleef_tanhd4_u10avx);
82+
83+
impl_unary!(f32x4: Sleef_tanhf4_u10sse4);
84+
impl_unary!(f32x8: Sleef_tanhf8_u10avx);
85+
impl_unary!(f64x2: Sleef_tanhd2_u10sse4);
86+
impl_unary!(f64x4: Sleef_tanhd4_u10avx);
87+
} else if #[cfg(target_feature = "sse4.2")] {
88+
impl_unary!(f32x2[t => f32x4]: Sleef_tanhf4_u10sse4);
89+
impl_unary!(f32x16[q => f32x4]: Sleef_tanhf4_u10sse4);
90+
impl_unary!(f64x8[q => f64x2]: Sleef_tanhd2_u10sse4);
91+
92+
impl_unary!(f32x4: Sleef_tanhf4_u10sse4);
93+
impl_unary!(f32x8[h => f32x4]: Sleef_tanhf4_u10sse4);
94+
impl_unary!(f64x2: Sleef_tanhd2_u10sse4);
95+
impl_unary!(f64x4[h => f64x2]: Sleef_tanhd2_u10sse4);
96+
} else {
97+
impl_unary!(f32x2[f32; 2]: tanh_f32);
98+
impl_unary!(f32x16: tanh_v16f32);
99+
impl_unary!(f64x8: tanh_v8f64);
100+
101+
impl_unary!(f32x4: tanh_v4f32);
102+
impl_unary!(f32x8: tanh_v8f32);
103+
impl_unary!(f64x2: tanh_v2f64);
104+
impl_unary!(f64x4: tanh_v4f64);
105+
}
106+
}
107+
} else {
108+
impl_unary!(f32x2[f32; 2]: tanh_f32);
109+
impl_unary!(f32x4: tanh_v4f32);
110+
impl_unary!(f32x8: tanh_v8f32);
111+
impl_unary!(f32x16: tanh_v16f32);
112+
113+
impl_unary!(f64x2: tanh_v2f64);
114+
impl_unary!(f64x4: tanh_v4f64);
115+
impl_unary!(f64x8: tanh_v8f64);
116+
}
117+
}

0 commit comments

Comments
 (0)