|
4 | 4 | /// `$lanes` of float `$type`, which uses `$bits_ty` as its binary
|
5 | 5 | /// representation. Called from `define_float_vector!`.
|
6 | 6 | macro_rules! impl_float_vector {
|
7 |
| - { $name:ident, $type:ty, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => { |
| 7 | + { $name:ident, $type:ident, $bits_ty:ident, $mask_ty:ident, $mask_impl_ty:ident } => { |
8 | 8 | impl_vector! { $name, $type }
|
9 | 9 | impl_float_reductions! { $name, $type }
|
10 | 10 |
|
@@ -36,13 +36,44 @@ macro_rules! impl_float_vector {
|
36 | 36 | unsafe { crate::intrinsics::simd_fabs(self) }
|
37 | 37 | }
|
38 | 38 |
|
| 39 | + /// Fused multiply-add. Computes `(self * a) + b` with only one rounding error, |
| 40 | + /// yielding a more accurate result than an unfused multiply-add. |
| 41 | + /// |
| 42 | + /// Using `mul_add` *may* be more performant than an unfused multiply-add if the target |
| 43 | + /// architecture has a dedicated `fma` CPU instruction. However, this is not always |
| 44 | + /// true, and will be heavily dependent on designing algorithms with specific target |
| 45 | + /// hardware in mind. |
| 46 | + #[inline] |
| 47 | + pub fn mul_add(self, a: Self, b: Self) -> Self { |
| 48 | + unsafe { crate::intrinsics::simd_fma(self, a, b) } |
| 49 | + } |
| 50 | + |
/// Produces a vector where every lane has the square root value
/// of the equivalently-indexed lane in `self`.
///
/// Only available when the `std` feature is enabled.
#[inline]
#[must_use = "method returns a new vector and does not mutate the original value"]
#[cfg(feature = "std")]
pub fn sqrt(self) -> Self {
    // SAFETY: the only operand is `self`, a vector of this float type.
    // NOTE(review): presumably `simd_fsqrt` has no further preconditions — confirm
    // against the intrinsic's declaration.
    unsafe { crate::intrinsics::simd_fsqrt(self) }
}
|
| 58 | + |
| 59 | + /// Takes the reciprocal (inverse) of each lane, `1/x`. |
| 60 | + #[inline] |
| 61 | + pub fn recip(self) -> Self { |
| 62 | + Self::splat(1.0) / self |
| 63 | + } |
| 64 | + |
| 65 | + /// Converts each lane from radians to degrees. |
| 66 | + #[inline] |
| 67 | + pub fn to_degrees(self) -> Self { |
| 68 | + // to_degrees uses a special constant for better precision, so extract that constant |
| 69 | + self * Self::splat($type::to_degrees(1.)) |
| 70 | + } |
| 71 | + |
| 72 | + /// Converts each lane from degrees to radians. |
| 73 | + #[inline] |
| 74 | + pub fn to_radians(self) -> Self { |
| 75 | + self * Self::splat($type::to_radians(1.)) |
| 76 | + } |
46 | 77 | }
|
47 | 78 |
|
48 | 79 | impl<const LANES: usize> $name<LANES>
|
@@ -97,6 +128,67 @@ macro_rules! impl_float_vector {
|
97 | 128 | pub fn is_normal(self) -> crate::$mask_ty<LANES> {
|
98 | 129 | !(self.abs().lanes_eq(Self::splat(0.0)) | self.is_nan() | self.is_subnormal() | self.is_infinite())
|
99 | 130 | }
|
| 131 | + |
| 132 | + /// Replaces each lane with a number that represents its sign. |
| 133 | + /// |
| 134 | + /// * `1.0` if the number is positive, `+0.0`, or `INFINITY` |
| 135 | + /// * `-1.0` if the number is negative, `-0.0`, or `NEG_INFINITY` |
| 136 | + /// * `NAN` if the number is `NAN` |
| 137 | + #[inline] |
| 138 | + pub fn signum(self) -> Self { |
| 139 | + self.is_nan().select(Self::splat($type::NAN), Self::splat(1.0).copysign(self)) |
| 140 | + } |
| 141 | + |
| 142 | + /// Returns each lane with the magnitude of `self` and the sign of `sign`. |
| 143 | + /// |
| 144 | + /// If any lane is a `NAN`, then a `NAN` with the sign of `sign` is returned. |
| 145 | + #[inline] |
| 146 | + pub fn copysign(self, sign: Self) -> Self { |
| 147 | + let sign_bit = sign.to_bits() & Self::splat(-0.).to_bits(); |
| 148 | + let magnitude = self.to_bits() & !Self::splat(-0.).to_bits(); |
| 149 | + Self::from_bits(sign_bit | magnitude) |
| 150 | + } |
| 151 | + |
| 152 | + /// Returns the minimum of each lane. |
| 153 | + /// |
| 154 | + /// If one of the values is `NAN`, then the other value is returned. |
| 155 | + #[inline] |
| 156 | + pub fn min(self, other: Self) -> Self { |
| 157 | + // TODO consider using an intrinsic |
| 158 | + self.is_nan().select( |
| 159 | + other, |
| 160 | + self.lanes_ge(other).select(other, self) |
| 161 | + ) |
| 162 | + } |
| 163 | + |
| 164 | + /// Returns the maximum of each lane. |
| 165 | + /// |
| 166 | + /// If one of the values is `NAN`, then the other value is returned. |
| 167 | + #[inline] |
| 168 | + pub fn max(self, other: Self) -> Self { |
| 169 | + // TODO consider using an intrinsic |
| 170 | + self.is_nan().select( |
| 171 | + other, |
| 172 | + self.lanes_le(other).select(other, self) |
| 173 | + ) |
| 174 | + } |
| 175 | + |
| 176 | + /// Restrict each lane to a certain interval unless it is NaN. |
| 177 | + /// |
| 178 | + /// For each lane in `self`, returns the corresponding lane in `max` if the lane is |
| 179 | + /// greater than `max`, and the corresponding lane in `min` if the lane is less |
| 180 | + /// than `min`. Otherwise returns the lane in `self`. |
| 181 | + #[inline] |
| 182 | + pub fn clamp(self, min: Self, max: Self) -> Self { |
| 183 | + assert!( |
| 184 | + min.lanes_le(max).all(), |
| 185 | + "each lane in `min` must be less than or equal to the corresponding lane in `max`", |
| 186 | + ); |
| 187 | + let mut x = self; |
| 188 | + x = x.lanes_lt(min).select(min, x); |
| 189 | + x = x.lanes_gt(max).select(max, x); |
| 190 | + x |
| 191 | + } |
100 | 192 | }
|
101 | 193 | };
|
102 | 194 | }
|
|
0 commit comments