|
16 | 16 |
|
17 | 17 | //================================================================================ |
18 | 18 | // this file has been auto-generated, do not modify its contents! |
19 | | -// date: 2024-05-17 10:55:41.948281 |
20 | | -// git hash: 41246ab6db9fcc24639342c439e606ba143ee346 |
| 19 | +// date: 2024-05-17 11:44:08.292272 |
| 20 | +// git hash: c0c7d100e3ee5bc187211e3d76b1fccc73c2fa5e |
21 | 21 | //================================================================================ |
22 | 22 |
|
23 | 23 | #ifndef KERNEL_FLOAT_MACROS_H |
@@ -1890,6 +1890,7 @@ struct apply_fastmath_impl<ops::divide<T>, N, T, T, T> { |
1890 | 1890 | call(ops::divide<T> fun, T* result, const T* lhs, const T* rhs) { |
1891 | 1891 | T rhs_rcp[N]; |
1892 | 1892 |
|
| 1893 | + // A fast way to perform division is to multiply by the reciprocal |
1893 | 1894 | apply_fastmath_impl<ops::rcp<T>, N, T, T, T>::call({}, rhs_rcp, rhs); |
1894 | 1895 | apply_fastmath_impl<ops::multiply<T>, N, T, T, T>::call({}, result, lhs, rhs_rcp); |
1895 | 1896 | } |
@@ -3430,7 +3431,7 @@ struct fma<double> { |
3430 | 3431 | } // namespace ops |
3431 | 3432 |
|
3432 | 3433 | /** |
3433 | | - * Computes the result of `a * b + c`. This is done in a single operation if possible. |
| 3434 | + * Computes the result of `a * b + c`. This is done in a single operation if possible for the given vector type. |
3434 | 3435 | */ |
3435 | 3436 | template< |
3436 | 3437 | typename A, |
@@ -3739,6 +3740,21 @@ struct vector: public S { |
3739 | 3740 | KERNEL_FLOAT_INLINE void for_each(F fun) const { |
3740 | 3741 | return kernel_float::for_each(*this, std::move(fun)); |
3741 | 3742 | } |
| 3743 | + |
| 3744 | + /** |
| 3745 | + * Returns the result of `*this + lhs * rhs`. |
| 3746 | + * |
| 3747 | + * The operation is performed using a single `kernel_float::fma` call, which may be faster than performing |
| 3748 | + * the addition and multiplication separately. |
| 3749 | + */ |
| 3750 | + template< |
| 3751 | + typename L, |
| 3752 | + typename R, |
| 3753 | + typename T2 = promote_t<T, vector_value_type<L>, vector_value_type<R>>, |
| 3754 | + typename E2 = broadcast_extent<E, vector_extent_type<L>, vector_extent_type<R>>> |
| 3755 | + KERNEL_FLOAT_INLINE vector<T2, E2> fma(const L& lhs, const R& rhs) const { |
| 3756 | + return ::kernel_float::fma(lhs, rhs, *this); |
| 3757 | + } |
3742 | 3758 | }; |
3743 | 3759 |
|
3744 | 3760 | /** |
|
0 commit comments