Skip to content

Commit 687e37e

Browse files
authored
Merge pull request opencv#25892 from WanliZhong:v_sincos
Add support for v_sin and v_cos (Sine and Cosine) opencv#25892 This PR aims to implement `v_sincos(v_float16 x)`, `v_sincos(v_float32 x)` and `v_sincos(v_float64 x)`. Merged after opencv#25891 and opencv#26023 **NOTE:** Also, the patch changes already added `v_exp`, `v_log` and `v_erf` to pass parameters by reference instead of by value, to match API of other universal intrinsics. TODO: - [x] double and half float precision - [x] tests for them - [x] doc to explain the implementation ### Pull Request Readiness Checklist See details at https://github.com/opencv/opencv/wiki/How_to_contribute#making-a-good-pull-request - [x] I agree to contribute to the project under Apache 2 License. - [x] To the best of my knowledge, the proposed patch is not based on a code under GPL or another license that is incompatible with OpenCV - [x] The PR is proposed to the proper branch - [ ] There is a reference to the original bug report and related work - [ ] There is accuracy test, performance test and test data in opencv_extra repository, if applicable Patch to opencv_extra has the same branch name. - [ ] The feature is well documented and sample code can be built with the project CMake
1 parent 69803e7 commit 687e37e

14 files changed

+509
-69
lines changed

modules/core/include/opencv2/core/hal/intrin_avx.hpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3167,12 +3167,18 @@ inline void v_pack_store(hfloat* ptr, const v_float32x8& a)
31673167
inline void v256_cleanup() { _mm256_zeroall(); }
31683168

31693169
#include "intrin_math.hpp"
3170-
inline v_float32x8 v_exp(v_float32x8 x) { return v_exp_default_32f<v_float32x8, v_int32x8>(x); }
3171-
inline v_float32x8 v_log(v_float32x8 x) { return v_log_default_32f<v_float32x8, v_int32x8>(x); }
3172-
inline v_float32x8 v_erf(v_float32x8 x) { return v_erf_default_32f<v_float32x8, v_int32x8>(x); }
3173-
3174-
inline v_float64x4 v_exp(v_float64x4 x) { return v_exp_default_64f<v_float64x4, v_int64x4>(x); }
3175-
inline v_float64x4 v_log(v_float64x4 x) { return v_log_default_64f<v_float64x4, v_int64x4>(x); }
3170+
inline v_float32x8 v_exp(const v_float32x8& x) { return v_exp_default_32f<v_float32x8, v_int32x8>(x); }
3171+
inline v_float32x8 v_log(const v_float32x8& x) { return v_log_default_32f<v_float32x8, v_int32x8>(x); }
3172+
inline void v_sincos(const v_float32x8& x, v_float32x8& s, v_float32x8& c) { v_sincos_default_32f<v_float32x8, v_int32x8>(x, s, c); }
3173+
inline v_float32x8 v_sin(const v_float32x8& x) { return v_sin_default_32f<v_float32x8, v_int32x8>(x); }
3174+
inline v_float32x8 v_cos(const v_float32x8& x) { return v_cos_default_32f<v_float32x8, v_int32x8>(x); }
3175+
inline v_float32x8 v_erf(const v_float32x8& x) { return v_erf_default_32f<v_float32x8, v_int32x8>(x); }
3176+
3177+
inline v_float64x4 v_exp(const v_float64x4& x) { return v_exp_default_64f<v_float64x4, v_int64x4>(x); }
3178+
inline v_float64x4 v_log(const v_float64x4& x) { return v_log_default_64f<v_float64x4, v_int64x4>(x); }
3179+
inline void v_sincos(const v_float64x4& x, v_float64x4& s, v_float64x4& c) { v_sincos_default_64f<v_float64x4, v_int64x4>(x, s, c); }
3180+
inline v_float64x4 v_sin(const v_float64x4& x) { return v_sin_default_64f<v_float64x4, v_int64x4>(x); }
3181+
inline v_float64x4 v_cos(const v_float64x4& x) { return v_cos_default_64f<v_float64x4, v_int64x4>(x); }
31763182

31773183
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
31783184

modules/core/include/opencv2/core/hal/intrin_avx512.hpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3079,12 +3079,18 @@ inline int v_scan_forward(const v_float64x8& a) { return trailingZeros32(v_signm
30793079
inline void v512_cleanup() { _mm256_zeroall(); }
30803080

30813081
#include "intrin_math.hpp"
3082-
inline v_float32x16 v_exp(v_float32x16 x) { return v_exp_default_32f<v_float32x16, v_int32x16>(x); }
3083-
inline v_float32x16 v_log(v_float32x16 x) { return v_log_default_32f<v_float32x16, v_int32x16>(x); }
3084-
inline v_float32x16 v_erf(v_float32x16 x) { return v_erf_default_32f<v_float32x16, v_int32x16>(x); }
3085-
3086-
inline v_float64x8 v_exp(v_float64x8 x) { return v_exp_default_64f<v_float64x8, v_int64x8>(x); }
3087-
inline v_float64x8 v_log(v_float64x8 x) { return v_log_default_64f<v_float64x8, v_int64x8>(x); }
3082+
inline v_float32x16 v_exp(const v_float32x16& x) { return v_exp_default_32f<v_float32x16, v_int32x16>(x); }
3083+
inline v_float32x16 v_log(const v_float32x16& x) { return v_log_default_32f<v_float32x16, v_int32x16>(x); }
3084+
inline void v_sincos(const v_float32x16& x, v_float32x16& s, v_float32x16& c) { v_sincos_default_32f<v_float32x16, v_int32x16>(x, s, c); }
3085+
inline v_float32x16 v_sin(const v_float32x16& x) { return v_sin_default_32f<v_float32x16, v_int32x16>(x); }
3086+
inline v_float32x16 v_cos(const v_float32x16& x) { return v_cos_default_32f<v_float32x16, v_int32x16>(x); }
3087+
inline v_float32x16 v_erf(const v_float32x16& x) { return v_erf_default_32f<v_float32x16, v_int32x16>(x); }
3088+
3089+
inline v_float64x8 v_exp(const v_float64x8& x) { return v_exp_default_64f<v_float64x8, v_int64x8>(x); }
3090+
inline v_float64x8 v_log(const v_float64x8& x) { return v_log_default_64f<v_float64x8, v_int64x8>(x); }
3091+
inline void v_sincos(const v_float64x8& x, v_float64x8& s, v_float64x8& c) { v_sincos_default_64f<v_float64x8, v_int64x8>(x, s, c); }
3092+
inline v_float64x8 v_sin(const v_float64x8& x) { return v_sin_default_64f<v_float64x8, v_int64x8>(x); }
3093+
inline v_float64x8 v_cos(const v_float64x8& x) { return v_cos_default_64f<v_float64x8, v_int64x8>(x); }
30883094

30893095
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
30903096

modules/core/include/opencv2/core/hal/intrin_cpp.hpp

Lines changed: 35 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -264,7 +264,7 @@ Most of these operations return only one value.
264264
### Other math
265265
266266
- Some frequent operations: @ref v_sqrt, @ref v_invsqrt, @ref v_magnitude, @ref v_sqr_magnitude, @ref v_exp, @ref v_log,
267-
@ref v_erf
267+
@ref v_erf, @ref v_sin, @ref v_cos
268268
- Absolute values: @ref v_abs, @ref v_absdiff, @ref v_absdiffs
269269
270270
### Conversions
@@ -366,6 +366,7 @@ Floating point:
366366
|broadcast_element | x | |
367367
|exp | x | x |
368368
|log | x | x |
369+
|sin, cos | x | x |
369370
370371
@{ */
371372

@@ -745,10 +746,41 @@ OPENCV_HAL_IMPL_MATH_FUNC(v_log, std::log, _Tp)
745746
*/
746747
OPENCV_HAL_IMPL_MATH_FUNC(v_erf, std::erf, _Tp)
747748

748-
//! @cond IGNORED
749+
/**
750+
* @brief Compute sine \f$ sin(x) \f$ and cosine \f$ cos(x) \f$ of elements at the same time
751+
*
752+
* Only for floating point types. Core implementation steps:
753+
* 1. Input Normalization: Scale the periodicity from 2π to 4 and reduce the angle to the range \f$ [0, \frac{\pi}{4}] \f$ using periodicity and trigonometric identities.
754+
* 2. Polynomial Approximation for \f$ sin(x) \f$ and \f$ cos(x) \f$:
755+
* - For float16 and float32, use a Taylor series with 4 terms for sine and 5 terms for cosine.
756+
* - For float64, use a Taylor series with 7 terms for sine and 8 terms for cosine.
757+
* 3. Select Results: select and convert the final sine and cosine values for the original input angle.
758+
*
759+
* @note The precision of the calculation depends on the implementation and the data type of the input vector.
760+
*/
761+
template<typename _Tp, int n>
762+
inline void v_sincos(const v_reg<_Tp, n>& x, v_reg<_Tp, n>& s, v_reg<_Tp, n>& c)
763+
{
764+
for( int i = 0; i < n; i++ )
765+
{
766+
s.s[i] = std::sin(x.s[i]);
767+
c.s[i] = std::cos(x.s[i]);
768+
}
769+
}
770+
771+
/**
772+
* @brief Sine \f$ sin(x) \f$ of elements
773+
*
774+
* Only for floating point types. Core implementation the same as @ref v_sincos.
775+
*/
749776
OPENCV_HAL_IMPL_MATH_FUNC(v_sin, std::sin, _Tp)
777+
778+
/**
779+
* @brief Cosine \f$ cos(x) \f$ of elements
780+
*
781+
* Only for floating point types. Core implementation the same as @ref v_sincos.
782+
*/
750783
OPENCV_HAL_IMPL_MATH_FUNC(v_cos, std::cos, _Tp)
751-
//! @endcond
752784

753785
/** @brief Absolute value of elements
754786

modules/core/include/opencv2/core/hal/intrin_lasx.hpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3014,12 +3014,18 @@ inline void v_pack_store(hfloat* ptr, const v_float32x8& a)
30143014
inline void v256_cleanup() {}
30153015

30163016
#include "intrin_math.hpp"
3017-
inline v_float32x8 v_exp(v_float32x8 x) { return v_exp_default_32f<v_float32x8, v_int32x8>(x); }
3018-
inline v_float32x8 v_log(v_float32x8 x) { return v_log_default_32f<v_float32x8, v_int32x8>(x); }
3019-
inline v_float32x8 v_erf(v_float32x8 x) { return v_erf_default_32f<v_float32x8, v_int32x8>(x); }
3020-
3021-
inline v_float64x4 v_exp(v_float64x4 x) { return v_exp_default_64f<v_float64x4, v_int64x4>(x); }
3022-
inline v_float64x4 v_log(v_float64x4 x) { return v_log_default_64f<v_float64x4, v_int64x4>(x); }
3017+
inline v_float32x8 v_exp(const v_float32x8& x) { return v_exp_default_32f<v_float32x8, v_int32x8>(x); }
3018+
inline v_float32x8 v_log(const v_float32x8& x) { return v_log_default_32f<v_float32x8, v_int32x8>(x); }
3019+
inline void v_sincos(const v_float32x8& x, v_float32x8& s, v_float32x8& c) { v_sincos_default_32f<v_float32x8, v_int32x8>(x, s, c); }
3020+
inline v_float32x8 v_sin(const v_float32x8& x) { return v_sin_default_32f<v_float32x8, v_int32x8>(x); }
3021+
inline v_float32x8 v_cos(const v_float32x8& x) { return v_cos_default_32f<v_float32x8, v_int32x8>(x); }
3022+
inline v_float32x8 v_erf(const v_float32x8& x) { return v_erf_default_32f<v_float32x8, v_int32x8>(x); }
3023+
3024+
inline v_float64x4 v_exp(const v_float64x4& x) { return v_exp_default_64f<v_float64x4, v_int64x4>(x); }
3025+
inline v_float64x4 v_log(const v_float64x4& x) { return v_log_default_64f<v_float64x4, v_int64x4>(x); }
3026+
inline void v_sincos(const v_float64x4& x, v_float64x4& s, v_float64x4& c) { v_sincos_default_64f<v_float64x4, v_int64x4>(x, s, c); }
3027+
inline v_float64x4 v_sin(const v_float64x4& x) { return v_sin_default_64f<v_float64x4, v_int64x4>(x); }
3028+
inline v_float64x4 v_cos(const v_float64x4& x) { return v_cos_default_64f<v_float64x4, v_int64x4>(x); }
30233029

30243030
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
30253031

modules/core/include/opencv2/core/hal/intrin_lsx.hpp

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2524,12 +2524,18 @@ inline void v_pack_store(hfloat* ptr, const v_float32x4& a)
25242524
inline void v_cleanup() {}
25252525

25262526
#include "intrin_math.hpp"
2527-
inline v_float32x4 v_exp(v_float32x4 x) { return v_exp_default_32f<v_float32x4, v_int32x4>(x); }
2528-
inline v_float32x4 v_log(v_float32x4 x) { return v_log_default_32f<v_float32x4, v_int32x4>(x); }
2529-
inline v_float32x4 v_erf(v_float32x4 x) { return v_erf_default_32f<v_float32x4, v_int32x4>(x); }
2530-
2531-
inline v_float64x2 v_exp(v_float64x2 x) { return v_exp_default_64f<v_float64x2, v_int64x2>(x); }
2532-
inline v_float64x2 v_log(v_float64x2 x) { return v_log_default_64f<v_float64x2, v_int64x2>(x); }
2527+
inline v_float32x4 v_exp(const v_float32x4& x) { return v_exp_default_32f<v_float32x4, v_int32x4>(x); }
2528+
inline v_float32x4 v_log(const v_float32x4& x) { return v_log_default_32f<v_float32x4, v_int32x4>(x); }
2529+
inline void v_sincos(const v_float32x4& x, v_float32x4& s, v_float32x4& c) { v_sincos_default_32f<v_float32x4, v_int32x4>(x, s, c); }
2530+
inline v_float32x4 v_sin(const v_float32x4& x) { return v_sin_default_32f<v_float32x4, v_int32x4>(x); }
2531+
inline v_float32x4 v_cos(const v_float32x4& x) { return v_cos_default_32f<v_float32x4, v_int32x4>(x); }
2532+
inline v_float32x4 v_erf(const v_float32x4& x) { return v_erf_default_32f<v_float32x4, v_int32x4>(x); }
2533+
2534+
inline v_float64x2 v_exp(const v_float64x2& x) { return v_exp_default_64f<v_float64x2, v_int64x2>(x); }
2535+
inline v_float64x2 v_log(const v_float64x2& x) { return v_log_default_64f<v_float64x2, v_int64x2>(x); }
2536+
inline void v_sincos(const v_float64x2& x, v_float64x2& s, v_float64x2& c) { v_sincos_default_64f<v_float64x2, v_int64x2>(x, s, c); }
2537+
inline v_float64x2 v_sin(const v_float64x2& x) { return v_sin_default_64f<v_float64x2, v_int64x2>(x); }
2538+
inline v_float64x2 v_cos(const v_float64x2& x) { return v_cos_default_64f<v_float64x2, v_int64x2>(x); }
25332539

25342540
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
25352541

0 commit comments

Comments
 (0)