@@ -174,4 +174,32 @@ NPY_FINLINE npyv_s64 npyv_min_s64(npyv_s64 a, npyv_s64 b)
174174 }
175175#endif
176176
177+ // trunc
178+ #ifdef NPY_HAVE_SSE41
// With SSE4.1 truncation maps directly onto the rounding intrinsics;
// _MM_FROUND_TO_ZERO selects round-toward-zero.
// These must be function-like macros: the parameter list has to follow the
// macro name with no whitespace in between, otherwise the preprocessor treats
// "(A)" as part of an object-like macro's replacement text.
#define npyv_trunc_f32(A) _mm_round_ps(A, _MM_FROUND_TO_ZERO)
#define npyv_trunc_f64(A) _mm_round_pd(A, _MM_FROUND_TO_ZERO)
181+ #else
182+ NPY_FINLINE npyv_f32 npyv_trunc_f32 (npyv_f32 a )
183+ {
184+ const npyv_f32 szero = _mm_set1_ps (-0.0f );
185+ npyv_s32 roundi = _mm_cvttps_epi32 (a );
186+ npyv_f32 trunc = _mm_cvtepi32_ps (roundi );
187+ // respect signed zero, e.g. -0.5 -> -0.0
188+ npyv_f32 rzero = _mm_or_ps (trunc , _mm_and_ps (a , szero ));
189+ // if overflow return a
190+ return npyv_select_f32 (_mm_cmpeq_epi32 (roundi , _mm_castps_si128 (szero )), a , rzero );
191+ }
192+ NPY_FINLINE npyv_f64 npyv_trunc_f64 (npyv_f64 a )
193+ {
194+ const npyv_f64 szero = _mm_set1_pd (-0.0 );
195+ const npyv_f64 one = _mm_set1_pd (1.0 );
196+ const npyv_f64 two_power_52 = _mm_set1_pd (0x10000000000000 );
197+ npyv_f64 abs_a = npyv_abs_f64 (a );
198+ // round by add magic number 2^52
199+ npyv_f64 abs_round = _mm_sub_pd (_mm_add_pd (abs_a , two_power_52 ), two_power_52 );
200+ npyv_f64 subtrahend = _mm_and_pd (_mm_cmpgt_pd (abs_round , abs_a ), one );
201+ return _mm_or_pd (_mm_sub_pd (abs_round , subtrahend ), _mm_and_pd (a , szero ));
202+ }
203+ #endif
204+
177205#endif // _NPY_SIMD_SSE_MATH_H
0 commit comments