Skip to content

Commit 4149a37

Browse files
howjmay authored and seiko2plus committed
ENH: Add conversion for 512bit intrin to 256bit intrin
Some operations have no 512-bit intrinsic. For those, the 512-bit operation can be constructed from two 256-bit intrinsics, one applied to each half of the vector. Therefore, NPYV_IMPL_AVX512_FROM_AVX2_PS_1ARG and NPYV_IMPL_AVX512_FROM_AVX2_PD_1ARG are added. The error in npyv512_combine_ps256 has also been fixed.
1 parent fa397e9 commit 4149a37

File tree

1 file changed

+21
-1
lines changed
  • numpy/core/src/common/simd/avx512

1 file changed

+21
-1
lines changed

numpy/core/src/common/simd/avx512/utils.h

Lines changed: 21 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -26,7 +26,7 @@
2626
#define npyv512_combine_ps256(A, B) _mm512_insertf32x8(_mm512_castps256_ps512(A), B, 1)
2727
#else
2828
#define npyv512_combine_ps256(A, B) \
29-
_mm512_castsi512_ps(npyv512_combine_si256(_mm512_castps_si512(A), _mm512_castps_si512(B)))
29+
_mm512_castsi512_ps(npyv512_combine_si256(_mm256_castps_si256(A), _mm256_castps_si256(B)))
3030
#endif
3131

3232
#define NPYV_IMPL_AVX512_FROM_AVX2_1ARG(FN_NAME, INTRIN) \
@@ -39,6 +39,26 @@
3939
return npyv512_combine_si256(l_a, h_a); \
4040
}
4141

42+
/*
 * Generates `FN_NAME`, a one-argument operation on a 512-bit float vector
 * (__m512) implemented with a 256-bit intrinsic. The input is split with
 * npyv512_lower_ps256 / npyv512_higher_ps256, `INTRIN` is applied to each
 * 256-bit half separately, and the two results are rejoined with
 * npyv512_combine_ps256. Intended for operations that have no 512-bit
 * intrinsic of their own.
 *
 * FN_NAME - name of the generated NPY_FINLINE function (__m512 -> __m512).
 * INTRIN  - function or macro taking one __m256 and yielding a __m256;
 *           it is invoked exactly twice, once per half.
 */
#define NPYV_IMPL_AVX512_FROM_AVX2_PS_1ARG(FN_NAME, INTRIN) \
43+
NPY_FINLINE __m512 FN_NAME(__m512 a) \
44+
{ \
45+
__m256 l_a = npyv512_lower_ps256(a); \
46+
__m256 h_a = npyv512_higher_ps256(a); \
47+
l_a = INTRIN(l_a); \
48+
h_a = INTRIN(h_a); \
49+
return npyv512_combine_ps256(l_a, h_a); \
50+
}
51+
52+
/*
 * Generates `FN_NAME`, a one-argument operation on a 512-bit double vector
 * (__m512d) implemented with a 256-bit intrinsic. The input is split with
 * npyv512_lower_pd256 / npyv512_higher_pd256, `INTRIN` is applied to each
 * 256-bit half separately, and the two results are rejoined with
 * npyv512_combine_pd256. Intended for operations that have no 512-bit
 * intrinsic of their own.
 *
 * FN_NAME - name of the generated NPY_FINLINE function (__m512d -> __m512d).
 * INTRIN  - function or macro taking one __m256d and yielding a __m256d;
 *           it is invoked exactly twice, once per half.
 */
#define NPYV_IMPL_AVX512_FROM_AVX2_PD_1ARG(FN_NAME, INTRIN) \
53+
NPY_FINLINE __m512d FN_NAME(__m512d a) \
54+
{ \
55+
__m256d l_a = npyv512_lower_pd256(a); \
56+
__m256d h_a = npyv512_higher_pd256(a); \
57+
l_a = INTRIN(l_a); \
58+
h_a = INTRIN(h_a); \
59+
return npyv512_combine_pd256(l_a, h_a); \
60+
}
61+
4262
#define NPYV_IMPL_AVX512_FROM_AVX2_2ARG(FN_NAME, INTRIN) \
4363
NPY_FINLINE __m512i FN_NAME(__m512i a, __m512i b) \
4464
{ \

0 commit comments

Comments
 (0)