@@ -87,7 +87,7 @@ NPY_FINLINE npyv_f32 npyv_loadn_f32(const float *ptr, npy_intp stride)
 #if 0 // slower
 NPY_FINLINE npyv_u64 npyv_loadn_u64(const npy_uint64 *ptr, npy_intp stride)
 {
-    const __m256i idx = _mm256_setr_epi64x(0, 1*stride, 2*stride, 3*stride);
+    const __m256i idx = npyv_set_s64(0, 1*stride, 2*stride, 3*stride);
     return _mm256_i64gather_epi64((const void*)ptr, idx, 8);
 }
 NPY_FINLINE npyv_s64 npyv_loadn_s64(const npy_int64 *ptr, npy_intp stride)
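This hunk swaps the raw _mm256_setr_epi64x intrinsic for the universal-intrinsic setter npyv_set_s64; the 64-bit element setters are not provided by every toolchain (notably 32-bit MSVC), so the wrapper keeps this gather path portable. As a minimal sketch of the underlying idea, one portable way to build a 4 x int64 vector without _mm256_setr_epi64x is to spill the lanes to the stack and reload them; the helper name set4_s64 below is hypothetical and not NumPy's actual definition of npyv_set_s64.

    #include <immintrin.h>
    #include <stdint.h>

    /* Minimal sketch, assuming a hypothetical helper set4_s64: build a 4 x int64
     * vector without relying on _mm256_setr_epi64x. */
    static inline __m256i set4_s64(int64_t e0, int64_t e1, int64_t e2, int64_t e3)
    {
        /* Spill the four lanes to the stack, then reload them as one 256-bit vector. */
        const int64_t tmp[4] = {e0, e1, e2, e3};
        return _mm256_loadu_si256((const __m256i*)tmp);
    }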
@@ -212,8 +212,8 @@ npyv_loadn_till_s64(const npy_int64 *ptr, npy_intp stride, npy_uintp nlane, npy_
 {
     assert(nlane > 0);
     const __m256i vfill = npyv_setall_s64(fill);
-    const __m256i idx   = _mm256_setr_epi64x(0, 1*stride, 2*stride, 3*stride);
-    const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3);
+    const __m256i idx   = npyv_set_s64(0, 1*stride, 2*stride, 3*stride);
+    const __m256i steps = npyv_set_s64(0, 1, 2, 3);
     __m256i vnlane = npyv_setall_s64(nlane > 4 ? 4 : (int)nlane);
     __m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
     return _mm256_mask_i64gather_epi64(vfill, (const void*)ptr, idx, mask, 8);
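In this partial strided load, the mask comes from comparing a broadcast of the clamped lane count against the step vector {0, 1, 2, 3}: lanes whose index is below nlane become all-ones and are gathered from ptr at stride-sized element offsets, while the remaining lanes keep the fill value. Below is a minimal self-contained sketch of that masking pattern written directly against the raw AVX2 intrinsics; the helper name loadn_till_4xs64 is illustrative only, not part of NumPy's API.

    #include <immintrin.h>
    #include <stdint.h>
    #include <stddef.h>
    #include <assert.h>

    /* Gather up to 4 int64 lanes from ptr with an element stride; lanes >= nlane
     * keep the fill value and never touch memory. */
    static inline __m256i loadn_till_4xs64(const int64_t *ptr, ptrdiff_t stride,
                                           size_t nlane, int64_t fill)
    {
        assert(nlane > 0);
        const __m256i vfill = _mm256_set1_epi64x(fill);
        const __m256i idx   = _mm256_setr_epi64x(0, 1*stride, 2*stride, 3*stride);
        const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3);
        /* Lane i gets an all-ones mask iff i < nlane (clamped to the vector width). */
        const __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int64_t)nlane);
        const __m256i mask   = _mm256_cmpgt_epi64(vnlane, steps);
        /* Scale 8 turns the element indices in idx into byte offsets from ptr. */
        return _mm256_mask_i64gather_epi64(vfill, (const long long*)ptr, idx, mask, 8);
    }

The same compare-based mask construction is reused for the partial store in the next hunk.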
@@ -238,7 +238,7 @@ NPY_FINLINE void npyv_store_till_s32(npy_int32 *ptr, npy_uintp nlane, npyv_s32 a
 NPY_FINLINE void npyv_store_till_s64(npy_int64 *ptr, npy_uintp nlane, npyv_s64 a)
 {
     assert(nlane > 0);
-    const __m256i steps = _mm256_setr_epi64x(0, 1, 2, 3);
+    const __m256i steps = npyv_set_s64(0, 1, 2, 3);
     __m256i vnlane = npyv_setall_s64(nlane > 8 ? 8 : (int)nlane);
     __m256i mask = _mm256_cmpgt_epi64(vnlane, steps);
     _mm256_maskstore_epi64((void*)ptr, mask, a);
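The store side applies the same compare-derived mask to _mm256_maskstore_epi64, so only the first nlane lanes are written and the tail of the destination stays untouched. A minimal sketch under the same assumptions follows; the helper name store_till_4xs64 is illustrative only, and it clamps to the 4-lane vector width, which yields the same mask as the clamp used above since any count past the last step already produces a full mask.

    #include <immintrin.h>
    #include <stdint.h>
    #include <stddef.h>
    #include <assert.h>

    /* Store only the first nlane lanes of a 4 x int64 vector to contiguous memory. */
    static inline void store_till_4xs64(int64_t *ptr, size_t nlane, __m256i a)
    {
        assert(nlane > 0);
        const __m256i steps  = _mm256_setr_epi64x(0, 1, 2, 3);
        const __m256i vnlane = _mm256_set1_epi64x(nlane > 4 ? 4 : (int64_t)nlane);
        const __m256i mask   = _mm256_cmpgt_epi64(vnlane, steps);
        /* Lanes whose mask sign bit is set are written; the rest are skipped. */
        _mm256_maskstore_epi64((long long*)ptr, mask, a);
    }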