diff --git a/src/avx512-16bit-common.h b/src/avx512-16bit-common.h index 524ce7a..e1a76d3 100644 --- a/src/avx512-16bit-common.h +++ b/src/avx512-16bit-common.h @@ -14,11 +14,11 @@ struct avx512_16bit_swizzle_ops { __m512i v = vtype::cast_to(reg); if constexpr (scale == 2) { - std::vector<uint16_t> arr + constexpr static uint16_t arr[] = {1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14, 17, 16, 19, 18, 21, 20, 23, 22, 25, 24, 27, 26, 29, 28, 31, 30}; - __m512i mask = _mm512_loadu_si512(arr.data()); + __m512i mask = _mm512_loadu_si512(arr); v = _mm512_permutexvar_epi16(mask, v); } else if constexpr (scale == 4) { @@ -48,27 +48,27 @@ struct avx512_16bit_swizzle_ops { if constexpr (scale == 2) { return swap_n<vtype, 2>(reg); } else if constexpr (scale == 4) { - std::vector<uint16_t> arr + constexpr static uint16_t arr[] = {3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12, 19, 18, 17, 16, 23, 22, 21, 20, 27, 26, 25, 24, 31, 30, 29, 28}; - __m512i mask = _mm512_loadu_si512(arr.data()); + __m512i mask = _mm512_loadu_si512(arr); v = _mm512_permutexvar_epi16(mask, v); } else if constexpr (scale == 8) { - std::vector<int16_t> arr + constexpr static int16_t arr[] = {7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8, 23, 22, 21, 20, 19, 18, 17, 16, 31, 30, 29, 28, 27, 26, 25, 24}; - __m512i mask = _mm512_loadu_si512(arr.data()); + __m512i mask = _mm512_loadu_si512(arr); v = _mm512_permutexvar_epi16(mask, v); } else if constexpr (scale == 16) { - std::vector<uint16_t> arr + constexpr static uint16_t arr[] = {15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16}; - __m512i mask = _mm512_loadu_si512(arr.data()); + __m512i mask = _mm512_loadu_si512(arr); v = _mm512_permutexvar_epi16(mask, v); } else if constexpr (scale == 32) {