@@ -1389,7 +1389,7 @@ static const uint32_t iq1s_grid_us[2048] = {
13891389};
13901390#endif
13911391
1392- #ifndef HAVE_FANCY_SIMD
1392+ #if !(defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__)
13931393const uint64_t keven_signs[128] = {
13941394 0x0101010101010101, 0xff010101010101ff, 0xff0101010101ff01, 0x010101010101ffff,
13951395 0xff01010101ff0101, 0x0101010101ff01ff, 0x0101010101ffff01, 0xff01010101ffffff,
@@ -7574,7 +7574,7 @@ struct DequantizerIQ1BN {
75747574 _mm256_set_epi64x(0x0300010003000900, 0x1b00510001000300, 0x09001b0051000100, 0x030009001b005100),
75757575 };
75767576 const __m256i m3 = _mm256_set1_epi16(3);
7577- #ifdef HAVE_FANCY_SIMD
7577+ #if defined HAVE_FANCY_SIMD && defined __AVX512VBMI__
75787578 const __m256i bmask = _mm256_set_epi8(62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0);
75797579#endif
75807580
@@ -7585,7 +7585,7 @@ struct DequantizerIQ1BN {
75857585 auto val2 = _mm256_mulhi_epu16(_mm256_mullo_epi16(_mm256_shuffle_epi8(data, shuff[1]), mult[1]), m3);
75867586 auto val3 = _mm256_mulhi_epu16(_mm256_mullo_epi16(_mm256_shuffle_epi8(data, shuff[2]), mult[2]), m3);
75877587 auto val4 = _mm256_mulhi_epu16(_mm256_mullo_epi16(_mm256_shuffle_epi8(data, shuff[3]), mult[3]), m3);
7588- #ifdef HAVE_FANCY_SIMD
7588+ #if defined HAVE_FANCY_SIMD && defined __AVX512VBMI__
75897589 v1 = _mm256_permutex2var_epi8(val1, bmask, val2);
75907590 v2 = _mm256_permutex2var_epi8(val3, bmask, val4);
75917591#else
@@ -7866,7 +7866,7 @@ struct DequantizerIQ3S final : public BaseDequantizer<block_iq3_s> {
78667866};
78677867
78687868struct EvenSignHelper {
7869- #ifdef HAVE_FANCY_SIMD
7869+ #if defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__
78707870 union sbits_t {
78717871 __m128i vec;
78727872 __mmask32 mask[4];
@@ -7931,7 +7931,7 @@ struct DequantizerIQ3XXS final : public BaseDequantizer<block_iq3_xxs> {
79317931 }
79327932
79337933 IQK_ALWAYS_INLINE void sign_2_values(const uint16_t * signs, __m256i * values) const {
7934- #ifdef HAVE_FANCY_SIMD
7934+ #if defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__
79357935 esh.sign_2_values(MM256_SET_M128I(_mm_set1_epi32(signs[2] | (signs[3] << 16)), _mm_set1_epi32(signs[0] | (signs[1] << 16))), values);
79367936#else
79377937 esh.sign_value(signs[0] | (signs[1] << 16), values[0]);
@@ -8106,7 +8106,7 @@ struct DequantizerIQ2XS final : public BaseDequantizer<block_iq2_xs> {
81068106 value = _mm256_sign_epi8(value, _mm256_or_si256(signs, mone));
81078107 }
81088108 inline void sign_values(const __m256i& data, __m256i * values) const {
8109- #ifdef HAVE_FANCY_SIMD
8109+ #if defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__
81108110 auto partial_bits = _mm256_cvtepi16_epi8(_mm256_srli_epi16(data, 9));
81118111 auto pcnt = _mm_popcnt_epi8(partial_bits);
81128112 auto full_bits = _mm_or_si128(partial_bits, _mm_slli_epi16(_mm_and_si128(pcnt, _mm_set1_epi8(1)), 7));
@@ -8156,7 +8156,7 @@ struct DequantizerIQ2XS final : public BaseDequantizer<block_iq2_xs> {
81568156 constexpr static int minv = 43;
81578157
81588158 SimpleBits bits;
8159- #ifndef HAVE_FANCY_SIMD
8159+ #if !(defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__)
81608160 Helper helper;
81618161#endif
81628162 const __m256i idx_mask = _mm256_set1_epi16(511);
@@ -8201,7 +8201,7 @@ struct DequantizerIQ2XXS final : public BaseDequantizer<block_iq2_xxs> {
82018201 }
82028202
82038203 IQK_ALWAYS_INLINE void sign_values(const uint32_t * aux32, __m256i * values) const {
8204- #ifdef HAVE_FANCY_SIMD
8204+ #if defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__
82058205 esh.sign_2_values(MM256_SET_M128I(_mm_set1_epi32(aux32[3]), _mm_set1_epi32(aux32[1])), values+0);
82068206 esh.sign_2_values(MM256_SET_M128I(_mm_set1_epi32(aux32[7]), _mm_set1_epi32(aux32[5])), values+2);
82078207#else
0 commit comments