Skip to content

Commit 090eae4

Browse files
ikawrakow and Iwan Kawrakow authored
Fix build for Xeon Gold 6226R (#390)
Co-authored-by: Iwan Kawrakow <[email protected]>
1 parent 6c23618 commit 090eae4

File tree

1 file changed

+8
-8
lines changed

1 file changed

+8
-8
lines changed

ggml/src/iqk/iqk_mul_mat.cpp

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -1389,7 +1389,7 @@ static const uint32_t iq1s_grid_us[2048] = {
13891389
};
13901390
#endif
13911391

1392-
#ifndef HAVE_FANCY_SIMD
1392+
#if !(defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__)
13931393
const uint64_t keven_signs[128] = {
13941394
0x0101010101010101, 0xff010101010101ff, 0xff0101010101ff01, 0x010101010101ffff,
13951395
0xff01010101ff0101, 0x0101010101ff01ff, 0x0101010101ffff01, 0xff01010101ffffff,
@@ -7574,7 +7574,7 @@ struct DequantizerIQ1BN {
75747574
_mm256_set_epi64x(0x0300010003000900, 0x1b00510001000300, 0x09001b0051000100, 0x030009001b005100),
75757575
};
75767576
const __m256i m3 = _mm256_set1_epi16(3);
7577-
#ifdef HAVE_FANCY_SIMD
7577+
#if defined HAVE_FANCY_SIMD && defined __AVX512VBMI__
75787578
const __m256i bmask = _mm256_set_epi8(62, 60, 58, 56, 54, 52, 50, 48, 46, 44, 42, 40, 38, 36, 34, 32, 30, 28, 26, 24, 22, 20, 18, 16, 14, 12, 10, 8, 6, 4, 2, 0);
75797579
#endif
75807580

@@ -7585,7 +7585,7 @@ struct DequantizerIQ1BN {
75857585
auto val2 = _mm256_mulhi_epu16(_mm256_mullo_epi16(_mm256_shuffle_epi8(data, shuff[1]), mult[1]), m3);
75867586
auto val3 = _mm256_mulhi_epu16(_mm256_mullo_epi16(_mm256_shuffle_epi8(data, shuff[2]), mult[2]), m3);
75877587
auto val4 = _mm256_mulhi_epu16(_mm256_mullo_epi16(_mm256_shuffle_epi8(data, shuff[3]), mult[3]), m3);
7588-
#ifdef HAVE_FANCY_SIMD
7588+
#if defined HAVE_FANCY_SIMD && defined __AVX512VBMI__
75897589
v1 = _mm256_permutex2var_epi8(val1, bmask, val2);
75907590
v2 = _mm256_permutex2var_epi8(val3, bmask, val4);
75917591
#else
@@ -7866,7 +7866,7 @@ struct DequantizerIQ3S final : public BaseDequantizer<block_iq3_s> {
78667866
};
78677867

78687868
struct EvenSignHelper {
7869-
#ifdef HAVE_FANCY_SIMD
7869+
#if defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__
78707870
union sbits_t {
78717871
__m128i vec;
78727872
__mmask32 mask[4];
@@ -7931,7 +7931,7 @@ struct DequantizerIQ3XXS final : public BaseDequantizer<block_iq3_xxs> {
79317931
}
79327932

79337933
IQK_ALWAYS_INLINE void sign_2_values(const uint16_t * signs, __m256i * values) const {
7934-
#ifdef HAVE_FANCY_SIMD
7934+
#if defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__
79357935
esh.sign_2_values(MM256_SET_M128I(_mm_set1_epi32(signs[2] | (signs[3] << 16)), _mm_set1_epi32(signs[0] | (signs[1] << 16))), values);
79367936
#else
79377937
esh.sign_value(signs[0] | (signs[1] << 16), values[0]);
@@ -8106,7 +8106,7 @@ struct DequantizerIQ2XS final : public BaseDequantizer<block_iq2_xs> {
81068106
value = _mm256_sign_epi8(value, _mm256_or_si256(signs, mone));
81078107
}
81088108
inline void sign_values(const __m256i& data, __m256i * values) const {
8109-
#ifdef HAVE_FANCY_SIMD
8109+
#if defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__
81108110
auto partial_bits = _mm256_cvtepi16_epi8(_mm256_srli_epi16(data, 9));
81118111
auto pcnt = _mm_popcnt_epi8(partial_bits);
81128112
auto full_bits = _mm_or_si128(partial_bits, _mm_slli_epi16(_mm_and_si128(pcnt, _mm_set1_epi8(1)), 7));
@@ -8156,7 +8156,7 @@ struct DequantizerIQ2XS final : public BaseDequantizer<block_iq2_xs> {
81568156
constexpr static int minv = 43;
81578157

81588158
SimpleBits bits;
8159-
#ifndef HAVE_FANCY_SIMD
8159+
#if !(defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__)
81608160
Helper helper;
81618161
#endif
81628162
const __m256i idx_mask = _mm256_set1_epi16(511);
@@ -8201,7 +8201,7 @@ struct DequantizerIQ2XXS final : public BaseDequantizer<block_iq2_xxs> {
82018201
}
82028202

82038203
IQK_ALWAYS_INLINE void sign_values(const uint32_t * aux32, __m256i * values) const {
8204-
#ifdef HAVE_FANCY_SIMD
8204+
#if defined HAVE_FANCY_SIMD && defined __AVX512VPOPCNTDQ__
82058205
esh.sign_2_values(MM256_SET_M128I(_mm_set1_epi32(aux32[3]), _mm_set1_epi32(aux32[1])), values+0);
82068206
esh.sign_2_values(MM256_SET_M128I(_mm_set1_epi32(aux32[7]), _mm_set1_epi32(aux32[5])), values+2);
82078207
#else

0 commit comments

Comments
 (0)