Move kvalues_iq4nl definition to ggml-common.h

HungMingWu · HungMingWu · commit be09d983fed6 · 2025-02-10T15:27:09.000+08:00
Signed-off-by: HungMingWu <u9089000@gmail.com>
diff --git a/ggml/src/ggml-common.h b/ggml/src/ggml-common.h
@@ -1070,6 +1070,10 @@ GGML_TABLE_BEGIN(uint32_t, iq3s_grid, 512)
     0x0f090307, 0x0f090501, 0x0f090b01, 0x0f0b0505, 0x0f0b0905, 0x0f0d0105, 0x0f0d0703, 0x0f0f0101,
 GGML_TABLE_END()
 
+GGML_TABLE_BEGIN(int8_t, kvalues_iq4nl, 16)
+   -127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38,  53, 69, 89, 113,
+GGML_TABLE_END()
+
 #define NGRID_IQ1S 2048
 #define IQ1S_DELTA 0.125f
 #define IQ1M_DELTA 0.125f
diff --git a/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp b/ggml/src/ggml-cpu/ggml-cpu-aarch64.cpp
@@ -223,8 +223,6 @@ static inline __m256i mul_sum_i8_pairs_int32x8(const __m256i x, const __m256i y)
 }
 #endif
 
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-
 static void quantize_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {
     assert(QK8_0 == 32);
     assert(k % QK8_0 == 0);
diff --git a/ggml/src/ggml-cpu/ggml-cpu-quants.c b/ggml/src/ggml-cpu/ggml-cpu-quants.c
@@ -1639,8 +1639,6 @@ void quantize_row_tq2_0(const float * restrict x, void * restrict vy, int64_t k)
     quantize_row_tq2_0_ref(x, y, k);
 }
 
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-
 //===================================== Q8_K ==============================================
 
 void quantize_row_q8_K(const float * restrict x, void * restrict y, int64_t k) {
diff --git a/ggml/src/ggml-cpu/llamafile/sgemm.cpp b/ggml/src/ggml-cpu/llamafile/sgemm.cpp
@@ -52,6 +52,8 @@
 #include "ggml-impl.h"
 #include "ggml-cpu-impl.h"
 #include "ggml-quants.h"
+#define GGML_COMMON_IMPL_CPP
+#include "ggml-common.h"
 
 #include <atomic>
 #include <array>
@@ -284,7 +286,6 @@ template <> inline __m256bh load(const float *p) {
 // CONSTANTS
 
 #if defined(__AVX__) || defined(__AVX2__) || defined(__AVX512F__)
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
 static const __m128i iq4nlt = _mm_loadu_si128((const __m128i *) kvalues_iq4nl);
 #endif
 
diff --git a/ggml/src/ggml-cuda/common.cuh b/ggml/src/ggml-cuda/common.cuh
@@ -358,9 +358,6 @@ static __device__ __forceinline__ int ggml_cuda_dp4a(const int a, const int b, i
 #endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)
 }
 
-// TODO: move to ggml-common.h
-static constexpr __device__ int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-
 typedef void (*dequantize_kernel_t)(const void * vx, const int64_t ib, const int iqs, dfloat2 & v);
 
 static __device__ __forceinline__ float get_alibi_slope(
diff --git a/ggml/src/ggml-quants.c b/ggml/src/ggml-quants.c
@@ -2431,8 +2431,6 @@ void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, in
     }
 }
 
-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-
 void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y, int64_t k) {
     assert(k % QK4_NL == 0);
     const int64_t nb = k / QK4_NL;
diff --git a/ggml/src/ggml-sycl/common.hpp b/ggml/src/ggml-sycl/common.hpp
@@ -139,8 +139,6 @@ typedef sycl::float2 dfloat2;
 
 #define MMVQ_MAX_BATCH_SIZE  8
 
-static const int8_t kvalues_iq4nl[16]={-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};
-
 static int g_all_sycl_device_count = -1;
 static bool g_ggml_backend_sycl_buffer_type_initialized = false;
 

Original file line number	Diff line number	Diff line change
`@@ -223,8 +223,6 @@ static inline __m256i mul_sum_i8_pairs_int32x8(const __m256i x, const __m256i y)`
`223`	`223`	`}`
`224`	`224`	`#endif`
`225`	`225`
`226`		`-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};`
`227`		`-`
`228`	`226`	`static void quantize_q8_0_4x4(const float * GGML_RESTRICT x, void * GGML_RESTRICT vy, int64_t k) {`
`229`	`227`	`assert(QK8_0 == 32);`
`230`	`228`	`assert(k % QK8_0 == 0);`
Original file line number	Diff line number	Diff line change
`@@ -1639,8 +1639,6 @@ void quantize_row_tq2_0(const float * restrict x, void * restrict vy, int64_t k)`
`1639`	`1639`	`quantize_row_tq2_0_ref(x, y, k);`
`1640`	`1640`	`}`
`1641`	`1641`
`1642`		`-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};`
`1643`		`-`
`1644`	`1642`	`//===================================== Q8_K ==============================================`
`1645`	`1643`
`1646`	`1644`	`void quantize_row_q8_K(const float * restrict x, void * restrict y, int64_t k) {`
Original file line number	Diff line number	Diff line change
`@@ -358,9 +358,6 @@ static __device__ __forceinline__ int ggml_cuda_dp4a(const int a, const int b, i`
`358`	`358`	`#endif // defined(GGML_USE_HIP) && defined(__HIP_PLATFORM_AMD__)`
`359`	`359`	`}`
`360`	`360`
`361`		`-// TODO: move to ggml-common.h`
`362`		`-static constexpr __device__ int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};`
`363`		`-`
`364`	`361`	`typedef void (*dequantize_kernel_t)(const void * vx, const int64_t ib, const int iqs, dfloat2 & v);`
`365`	`362`
`366`	`363`	`static __device__ __forceinline__ float get_alibi_slope(`
Original file line number	Diff line number	Diff line change
`@@ -2431,8 +2431,6 @@ void dequantize_row_iq1_m(const block_iq1_m * restrict x, float * restrict y, in`
`2431`	`2431`	`}`
`2432`	`2432`	`}`
`2433`	`2433`
`2434`		`-static const int8_t kvalues_iq4nl[16] = {-127, -104, -83, -65, -49, -35, -22, -10, 1, 13, 25, 38, 53, 69, 89, 113};`
`2435`		`-`
`2436`	`2434`	`void dequantize_row_iq4_nl(const block_iq4_nl * restrict x, float * restrict y, int64_t k) {`
`2437`	`2435`	`assert(k % QK4_NL == 0);`
`2438`	`2436`	`const int64_t nb = k / QK4_NL;`