@@ -684,13 +684,13 @@ static __global__ void dequantize_block_iq4_xs(const void * __restrict__ vx, dst
 }
 
 template <typename dst_t>
-static __global__ void dequantize_block_iq3_ks(const void * __restrict__ vx, dst_t * __restrict__ yy, int64_t n_per_row, int64_t row_size) {
+static __global__ void dequantize_block_iq3_ks_v1(const void * __restrict__ vx, dst_t * __restrict__ yy, int64_t n_per_row, int64_t row_size) {
 
     int64_t ii = blockIdx.x;
     int64_t row = (QK_K * ii) / n_per_row;
     const char * cx = (const char *)vx + row * row_size;
     float scale = *(const float *)cx;
-    const block_iq3_ks * x = (const block_iq3_ks *)(cx + sizeof(float));
+    const block_iq3_ks_v1 * x = (const block_iq3_ks_v1 *)(cx + sizeof(float));
     const int64_t i = ii - (row*n_per_row)/QK_K;
 
     const int tid = threadIdx.x;
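(Aside, not part of the diff: each CUDA block above handles one QK_K-sized superblock, and the row index and in-row superblock index fall out of integer division. A minimal worked example of that index math, assuming QK_K = 256 and a hypothetical n_per_row of 4096:)

```cpp
// Illustrative only: the same mapping as in the kernel above.
// With n_per_row = 4096 and QK_K = 256 there are 16 superblocks per row,
// so block index 17 lands on row 1, superblock 1 within that row.
const int64_t ii  = 17;                        // blockIdx.x in the kernel
const int64_t row = (256 * ii) / 4096;         // -> 4352 / 4096 = 1
const int64_t i   = ii - (row * 4096) / 256;   // -> 17 - 16 = 1
```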
@@ -1616,11 +1616,11 @@ static void dequantize_row_iq2_ks_cuda(const void * vx, dst_t * y, const int64_t
 }
 
 template <typename dst_t>
-static void dequantize_row_iq3_ks_cuda(const void * vx, dst_t * y, const int64_t nrows, const int64_t n_per_row, cudaStream_t stream) {
+static void dequantize_row_iq3_ks_v1_cuda(const void * vx, dst_t * y, const int64_t nrows, const int64_t n_per_row, cudaStream_t stream) {
     const int64_t k = nrows * n_per_row;
-    const int64_t row_size = ggml_row_size(GGML_TYPE_IQ3_KS, n_per_row);
+    const int64_t row_size = ggml_row_size(GGML_TYPE_IQ3_KS_V1, n_per_row);
     const int nb = (k + QK_K - 1) / QK_K;
-    dequantize_block_iq3_ks<<<nb, 32, 0, stream>>>(vx, y, n_per_row, row_size);
+    dequantize_block_iq3_ks_v1<<<nb, 32, 0, stream>>>(vx, y, n_per_row, row_size);
 }
 
 template <typename dst_t>
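(Aside, not part of the diff: the wrapper launches one 32-thread block per QK_K superblock across all rows, so the grid size is just the total value count rounded up to whole superblocks. A sketch of that arithmetic under assumed sizes:)

```cpp
// Illustrative only: grid size for a hypothetical 8 x 4096 tensor.
const int64_t k  = 8 * 4096;            // total quantized values
const int     nb = (k + 256 - 1) / 256; // 128 blocks, one per QK_K superblock
// dequantize_block_iq3_ks_v1<<<nb, 32, 0, stream>>>(vx, y, 4096, row_size);
```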
@@ -1818,8 +1818,8 @@ to_bf16_cuda_t ggml_get_to_bf16_cuda(ggml_type type) {
             return dequantize_row_iq2_k_cuda<nv_bfloat16>;
         case GGML_TYPE_IQ3_K:
             return dequantize_row_iq3_k_cuda<nv_bfloat16>;
-        case GGML_TYPE_IQ3_KS:
-            return dequantize_row_iq3_ks_cuda<nv_bfloat16>;
+        case GGML_TYPE_IQ3_KS_V1:
+            return dequantize_row_iq3_ks_v1_cuda<nv_bfloat16>;
         case GGML_TYPE_IQ4_KSS:
             return dequantize_row_iq4_kss_cuda<nv_bfloat16>;
         case GGML_TYPE_IQ4_KS:
@@ -1914,8 +1914,8 @@ to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
             return dequantize_row_iq4_xs_cuda;
         case GGML_TYPE_IQ2_KS:
             return dequantize_row_iq2_ks_cuda;
-        case GGML_TYPE_IQ3_KS:
-            return dequantize_row_iq3_ks_cuda;
+        case GGML_TYPE_IQ3_KS_V1:
+            return dequantize_row_iq3_ks_v1_cuda;
         case GGML_TYPE_IQ2_K:
             return dequantize_row_iq2_k_cuda;
         case GGML_TYPE_IQ3_K:
@@ -2018,8 +2018,8 @@ to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type) {
             return dequantize_row_iq4_kss_cuda;
         case GGML_TYPE_IQ2_KS:
             return dequantize_row_iq2_ks_cuda;
-        case GGML_TYPE_IQ3_KS:
-            return dequantize_row_iq3_ks_cuda;
+        case GGML_TYPE_IQ3_KS_V1:
+            return dequantize_row_iq3_ks_v1_cuda;
         case GGML_TYPE_IQ2_K:
             return dequantize_row_iq2_k_cuda;
         case GGML_TYPE_IQ3_K:
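(Aside, not part of the diff: a hedged sketch of how the renamed path is reached through the dispatch tables above. `ggml_get_to_fp32_cuda` and `GGML_TYPE_IQ3_KS_V1` appear in the diff; the buffer names and sizes here are hypothetical:)

```cpp
// Illustrative only: look up the fp32 dequantizer for IQ3_KS_V1 and run it.
// d_quant, d_out, nrows, n_per_row and stream are assumed to exist.
to_fp32_cuda_t to_fp32 = ggml_get_to_fp32_cuda(GGML_TYPE_IQ3_KS_V1);
to_fp32(d_quant, d_out, nrows, n_per_row, stream);
```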