Skip to content

Commit 47d2663

Browse files
committed
Rename IQ3_KS (original) into IQ3_KS_V1
1 parent e7fb001 commit 47d2663

File tree

22 files changed: 87 additions and 87 deletions.

ggml/include/ggml.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -425,7 +425,7 @@ extern "C" {
425425
GGML_TYPE_IQ3_KT = 154,
426426
GGML_TYPE_IQ4_KT = 155,
427427

428-
GGML_TYPE_IQ3_KS = 196,
428+
GGML_TYPE_IQ3_KS_V1 = 196,
429429

430430
GGML_TYPE_Q4_0_R8 = 202,
431431
GGML_TYPE_Q5_0_R4 = 206,
@@ -514,7 +514,7 @@ extern "C" {
514514
GGML_FTYPE_MOSTLY_IQ3_KT = 143, // except 1d tensors
515515
GGML_FTYPE_MOSTLY_IQ4_KT = 144, // except 1d tensors
516516

517-
GGML_FTYPE_MOSTLY_IQ3_KS = 185, // except 1d tensors
517+
GGML_FTYPE_MOSTLY_IQ3_KS_V1 = 185, // except 1d tensors
518518
//
519519
GGML_FTYPE_MOSTLY_Q4_0_R8 = 202, // except 1d tensors
520520
GGML_FTYPE_MOSTLY_Q8_0_R8 = 207, // except 1d tensors

ggml/src/ggml-common.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -762,8 +762,8 @@ typedef struct {
762762
uint8_t scales[QK_K/32];
763763
uint8_t qs[QK_K/4];
764764
uint8_t qh[QK_K/8];
765-
} block_iq3_ks;
766-
static_assert(sizeof(block_iq3_ks) == QK_K/32 + QK_K/4 + QK_K/8, "wrong iq3_ks block size/padding");
765+
} block_iq3_ks_v1;
766+
static_assert(sizeof(block_iq3_ks_v1) == QK_K/32 + QK_K/4 + QK_K/8, "wrong iq3_ks_v1 block size/padding");
767767

768768
typedef struct {
769769
ggml_half d;

ggml/src/ggml-cpu/ggml-cpu-quants.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,7 @@ void quantize_row_iq4_xs (const float * GGML_RESTRICT x, void * GGML_RESTRICT y,
5454
// void quantize_row_iq4_k (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
5555
// void quantize_row_iq5_k (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
5656
// void quantize_row_iq5_ks (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
57-
// void quantize_row_iq3_ks (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
57+
// void quantize_row_iq3_ks_v1 (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
5858
// void quantize_row_iq6_k (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
5959
// void quantize_row_iq2_kt (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
6060
// void quantize_row_iq3_kt (const float * GGML_RESTRICT x, void * GGML_RESTRICT y, int64_t k);
@@ -98,7 +98,7 @@ void ggml_vec_dot_iq3_s_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const
9898
// void vec_dot_iq4_k_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
9999
// void vec_dot_iq5_k_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
100100
// void vec_dot_iq5_ks_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
101-
// void vec_dot_iq3_ks_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
101+
// void vec_dot_iq3_ks_v1_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
102102
// void vec_dot_iq6_k_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
103103
// void vec_dot_iq2_kt_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);
104104
// void vec_dot_iq3_kt_q8_K (int n, float * GGML_RESTRICT s, size_t bs, const void * GGML_RESTRICT vx, size_t bx, const void * GGML_RESTRICT vy, size_t by, int nrc);

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -632,9 +632,9 @@ static const struct ggml_type_traits_cpu type_traits_cpu[GGML_TYPE_COUNT] = {
632632
.vec_dot_type = GGML_TYPE_Q8_K,
633633
.nrows = 1,
634634
},
635-
[GGML_TYPE_IQ3_KS] = {
636-
.from_float = quantize_row_iq3_ks,
637-
.vec_dot = vec_dot_iq3_ks_q8_k,
635+
[GGML_TYPE_IQ3_KS_V1] = {
636+
.from_float = quantize_row_iq3_ks_v1,
637+
.vec_dot = vec_dot_iq3_ks_v1_q8_k,
638638
.vec_dot_type = GGML_TYPE_Q8_K,
639639
.nrows = 1,
640640
},

ggml/src/ggml-cpu/ops.cpp

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1744,7 +1744,7 @@ void ggml_compute_forward_add(
17441744
case GGML_TYPE_Q6_K:
17451745
case GGML_TYPE_TQ1_0:
17461746
case GGML_TYPE_TQ2_0:
1747-
case GGML_TYPE_IQ3_KS:
1747+
case GGML_TYPE_IQ3_KS_V1:
17481748
case GGML_TYPE_Q6_K_R4:
17491749
case GGML_TYPE_Q8_K_R8:
17501750
case GGML_TYPE_Q8_KR8:
@@ -2226,7 +2226,7 @@ void ggml_compute_forward_add1(
22262226
case GGML_TYPE_Q6_K:
22272227
case GGML_TYPE_TQ1_0:
22282228
case GGML_TYPE_TQ2_0:
2229-
case GGML_TYPE_IQ3_KS:
2229+
case GGML_TYPE_IQ3_KS_V1:
22302230
case GGML_TYPE_Q6_K_R4:
22312231
case GGML_TYPE_Q8_K_R8:
22322232
case GGML_TYPE_Q8_KR8:
@@ -2405,7 +2405,7 @@ void ggml_compute_forward_acc(
24052405
case GGML_TYPE_Q6_K:
24062406
case GGML_TYPE_TQ1_0:
24072407
case GGML_TYPE_TQ2_0:
2408-
case GGML_TYPE_IQ3_KS:
2408+
case GGML_TYPE_IQ3_KS_V1:
24092409
case GGML_TYPE_Q6_K_R4:
24102410
case GGML_TYPE_Q8_K_R8:
24112411
case GGML_TYPE_Q8_KR8:
@@ -5113,7 +5113,7 @@ void ggml_compute_forward_out_prod(
51135113
case GGML_TYPE_Q6_K:
51145114
case GGML_TYPE_TQ1_0:
51155115
case GGML_TYPE_TQ2_0:
5116-
case GGML_TYPE_IQ3_KS:
5116+
case GGML_TYPE_IQ3_KS_V1:
51175117
case GGML_TYPE_Q6_K_R4:
51185118
case GGML_TYPE_Q8_K_R8:
51195119
case GGML_TYPE_Q8_KR8:
@@ -5621,7 +5621,7 @@ void ggml_compute_forward_set(
56215621
case GGML_TYPE_Q6_K:
56225622
case GGML_TYPE_TQ1_0:
56235623
case GGML_TYPE_TQ2_0:
5624-
case GGML_TYPE_IQ3_KS:
5624+
case GGML_TYPE_IQ3_KS_V1:
56255625
case GGML_TYPE_Q6_K_R4:
56265626
case GGML_TYPE_Q8_K_R8:
56275627
case GGML_TYPE_Q8_KR8:
@@ -5934,7 +5934,7 @@ void ggml_compute_forward_get_rows(
59345934
case GGML_TYPE_Q6_K:
59355935
case GGML_TYPE_TQ1_0:
59365936
case GGML_TYPE_TQ2_0:
5937-
case GGML_TYPE_IQ3_KS:
5937+
case GGML_TYPE_IQ3_KS_V1:
59385938
case GGML_TYPE_Q6_K_R4:
59395939
case GGML_TYPE_Q8_K_R8:
59405940
case GGML_TYPE_Q8_KR8:
@@ -6688,7 +6688,7 @@ void ggml_compute_forward_clamp(
66886688
case GGML_TYPE_Q6_K:
66896689
case GGML_TYPE_TQ1_0:
66906690
case GGML_TYPE_TQ2_0:
6691-
case GGML_TYPE_IQ3_KS:
6691+
case GGML_TYPE_IQ3_KS_V1:
66926692
case GGML_TYPE_Q6_K_R4:
66936693
case GGML_TYPE_Q8_K_R8:
66946694
case GGML_TYPE_Q8_KR8:

ggml/src/ggml-cuda/common.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -730,7 +730,7 @@ struct ggml_cuda_type_traits<GGML_TYPE_IQ4_KSS> {
730730
};
731731

732732
template<>
733-
struct ggml_cuda_type_traits<GGML_TYPE_IQ3_KS> {
733+
struct ggml_cuda_type_traits<GGML_TYPE_IQ3_KS_V1> {
734734
static constexpr int qk = QK_K;
735735
static constexpr int qr = QR4_XS;
736736
static constexpr int qi = QI4_XS;

ggml/src/ggml-cuda/convert.cu

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -684,13 +684,13 @@ static __global__ void dequantize_block_iq4_xs(const void * __restrict__ vx, dst
684684
}
685685

686686
template<typename dst_t>
687-
static __global__ void dequantize_block_iq3_ks(const void * __restrict__ vx, dst_t * __restrict__ yy, int64_t n_per_row, int64_t row_size) {
687+
static __global__ void dequantize_block_iq3_ks_v1(const void * __restrict__ vx, dst_t * __restrict__ yy, int64_t n_per_row, int64_t row_size) {
688688

689689
int64_t ii = blockIdx.x;
690690
int64_t row = (QK_K * ii) / n_per_row;
691691
const char * cx = (const char *)vx + row * row_size;
692692
float scale = *(const float *)cx;
693-
const block_iq3_ks * x = (const block_iq3_ks *)(cx + sizeof(float));
693+
const block_iq3_ks_v1 * x = (const block_iq3_ks_v1 *)(cx + sizeof(float));
694694
const int64_t i = ii - (row*n_per_row)/QK_K;
695695

696696
const int tid = threadIdx.x;
@@ -1616,11 +1616,11 @@ static void dequantize_row_iq2_ks_cuda(const void * vx, dst_t * y, const int64_t
16161616
}
16171617

16181618
template<typename dst_t>
1619-
static void dequantize_row_iq3_ks_cuda(const void * vx, dst_t * y, const int64_t nrows, const int64_t n_per_row, cudaStream_t stream) {
1619+
static void dequantize_row_iq3_ks_v1_cuda(const void * vx, dst_t * y, const int64_t nrows, const int64_t n_per_row, cudaStream_t stream) {
16201620
const int64_t k = nrows * n_per_row;
1621-
const int64_t row_size = ggml_row_size(GGML_TYPE_IQ3_KS, n_per_row);
1621+
const int64_t row_size = ggml_row_size(GGML_TYPE_IQ3_KS_V1, n_per_row);
16221622
const int nb = (k + QK_K - 1) / QK_K;
1623-
dequantize_block_iq3_ks<<<nb, 32, 0, stream>>>(vx, y, n_per_row, row_size);
1623+
dequantize_block_iq3_ks_v1<<<nb, 32, 0, stream>>>(vx, y, n_per_row, row_size);
16241624
}
16251625

16261626
template<typename dst_t>
@@ -1818,8 +1818,8 @@ to_bf16_cuda_t ggml_get_to_bf16_cuda(ggml_type type) {
18181818
return dequantize_row_iq2_k_cuda<nv_bfloat16>;
18191819
case GGML_TYPE_IQ3_K:
18201820
return dequantize_row_iq3_k_cuda<nv_bfloat16>;
1821-
case GGML_TYPE_IQ3_KS:
1822-
return dequantize_row_iq3_ks_cuda<nv_bfloat16>;
1821+
case GGML_TYPE_IQ3_KS_V1:
1822+
return dequantize_row_iq3_ks_v1_cuda<nv_bfloat16>;
18231823
case GGML_TYPE_IQ4_KSS:
18241824
return dequantize_row_iq4_kss_cuda<nv_bfloat16>;
18251825
case GGML_TYPE_IQ4_KS:
@@ -1914,8 +1914,8 @@ to_fp16_cuda_t ggml_get_to_fp16_cuda(ggml_type type) {
19141914
return dequantize_row_iq4_xs_cuda;
19151915
case GGML_TYPE_IQ2_KS:
19161916
return dequantize_row_iq2_ks_cuda;
1917-
case GGML_TYPE_IQ3_KS:
1918-
return dequantize_row_iq3_ks_cuda;
1917+
case GGML_TYPE_IQ3_KS_V1:
1918+
return dequantize_row_iq3_ks_v1_cuda;
19191919
case GGML_TYPE_IQ2_K:
19201920
return dequantize_row_iq2_k_cuda;
19211921
case GGML_TYPE_IQ3_K:
@@ -2018,8 +2018,8 @@ to_fp32_cuda_t ggml_get_to_fp32_cuda(ggml_type type) {
20182018
return dequantize_row_iq4_kss_cuda;
20192019
case GGML_TYPE_IQ2_KS:
20202020
return dequantize_row_iq2_ks_cuda;
2021-
case GGML_TYPE_IQ3_KS:
2022-
return dequantize_row_iq3_ks_cuda;
2021+
case GGML_TYPE_IQ3_KS_V1:
2022+
return dequantize_row_iq3_ks_v1_cuda;
20232023
case GGML_TYPE_IQ2_K:
20242024
return dequantize_row_iq2_k_cuda;
20252025
case GGML_TYPE_IQ3_K:

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3433,7 +3433,7 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const g
34333433
case GGML_TYPE_IQ4_KSS:
34343434
case GGML_TYPE_IQ2_K:
34353435
case GGML_TYPE_IQ2_KS:
3436-
case GGML_TYPE_IQ3_KS:
3436+
case GGML_TYPE_IQ3_KS_V1:
34373437
case GGML_TYPE_IQ2_KT:
34383438
case GGML_TYPE_IQ3_KT:
34393439
case GGML_TYPE_IQ4_KT:

ggml/src/ggml-cuda/iqk_mmvq.cu

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1105,11 +1105,11 @@ __device__ __forceinline__ void vec_dot_iq3_k_q8_1(
11051105
__low2float(bq8_1[4*ib128+3].ds) * aux8[3] * (sh & 0x40 ? -1 : 1) * sumi[3]);
11061106
}
11071107

1108-
__device__ __forceinline__ void vec_dot_iq3_ks_q8_1(
1108+
__device__ __forceinline__ void vec_dot_iq3_ks_v1_q8_1(
11091109
const void * __restrict__ vbq, const block_q8_1 * __restrict__ bq8_1, const int & kbx, const int & iiqs, float * result) {
11101110

11111111
const float d = *(const float *)vbq;
1112-
const block_iq3_ks * bq3 = (const block_iq3_ks *)((const char *)vbq + sizeof(float)) + kbx;
1112+
const block_iq3_ks_v1 * bq3 = (const block_iq3_ks_v1 *)((const char *)vbq + sizeof(float)) + kbx;
11131113

11141114
int iqs = iiqs/4;
11151115
const int ib128 = iqs/4; // 0 or 1. 0 works on quants 0...127, 1 on quants 128...255
@@ -1283,11 +1283,11 @@ void mul_mat_vec_iq3_k_q8_1_cuda(
12831283
iqk_mul_mat_vec_q_cuda<GGML_TYPE_IQ3_K, VDR_IQ3_K_Q8_1_MMVQ, vec_dot_iq3_k_q8_1>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst, stream);
12841284
}
12851285

1286-
void mul_mat_vec_iq3_ks_q8_1_cuda(
1286+
void mul_mat_vec_iq3_ks_v1_q8_1_cuda(
12871287
const void * vx, const void * vy, float * dst,
12881288
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst, cudaStream_t stream) {
12891289

1290-
iqk_mul_mat_vec_q_cuda<GGML_TYPE_IQ3_KS, VDR_IQ3_K_Q8_1_MMVQ, vec_dot_iq3_ks_q8_1>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst, stream);
1290+
iqk_mul_mat_vec_q_cuda<GGML_TYPE_IQ3_KS_V1, VDR_IQ3_K_Q8_1_MMVQ, vec_dot_iq3_ks_v1_q8_1>(vx, vy, dst, ncols_x, nrows_x, nrows_y, ncols_y, nrows_dst, stream);
12911291
}
12921292

12931293
void mul_mat_vec_iq4_k_q8_1_cuda(

ggml/src/ggml-cuda/iqk_mmvq.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ void mul_mat_vec_iq2_ks_q8_1_cuda(
3232
const void * vx, const void * vy, float * dst,
3333
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst, cudaStream_t stream);
3434

35-
void mul_mat_vec_iq3_ks_q8_1_cuda(
35+
void mul_mat_vec_iq3_ks_v1_q8_1_cuda(
3636
const void * vx, const void * vy, float * dst,
3737
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst, cudaStream_t stream);
3838

0 commit comments

Comments
 (0)