
Commit 135132e

Various fixes
Add IQ3_KS to sd
Fixing a few mismerges
Update requirements.txt
Update ggml-cuda.cu
Parent commit: 0e80b11

6 files changed, 13 insertions(+), 9 deletions(-)

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 2 additions & 2 deletions
@@ -1807,8 +1807,8 @@ void ggml_compute_forward_mul_mat(
     int64_t const vec_dot_num_rows = type_traits_cpu[src0->type].nrows;
     // int64_t const matmul_num_cols = type_traits_cpu[type].ncols;
 #if !GGML_USE_IQK_MULMAT
-    ggml_from_float_to_mat_t const from_float_to_mat = type_traits[vec_dot_type].from_float_to_mat;
-    int64_t const blck_size_interleave = type_traits[type].blck_size_interleave;
+    // ggml_from_float_to_mat_t const from_float_to_mat = type_traits[vec_dot_type].from_float_to_mat;
+    // int64_t const blck_size_interleave = type_traits[type].blck_size_interleave;
 #endif

     GGML_ASSERT(ne0 == ne01);

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 2 additions & 2 deletions
@@ -2214,12 +2214,12 @@ static void ggml_cuda_mul_mat(ggml_backend_cuda_context & ctx, const ggml_tensor
         ggml_cuda_mul_mat_vec_nc(ctx, src0, src1, dst);

     // } else if (!split && use_mul_mat_vec && dst->ne[3] == 1 && (src0->ne[1] < MMV_MAX_ROWS || any_gpus_without_fp16_mma)) {
-    } else if (!split && use_mul_mat_vec && (src0->ne[1] < MMV_MAX_ROWS || any_gpus_without_fp16_mma)) {
+    } else if (!split && use_mul_mat_vec && (src0->ne[1] <= MMV_MAX_ROWS || any_gpus_without_fp16_mma)) {
         // the custom F16 vector kernel can be used over batched cuBLAS GEMM
         // but this is only faster for GPUs without tensor cores or with a thin src0 matrix (particularly KQV in attention)
         ggml_cuda_mul_mat_vec(ctx, src0, src1, dst);

-    } else if (!split && src0->type == GGML_TYPE_F16 && src1->ne[1] == 1 && dst->ne[3] == 1 && (src0->ne[1] < MMV_MAX_ROWS || any_gpus_without_fp16_mma)) {
+    } else if (!split && src0->type == GGML_TYPE_F16 && src1->ne[1] == 1 && dst->ne[3] == 1 && (src0->ne[1] <= MMV_MAX_ROWS || any_gpus_without_fp16_mma)) {
         ggml_cuda_mul_mat_vec(ctx, src0, src1, dst);

     } else if (!split && src0->type == GGML_TYPE_F16 && (src1->type == GGML_TYPE_F16 || !any_gpus_with_slow_fp16) &&
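
The only functional change in this hunk is the comparison against MMV_MAX_ROWS: a src0 with exactly MMV_MAX_ROWS rows used to fall through to the later cuBLAS/MMQ branches and now also takes the custom mul-mat-vec kernel. A minimal sketch of the boundary case, with a hypothetical value for MMV_MAX_ROWS (the real constant is defined elsewhere in the CUDA backend):

#include <stdbool.h>
#include <stdint.h>

#define MMV_MAX_ROWS 512   /* hypothetical value, for illustration only */

/* ne01 stands for src0->ne[1], the number of rows of src0 */
static bool takes_mmv_old(int64_t ne01, bool any_gpus_without_fp16_mma) {
    return ne01 <  MMV_MAX_ROWS || any_gpus_without_fp16_mma;   /* exactly 512 rows -> false */
}
static bool takes_mmv_new(int64_t ne01, bool any_gpus_without_fp16_mma) {
    return ne01 <= MMV_MAX_ROWS || any_gpus_without_fp16_mma;   /* exactly 512 rows -> true  */
}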

ggml/src/ggml-quants.c

Lines changed: 2 additions & 2 deletions
@@ -4999,12 +4999,12 @@ size_t quantize_iq1_m(const float * GGML_RESTRICT src, void * GGML_RESTRICT dst,
     return nrow * nblock * sizeof(block_iq1_m);
 }

-void quantize_row_iq1_m_ref (const float * GGML_RESTRICT x, block_iq1_s * GGML_RESTRICT y, int64_t k) {
+void quantize_row_iq1_m_ref (const float * GGML_RESTRICT x, block_iq1_m * GGML_RESTRICT y, int64_t k) {
     int nblock = k/QK_K;
     float qw[QK_K];
     for (int j = 0; j < QK_K; ++j) qw[j] = 1;
     for (int ibl = 0; ibl < nblock; ++ibl) {
-        quantize_iq1_s(x + ibl*QK_K, &y[ibl], 1, QK_K, qw);
+        quantize_iq1_m(x + ibl*QK_K, &y[ibl], 1, QK_K, qw);
     }
 }
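
With the block type and the per-block call corrected, the reference path now actually produces IQ1_M blocks. A minimal usage sketch, assuming k is a multiple of QK_K and that ggml-quants.h is on the include path (names other than the ggml ones are illustrative):

#include <stdint.h>
#include <stdlib.h>
#include "ggml-quants.h"   /* block_iq1_m, QK_K, quantize_row_iq1_m_ref */

/* Quantize one row of k floats into IQ1_M blocks; k must be a multiple of QK_K. */
static block_iq1_m * quantize_row_iq1_m_example(const float * x, int64_t k) {
    int64_t nblock = k / QK_K;
    block_iq1_m * y = malloc(nblock * sizeof(block_iq1_m));
    if (y) {
        quantize_row_iq1_m_ref(x, y, k);   /* each QK_K chunk goes through quantize_iq1_m */
    }
    return y;   /* caller frees */
}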

ggml/src/ggml-quants.h

Lines changed: 2 additions & 2 deletions
@@ -38,8 +38,8 @@ GGML_API void quantize_row_q8_K64_ref(const float * GGML_RESTRICT x, block_q8_K6
 GGML_API void quantize_row_tq1_0_ref(const float * GGML_RESTRICT x, block_tq1_0 * GGML_RESTRICT y, int64_t k);
 GGML_API void quantize_row_tq2_0_ref(const float * GGML_RESTRICT x, block_tq2_0 * GGML_RESTRICT y, int64_t k);

-GGML_API void quantize_row_iq1_s_ref(const float * GGML_RESTRICT x, block_iq2_xxs * GGML_RESTRICT y, int64_t k);
-GGML_API void quantize_row_iq1_m_ref(const float * GGML_RESTRICT x, block_iq2_xs * GGML_RESTRICT y, int64_t k);
+GGML_API void quantize_row_iq1_s_ref(const float * GGML_RESTRICT x, block_iq1_s * GGML_RESTRICT y, int64_t k);
+GGML_API void quantize_row_iq1_m_ref(const float * GGML_RESTRICT x, block_iq1_m * GGML_RESTRICT y, int64_t k);
 GGML_API void quantize_row_iq2_xxs_ref(const float * GGML_RESTRICT x, block_iq2_xxs * GGML_RESTRICT y, int64_t k);
 GGML_API void quantize_row_iq2_xs_ref (const float * GGML_RESTRICT x, block_iq2_xs * GGML_RESTRICT y, int64_t k);
 GGML_API void quantize_row_iq3_xxs_ref(const float * GGML_RESTRICT x, block_iq3_xxs * GGML_RESTRICT y, int64_t k);
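
The header now matches the definitions in ggml-quants.c; with the old prototypes, the block type in the declaration disagreed with the one in the definition, which is a conflicting-types error in C as soon as both are visible in one translation unit. A reduced, hypothetical example of that failure mode (the names below are stand-ins, not ggml types):

#include <stdint.h>

typedef struct { int a; } block_foo;   /* stand-in block types */
typedef struct { int b; } block_bar;

void quantize_row_foo_ref(const float * x, block_bar * y, int64_t k);      /* declaration with the wrong block type...          */
void quantize_row_foo_ref(const float * x, block_foo * y, int64_t k) { }   /* ...clashes with the definition: conflicting types */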

otherarch/sdcpp/stable-diffusion.h

Lines changed: 1 addition & 0 deletions
@@ -130,6 +130,7 @@ enum sd_type_t {
     SD_TYPE_IQ2_KT = 153,
     SD_TYPE_IQ3_KT = 154,
     SD_TYPE_IQ4_KT = 155,
+    SD_TYPE_IQ3_KS = 156,

     SD_TYPE_IQ3_KS_V1 = 196,
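
A hedged sketch of where the new enum value typically has to be handled (a hypothetical helper, not something from stable-diffusion.h): any switch over sd_type_t, such as one mapping a type id to a display name, gains a case for the new member.

/* hypothetical helper, for illustration only */
static const char * sd_type_name_sketch(enum sd_type_t t) {
    switch (t) {
        case SD_TYPE_IQ2_KT: return "iq2_kt";
        case SD_TYPE_IQ3_KT: return "iq3_kt";
        case SD_TYPE_IQ4_KT: return "iq4_kt";
        case SD_TYPE_IQ3_KS: return "iq3_ks";   /* new entry from this commit (id 156) */
        default:             return "other";
    }
}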

requirements.txt

Lines changed: 4 additions & 1 deletion
@@ -1,8 +1,11 @@
 numpy>=1.26.4
 sentencepiece>=0.2.0
 transformers>=4.51.3
-gguf~=0.13.0
+gguf~=0.10.0
 customtkinter>=5.2.2
 protobuf>=4.21.12
 psutil>=6.1.1
 darkdetect>=0.8.0
+pdfplumber>=0.11.6
+PyMuPdf>=1.26.0
+tqdm>=4.67.1
