Skip to content

Commit c7424c9

Browse files
authored
Merge branch 'ikawrakow:main' into main
2 parents 8780a09 + f76e985 commit c7424c9

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

55 files changed

+2588 additions, -2311 deletions (lines changed)

ggml/src/CMakeLists.txt

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -357,6 +357,8 @@ if (GGML_CUDA)
357 357   list(APPEND GGML_SOURCES_CUDA ${SRCS})
358 358   file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
359 359   list(APPEND GGML_SOURCES_CUDA ${SRCS})
    360 + file(GLOB SRCS "ggml-cuda/template-instances/mmvq-instance*.cu")
    361 + list(APPEND GGML_SOURCES_CUDA ${SRCS})
360 362
361 363   if (GGML_CUDA_FA_ALL_QUANTS)
362 364   file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu")

ggml/src/ggml-cuda.cu

Lines changed: 61 additions & 169 deletions
Large diffs are not rendered by default.

ggml/src/ggml-cuda/iqk_mmvq.cu

Lines changed: 71 additions & 1497 deletions
Large diffs are not rendered by default.

ggml/src/ggml-cuda/iqk_mmvq.cuh

Lines changed: 2 additions & 122 deletions
Original file line number | Diff line number | Diff line change
@@ -6,127 +6,7 @@
66

77
#include "common.cuh"
88

9-
void mul_mat_vec_iq2_k_q8_1_cuda(
10-
const void * vx, const void * vy, float * dst, const char * ids_data,
11-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
12-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
9+
struct mmvq_args;
1310

14-
void mul_mat_vec_iq3_k_q8_1_cuda(
15-
const void * vx, const void * vy, float * dst, const char * ids_data,
16-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
17-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
11+
void iqk_mul_mat_vec_q(ggml_type type, const mmvq_args & args, cudaStream_t stream);
1812

19-
void mul_mat_vec_iq2_kl_q8_1_cuda(
20-
const void * vx, const void * vy, float * dst, const char * ids_data,
21-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
22-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
23-
24-
void mul_mat_vec_iq3_ks_q8_1_cuda(
25-
const void * vx, const void * vy, float * dst, const char * ids_data,
26-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
27-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
28-
29-
void mul_mat_vec_iq4_k_q8_1_cuda(
30-
const void * vx, const void * vy, float * dst, const char * ids_data,
31-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
32-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
33-
34-
void mul_mat_vec_iq5_k_q8_1_cuda(
35-
const void * vx, const void * vy, float * dst, const char * ids_data,
36-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
37-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
38-
39-
void mul_mat_vec_iq5_ks_q8_1_cuda(
40-
const void * vx, const void * vy, float * dst, const char * ids_data,
41-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
42-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
43-
44-
void mul_mat_vec_iq6_k_q8_1_cuda(
45-
const void * vx, const void * vy, float * dst, const char * ids_data,
46-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
47-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
48-
49-
void mul_mat_vec_iq4_ks_q8_1_cuda(
50-
const void * vx, const void * vy, float * dst, const char * ids_data,
51-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
52-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
53-
54-
void mul_mat_vec_iq4_kss_q8_1_cuda(
55-
const void * vx, const void * vy, float * dst, const char * ids_data,
56-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
57-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
58-
59-
void mul_mat_vec_iq2_ks_q8_1_cuda(
60-
const void * vx, const void * vy, float * dst, const char * ids_data,
61-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
62-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
63-
64-
void mul_mat_vec_iq1_bn_q8_1_cuda(
65-
const void * vx, const void * vy, float * dst, const char * ids_data,
66-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
67-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
68-
69-
void mul_mat_vec_iq2_bn_q8_1_cuda(
70-
const void * vx, const void * vy, float * dst, const char * ids_data,
71-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
72-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
73-
74-
void mul_mat_vec_iq2_k_r4_q8_1_cuda(
75-
const void * vx, const void * vy, float * dst, const char * ids_data,
76-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
77-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
78-
79-
void mul_mat_vec_iq3_k_r4_q8_1_cuda(
80-
const void * vx, const void * vy, float * dst, const char * ids_data,
81-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
82-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
83-
84-
void mul_mat_vec_iq4_k_r4_q8_1_cuda(
85-
const void * vx, const void * vy, float * dst, const char * ids_data,
86-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
87-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
88-
89-
void mul_mat_vec_iq5_k_r4_q8_1_cuda(
90-
const void * vx, const void * vy, float * dst, const char * ids_data,
91-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
92-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
93-
94-
void mul_mat_vec_iq4_ks_r4_q8_1_cuda(
95-
const void * vx, const void * vy, float * dst, const char * ids_data,
96-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
97-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
98-
99-
void mul_mat_vec_iq5_ks_r4_q8_1_cuda(
100-
const void * vx, const void * vy, float * dst, const char * ids_data,
101-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
102-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
103-
104-
void mul_mat_vec_iq1_s_r4_q8_1_cuda(
105-
const void * vx, const void * vy, float * dst, const char * ids_data,
106-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
107-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
108-
109-
void mul_mat_vec_iq1_m_r4_q8_1_cuda(
110-
const void * vx, const void * vy, float * dst, const char * ids_data,
111-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
112-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
113-
114-
void mul_mat_vec_iq1_kt_q8_1_cuda(
115-
const void * vx, const void * vy, float * dst, const char * ids_data,
116-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
117-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
118-
119-
void mul_mat_vec_iq2_kt_q8_1_cuda(
120-
const void * vx, const void * vy, float * dst, const char * ids_data,
121-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
122-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
123-
124-
void mul_mat_vec_iq3_kt_q8_1_cuda(
125-
const void * vx, const void * vy, float * dst, const char * ids_data,
126-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
127-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
128-
129-
void mul_mat_vec_iq4_kt_q8_1_cuda(
130-
const void * vx, const void * vy, float * dst, const char * ids_data,
131-
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
132-
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);

0 commit comments

Comments
 (0)