Skip to content

Commit fc6fb76

Browse files
committed
Revert "CUDA: fuse ffn_up*unary_op(ffn_gate) for MMVQ (V2) (ikawrakow#864)"
This reverts commit f76e985.
1 parent 8d72972 commit fc6fb76

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

55 files changed

+2311
-2588
lines changed

ggml/src/CMakeLists.txt

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -357,8 +357,6 @@ if (GGML_CUDA)
357357
list(APPEND GGML_SOURCES_CUDA ${SRCS})
358358
file(GLOB SRCS "ggml-cuda/template-instances/mmq*.cu")
359359
list(APPEND GGML_SOURCES_CUDA ${SRCS})
360-
file(GLOB SRCS "ggml-cuda/template-instances/mmvq-instance*.cu")
361-
list(APPEND GGML_SOURCES_CUDA ${SRCS})
362360

363361
if (GGML_CUDA_FA_ALL_QUANTS)
364362
file(GLOB SRCS "ggml-cuda/template-instances/fattn-vec*.cu")

ggml/src/ggml-cuda.cu

Lines changed: 169 additions & 61 deletions
Large diffs are not rendered by default.

ggml/src/ggml-cuda/iqk_mmvq.cu

Lines changed: 1497 additions & 71 deletions
Large diffs are not rendered by default.

ggml/src/ggml-cuda/iqk_mmvq.cuh

Lines changed: 122 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,127 @@
66

77
#include "common.cuh"
88

9-
struct mmvq_args;
9+
void mul_mat_vec_iq2_k_q8_1_cuda(
10+
const void * vx, const void * vy, float * dst, const char * ids_data,
11+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
12+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
1013

11-
void iqk_mul_mat_vec_q(ggml_type type, const mmvq_args & args, cudaStream_t stream);
14+
void mul_mat_vec_iq3_k_q8_1_cuda(
15+
const void * vx, const void * vy, float * dst, const char * ids_data,
16+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
17+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
1218

19+
void mul_mat_vec_iq2_kl_q8_1_cuda(
20+
const void * vx, const void * vy, float * dst, const char * ids_data,
21+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
22+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
23+
24+
void mul_mat_vec_iq3_ks_q8_1_cuda(
25+
const void * vx, const void * vy, float * dst, const char * ids_data,
26+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
27+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
28+
29+
void mul_mat_vec_iq4_k_q8_1_cuda(
30+
const void * vx, const void * vy, float * dst, const char * ids_data,
31+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
32+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
33+
34+
void mul_mat_vec_iq5_k_q8_1_cuda(
35+
const void * vx, const void * vy, float * dst, const char * ids_data,
36+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
37+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
38+
39+
void mul_mat_vec_iq5_ks_q8_1_cuda(
40+
const void * vx, const void * vy, float * dst, const char * ids_data,
41+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
42+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
43+
44+
void mul_mat_vec_iq6_k_q8_1_cuda(
45+
const void * vx, const void * vy, float * dst, const char * ids_data,
46+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
47+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
48+
49+
void mul_mat_vec_iq4_ks_q8_1_cuda(
50+
const void * vx, const void * vy, float * dst, const char * ids_data,
51+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
52+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
53+
54+
void mul_mat_vec_iq4_kss_q8_1_cuda(
55+
const void * vx, const void * vy, float * dst, const char * ids_data,
56+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
57+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
58+
59+
void mul_mat_vec_iq2_ks_q8_1_cuda(
60+
const void * vx, const void * vy, float * dst, const char * ids_data,
61+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
62+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
63+
64+
void mul_mat_vec_iq1_bn_q8_1_cuda(
65+
const void * vx, const void * vy, float * dst, const char * ids_data,
66+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
67+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
68+
69+
void mul_mat_vec_iq2_bn_q8_1_cuda(
70+
const void * vx, const void * vy, float * dst, const char * ids_data,
71+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
72+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
73+
74+
void mul_mat_vec_iq2_k_r4_q8_1_cuda(
75+
const void * vx, const void * vy, float * dst, const char * ids_data,
76+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
77+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
78+
79+
void mul_mat_vec_iq3_k_r4_q8_1_cuda(
80+
const void * vx, const void * vy, float * dst, const char * ids_data,
81+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
82+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
83+
84+
void mul_mat_vec_iq4_k_r4_q8_1_cuda(
85+
const void * vx, const void * vy, float * dst, const char * ids_data,
86+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
87+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
88+
89+
void mul_mat_vec_iq5_k_r4_q8_1_cuda(
90+
const void * vx, const void * vy, float * dst, const char * ids_data,
91+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
92+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
93+
94+
void mul_mat_vec_iq4_ks_r4_q8_1_cuda(
95+
const void * vx, const void * vy, float * dst, const char * ids_data,
96+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
97+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
98+
99+
void mul_mat_vec_iq5_ks_r4_q8_1_cuda(
100+
const void * vx, const void * vy, float * dst, const char * ids_data,
101+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
102+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
103+
104+
void mul_mat_vec_iq1_s_r4_q8_1_cuda(
105+
const void * vx, const void * vy, float * dst, const char * ids_data,
106+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
107+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
108+
109+
void mul_mat_vec_iq1_m_r4_q8_1_cuda(
110+
const void * vx, const void * vy, float * dst, const char * ids_data,
111+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
112+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
113+
114+
void mul_mat_vec_iq1_kt_q8_1_cuda(
115+
const void * vx, const void * vy, float * dst, const char * ids_data,
116+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
117+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
118+
119+
void mul_mat_vec_iq2_kt_q8_1_cuda(
120+
const void * vx, const void * vy, float * dst, const char * ids_data,
121+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
122+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
123+
124+
void mul_mat_vec_iq3_kt_q8_1_cuda(
125+
const void * vx, const void * vy, float * dst, const char * ids_data,
126+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
127+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);
128+
129+
void mul_mat_vec_iq4_kt_q8_1_cuda(
130+
const void * vx, const void * vy, float * dst, const char * ids_data,
131+
const int ncols_x, const int nrows_x, const int nrows_y, const int ncols_y, const int nrows_dst,
132+
const int ne2, const uint64_t nb02, const uint64_t nb12, const uint64_t nb2, const int64_t ids_nb0, cudaStream_t stream);

0 commit comments

Comments
 (0)