@@ -402,6 +402,7 @@ struct ggml_backend_opencl_context {
402
402
cl_program program_conv_2d_f16_f32;
403
403
cl_program program_tsembd;
404
404
cl_program program_mul_mv_id_q4_0_f32_8x_flat;
405
+ cl_program program_mul_mv_id_q8_0_f32;
405
406
cl_program program_mul_mv_id_mxfp4_f32;
406
407
cl_program program_mul_mm_f32_f32_l4_lm;
407
408
cl_program program_mul_mm_f16_f32_l4_lm;
@@ -470,6 +471,7 @@ struct ggml_backend_opencl_context {
470
471
cl_kernel kernel_conv_2d_f16_f32;
471
472
cl_kernel kernel_timestep_embedding;
472
473
cl_kernel kernel_mul_mv_id_q4_0_f32_8x_flat;
474
+ cl_kernel kernel_mul_mv_id_q8_0_f32;
473
475
cl_kernel kernel_mul_mv_id_mxfp4_f32;
474
476
cl_kernel kernel_mul_mm_f32_f32_l4_lm;
475
477
cl_kernel kernel_mul_mm_f16_f32_l4_lm;
@@ -1729,6 +1731,22 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
1729
1731
GGML_LOG_CONT (" ." );
1730
1732
}
1731
1733
1734
+ // mul_mv_id_q8_0_f32
1735
+ {
1736
+ #ifdef GGML_OPENCL_EMBED_KERNELS
1737
+ const std::string kernel_src {
1738
+ #include " mul_mv_id_q8_0_f32.cl.h"
1739
+ };
1740
+ #else
1741
+ const std::string kernel_src = read_file (" mul_mv_id_q8_0_f32.cl" );
1742
+ #endif
1743
+ backend_ctx->program_mul_mv_id_q8_0_f32 =
1744
+ build_program_from_source (backend_ctx->context , backend_ctx->device , kernel_src.c_str (), compile_opts);
1745
+
1746
+ CL_CHECK ((backend_ctx->kernel_mul_mv_id_q8_0_f32 = clCreateKernel (backend_ctx->program_mul_mv_id_q8_0_f32 , " kernel_mul_mv_id_q8_0_f32" , &err), err));
1747
+ GGML_LOG_CONT (" ." );
1748
+ }
1749
+
1732
1750
// mul_mv_id_mxfp4_f32
1733
1751
{
1734
1752
#ifdef GGML_OPENCL_EMBED_KERNELS
@@ -2748,6 +2766,7 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te
2748
2766
return false ;
2749
2767
case GGML_OP_MUL_MAT_ID:
2750
2768
if (op->src [0 ]->type == GGML_TYPE_Q4_0 ||
2769
+ op->src [0 ]->type == GGML_TYPE_Q8_0 ||
2751
2770
op->src [0 ]->type == GGML_TYPE_MXFP4) {
2752
2771
if (op->src [1 ]->type == GGML_TYPE_F32) {
2753
2772
return ggml_is_contiguous (op->src [0 ]) && ggml_is_contiguous (op->src [1 ]);
@@ -6997,6 +7016,46 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0,
6997
7016
6998
7017
break ;
6999
7018
}
7019
+ case GGML_TYPE_Q8_0: {
7020
+ kernel = backend_ctx->kernel_mul_mv_id_q8_0_f32 ;
7021
+
7022
+ if (backend_ctx->gpu_family == INTEL) {
7023
+ sgs = 16 ;
7024
+ nsg = 2 ;
7025
+ ndst = 4 ;
7026
+ } else if (backend_ctx->gpu_family == ADRENO) {
7027
+ sgs = 64 ;
7028
+ nsg = 2 ;
7029
+ ndst = 8 ;
7030
+ ndst = 4 ;
7031
+ } else {
7032
+ GGML_ASSERT (false && " TODO: Unknown GPU" );
7033
+ }
7034
+
7035
+ CL_CHECK (clSetKernelArg (kernel, 0 , sizeof (cl_mem), &extra0->data_device ));
7036
+ CL_CHECK (clSetKernelArg (kernel, 1 , sizeof (cl_ulong), &offset0));
7037
+ CL_CHECK (clSetKernelArg (kernel, 2 , sizeof (cl_mem), &extra1->data_device ));
7038
+ CL_CHECK (clSetKernelArg (kernel, 3 , sizeof (cl_ulong), &offset1));
7039
+ CL_CHECK (clSetKernelArg (kernel, 4 , sizeof (cl_mem), &extra2->data_device ));
7040
+ CL_CHECK (clSetKernelArg (kernel, 5 , sizeof (cl_ulong), &offset2));
7041
+ CL_CHECK (clSetKernelArg (kernel, 6 , sizeof (cl_mem), &extrad->data_device ));
7042
+ CL_CHECK (clSetKernelArg (kernel, 7 , sizeof (cl_ulong), &offsetd));
7043
+ CL_CHECK (clSetKernelArg (kernel, 8 , sizeof (int ), &ne00));
7044
+ CL_CHECK (clSetKernelArg (kernel, 9 , sizeof (int ), &ne01));
7045
+ CL_CHECK (clSetKernelArg (kernel, 10 , sizeof (cl_ulong), &nb01));
7046
+ CL_CHECK (clSetKernelArg (kernel, 11 , sizeof (cl_ulong), &nb02));
7047
+ CL_CHECK (clSetKernelArg (kernel, 12 , sizeof (int ), &ne11));
7048
+ CL_CHECK (clSetKernelArg (kernel, 13 , sizeof (int ), &ne12));
7049
+ CL_CHECK (clSetKernelArg (kernel, 14 , sizeof (cl_ulong), &nb11));
7050
+ CL_CHECK (clSetKernelArg (kernel, 15 , sizeof (cl_ulong), &nb12));
7051
+ CL_CHECK (clSetKernelArg (kernel, 16 , sizeof (int ), &ne20));
7052
+ CL_CHECK (clSetKernelArg (kernel, 17 , sizeof (int ), &ne21));
7053
+ CL_CHECK (clSetKernelArg (kernel, 18 , sizeof (cl_ulong), &nb21));
7054
+ CL_CHECK (clSetKernelArg (kernel, 19 , sizeof (int ), &ne0));
7055
+ CL_CHECK (clSetKernelArg (kernel, 20 , sizeof (int ), &ne1));
7056
+
7057
+ break ;
7058
+ }
7000
7059
case GGML_TYPE_MXFP4: {
7001
7060
kernel = backend_ctx->kernel_mul_mv_id_mxfp4_f32 ;
7002
7061
0 commit comments