@@ -403,6 +403,7 @@ struct ggml_backend_opencl_context {
403
403
cl_program program_conv_2d_f16_f32;
404
404
cl_program program_tsembd;
405
405
cl_program program_mul_mv_id_q4_0_f32_8x_flat;
406
+ cl_program program_mul_mv_id_q8_0_f32;
406
407
cl_program program_mul_mv_id_mxfp4_f32;
407
408
cl_program program_mul_mv_id_mxfp4_f32_flat;
408
409
cl_program program_mul_mm_f32_f32_l4_lm;
@@ -473,6 +474,7 @@ struct ggml_backend_opencl_context {
473
474
cl_kernel kernel_conv_2d_f16_f32;
474
475
cl_kernel kernel_timestep_embedding;
475
476
cl_kernel kernel_mul_mv_id_q4_0_f32_8x_flat;
477
+ cl_kernel kernel_mul_mv_id_q8_0_f32;
476
478
cl_kernel kernel_mul_mv_id_mxfp4_f32;
477
479
cl_kernel kernel_mul_mv_id_mxfp4_f32_flat;
478
480
cl_kernel kernel_mul_mm_f32_f32_l4_lm;
@@ -1751,6 +1753,22 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
1751
1753
GGML_LOG_CONT (" ." );
1752
1754
}
1753
1755
1756
+ // mul_mv_id_q8_0_f32
1757
+ {
1758
+ #ifdef GGML_OPENCL_EMBED_KERNELS
1759
+ const std::string kernel_src {
1760
+ #include " mul_mv_id_q8_0_f32.cl.h"
1761
+ };
1762
+ #else
1763
+ const std::string kernel_src = read_file (" mul_mv_id_q8_0_f32.cl" );
1764
+ #endif
1765
+ backend_ctx->program_mul_mv_id_q8_0_f32 =
1766
+ build_program_from_source (backend_ctx->context , backend_ctx->device , kernel_src.c_str (), compile_opts);
1767
+
1768
+ CL_CHECK ((backend_ctx->kernel_mul_mv_id_q8_0_f32 = clCreateKernel (backend_ctx->program_mul_mv_id_q8_0_f32 , " kernel_mul_mv_id_q8_0_f32" , &err), err));
1769
+ GGML_LOG_CONT (" ." );
1770
+ }
1771
+
1754
1772
// mul_mv_id_mxfp4_f32
1755
1773
{
1756
1774
#ifdef GGML_OPENCL_EMBED_KERNELS
@@ -2831,6 +2849,7 @@ static bool ggml_opencl_supports_op(ggml_backend_dev_t dev, const struct ggml_te
2831
2849
return false ;
2832
2850
case GGML_OP_MUL_MAT_ID:
2833
2851
if (op->src [0 ]->type == GGML_TYPE_Q4_0 ||
2852
+ op->src [0 ]->type == GGML_TYPE_Q8_0 ||
2834
2853
op->src [0 ]->type == GGML_TYPE_MXFP4) {
2835
2854
if (op->src [1 ]->type == GGML_TYPE_F32) {
2836
2855
return ggml_is_contiguous (op->src [0 ]) && ggml_is_contiguous (op->src [1 ]);
@@ -7260,6 +7279,46 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0,
7260
7279
7261
7280
break ;
7262
7281
}
7282
+ case GGML_TYPE_Q8_0: {
7283
+ kernel = backend_ctx->kernel_mul_mv_id_q8_0_f32 ;
7284
+
7285
+ if (backend_ctx->gpu_family == INTEL) {
7286
+ sgs = 16 ;
7287
+ nsg = 2 ;
7288
+ ndst = 4 ;
7289
+ } else if (backend_ctx->gpu_family == ADRENO) {
7290
+ sgs = 64 ;
7291
+ nsg = 2 ;
7292
+ ndst = 8 ;
7293
+ ndst = 4 ;
7294
+ } else {
7295
+ GGML_ASSERT (false && " TODO: Unknown GPU" );
7296
+ }
7297
+
7298
+ CL_CHECK (clSetKernelArg (kernel, 0 , sizeof (cl_mem), &extra0->data_device ));
7299
+ CL_CHECK (clSetKernelArg (kernel, 1 , sizeof (cl_ulong), &offset0));
7300
+ CL_CHECK (clSetKernelArg (kernel, 2 , sizeof (cl_mem), &extra1->data_device ));
7301
+ CL_CHECK (clSetKernelArg (kernel, 3 , sizeof (cl_ulong), &offset1));
7302
+ CL_CHECK (clSetKernelArg (kernel, 4 , sizeof (cl_mem), &extra2->data_device ));
7303
+ CL_CHECK (clSetKernelArg (kernel, 5 , sizeof (cl_ulong), &offset2));
7304
+ CL_CHECK (clSetKernelArg (kernel, 6 , sizeof (cl_mem), &extrad->data_device ));
7305
+ CL_CHECK (clSetKernelArg (kernel, 7 , sizeof (cl_ulong), &offsetd));
7306
+ CL_CHECK (clSetKernelArg (kernel, 8 , sizeof (int ), &ne00));
7307
+ CL_CHECK (clSetKernelArg (kernel, 9 , sizeof (int ), &ne01));
7308
+ CL_CHECK (clSetKernelArg (kernel, 10 , sizeof (cl_ulong), &nb01));
7309
+ CL_CHECK (clSetKernelArg (kernel, 11 , sizeof (cl_ulong), &nb02));
7310
+ CL_CHECK (clSetKernelArg (kernel, 12 , sizeof (int ), &ne11));
7311
+ CL_CHECK (clSetKernelArg (kernel, 13 , sizeof (int ), &ne12));
7312
+ CL_CHECK (clSetKernelArg (kernel, 14 , sizeof (cl_ulong), &nb11));
7313
+ CL_CHECK (clSetKernelArg (kernel, 15 , sizeof (cl_ulong), &nb12));
7314
+ CL_CHECK (clSetKernelArg (kernel, 16 , sizeof (int ), &ne20));
7315
+ CL_CHECK (clSetKernelArg (kernel, 17 , sizeof (int ), &ne21));
7316
+ CL_CHECK (clSetKernelArg (kernel, 18 , sizeof (cl_ulong), &nb21));
7317
+ CL_CHECK (clSetKernelArg (kernel, 19 , sizeof (int ), &ne0));
7318
+ CL_CHECK (clSetKernelArg (kernel, 20 , sizeof (int ), &ne1));
7319
+
7320
+ break ;
7321
+ }
7263
7322
case GGML_TYPE_MXFP4: {
7264
7323
#ifdef GGML_OPENCL_SOA_Q
7265
7324
kernel = backend_ctx->kernel_mul_mv_id_mxfp4_f32_flat ;
0 commit comments