Skip to content

Commit 36676c0

Browse files
committed
opencl: fix non adreno GPU
1 parent 464ebeb commit 36676c0

File tree

1 file changed

+12
-2
lines changed

1 file changed

+12
-2
lines changed

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -6974,19 +6974,24 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co
69746974
#ifdef GGML_OPENCL_SOA_Q
69756975
kernel = backend_ctx->kernel_mul_mv_mxfp4_f32_flat;
69766976

6977+
cl_mem q;
69776978
if (backend_ctx->gpu_family == INTEL) {
69786979
nth0 = 16;
69796980
nth1 = 2;
69806981
ndst = nth1*2;
6982+
6983+
q = extra0_mxfp4->q;
69816984
} else if (backend_ctx->gpu_family == ADRENO) {
69826985
nth0 = 64;
69836986
nth1 = 2;
69846987
ndst = nth1;
6988+
6989+
q = extra0_mxfp4->q_img;
69856990
} else {
69866991
GGML_ASSERT(false && "TODO: Unknown GPU");
69876992
}
69886993

6989-
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra0_mxfp4->q_img));
6994+
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &q));
69906995
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra0_mxfp4->e));
69916996
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra1->data_device));
69926997
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &offset1));
@@ -7197,19 +7202,24 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0,
71977202
#ifdef GGML_OPENCL_SOA_Q
71987203
kernel = backend_ctx->kernel_mul_mv_id_mxfp4_f32_flat;
71997204

7205+
cl_mem q;
72007206
if (backend_ctx->gpu_family == INTEL) {
72017207
sgs = 16;
72027208
nsg = 2;
72037209
ndst = 2;
7210+
7211+
q = extra0_mxfp4->q;
72047212
} else if (backend_ctx->gpu_family == ADRENO) {
72057213
sgs = 64;
72067214
nsg = 1;
72077215
ndst = 4;
7216+
7217+
q = extra0_mxfp4->q_img;
72087218
} else {
72097219
GGML_ASSERT(false && "TODO: Unknown GPU");
72107220
}
72117221

7212-
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra0_mxfp4->q_img));
7222+
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &q));
72137223
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra0_mxfp4->e));
72147224
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra1->data_device));
72157225
CL_CHECK(clSetKernelArg(kernel, 3, sizeof(cl_ulong), &offset1));

0 commit comments

Comments
 (0)