Skip to content

Commit 374c3b7

Browse files
committed
opencl: clean up
1 parent 76d3e84 commit 374c3b7

File tree

2 files changed

+18
-26
lines changed

2 files changed

+18
-26
lines changed

ggml/src/ggml-opencl/CMakeLists.txt

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,6 @@ if (GGML_OPENCL_PROFILING)
1515
endif ()
1616

1717
add_compile_definitions(GGML_OPENCL_SOA_Q)
18-
1918
add_compile_definitions(GGML_OPENCL_TARGET_VERSION=${GGML_OPENCL_TARGET_VERSION})
2019

2120
if (GGML_OPENCL_USE_ADRENO_KERNELS)

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 18 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -765,11 +765,10 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
765765
#endif
766766
backend_ctx->program_cvt =
767767
build_program_from_source(backend_ctx->context, backend_ctx->device, kernel_src.c_str(), compile_opts);
768-
// q4_0
768+
769769
CL_CHECK((backend_ctx->kernel_convert_block_q4_0_noshuffle = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_q4_0_noshuffle", &err), err));
770770
CL_CHECK((backend_ctx->kernel_convert_block_q4_0 = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_q4_0", &err), err));
771771
CL_CHECK((backend_ctx->kernel_restore_block_q4_0 = clCreateKernel(backend_ctx->program_cvt, "kernel_restore_block_q4_0", &err), err));
772-
// mxfp4
773772
CL_CHECK((backend_ctx->kernel_convert_block_mxfp4 = clCreateKernel(backend_ctx->program_cvt, "kernel_convert_block_mxfp4", &err), err));
774773
CL_CHECK((backend_ctx->kernel_restore_block_mxfp4 = clCreateKernel(backend_ctx->program_cvt, "kernel_restore_block_mxfp4", &err), err));
775774
GGML_LOG_CONT(".");
@@ -2430,7 +2429,6 @@ struct ggml_tensor_extra_cl_q4_0 {
24302429
}
24312430
};
24322431

2433-
// Additional tensor extra structs for mxfp4 tensors.
24342432
struct ggml_tensor_extra_cl_mxfp4 {
24352433
// Quantized values.
24362434
cl_mem q = nullptr;
@@ -3403,7 +3401,8 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer,
34033401

34043402
return;
34053403

3406-
} else if (tensor->type == GGML_TYPE_MXFP4) {
3404+
}
3405+
if (tensor->type == GGML_TYPE_MXFP4) {
34073406
ggml_tensor_extra_cl * extra_orig = (ggml_tensor_extra_cl *)tensor->extra;
34083407
GGML_ASSERT(extra_orig && "Tesnors in OpenCL backend should have been allocated and initialized");
34093408

@@ -3423,27 +3422,12 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer,
34233422
queue, data_device, CL_TRUE, 0,
34243423
ggml_nbytes(tensor), data, 0, NULL, NULL));
34253424

3426-
// We consider the specified offset arg as always, although for weights
3427-
// the offset arg should be 0 (we do not assert this).
3428-
//GGML_ASSERT(offset == 0);
3429-
3430-
// We create subbuffers from the original tensor buffer for scales and
3431-
// quants - i.e., scales and quants are aliases into the buffer object
3432-
// that backs the original tensor. This is a cleaner way to adapt to the
3433-
// new memory management.
3434-
// In the old code, we allocate new buffers for scales and quants
3435-
// respectively, which could still be done but would result in double
3436-
// allocation; properly deallocating the preallocated buffer that backs
3437-
// the tensors is tricky and would leak the backend specific information
3438-
// into the general backend code.
3439-
// Does this create misaligned subbuffers (alignment is 1024) in certain
3440-
// cases ?
3441-
cl_buffer_region region;
3442-
34433425
// The original tensor memory is divided into scales and quants, i.e.,
34443426
// we first store scales, then quants.
3427+
cl_buffer_region region;
3428+
34453429
// Create subbuffer for scales.
3446-
region.origin = extra_orig->offset + tensor->view_offs + offset; //align_to(extra_orig->offset + tensor->view_offs + offset, backend_ctx->alignment);
3430+
region.origin = align_to(extra_orig->offset + tensor->view_offs + offset, backend_ctx->alignment);
34473431
region.size = size_e;
34483432
extra->e = clCreateSubBuffer(
34493433
extra_orig->data_device, CL_MEM_READ_WRITE,
@@ -3452,7 +3436,7 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer,
34523436
auto previous_origin = region.origin;
34533437

34543438
// Create subbuffer for quants.
3455-
region.origin = previous_origin + size_e; //align_to(previous_origin + size_e, backend_ctx->alignment);
3439+
region.origin = align_to(previous_origin + size_e, backend_ctx->alignment);
34563440
region.size = size_q;
34573441
extra->q = clCreateSubBuffer(
34583442
extra_orig->data_device, CL_MEM_READ_WRITE,
@@ -3475,7 +3459,12 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer,
34753459

34763460
// Create image for Q
34773461
cl_image_format img_format_q = {CL_RG, CL_UNSIGNED_INT32};
3478-
cl_image_desc img_desc_q = {CL_MEM_OBJECT_IMAGE1D_BUFFER, static_cast<size_t>(tensor->ne[0] * tensor->ne[1] * tensor->ne[2] / 32 * 2), 0,0,0,0,0,0,0, extra->q};
3462+
cl_image_desc img_desc_q = {
3463+
CL_MEM_OBJECT_IMAGE1D_BUFFER,
3464+
static_cast<size_t>(tensor->ne[0] * tensor->ne[1] * tensor->ne[2] / 32 * 2),
3465+
0, 0, 0, 0, 0, 0, 0,
3466+
{ extra->q }
3467+
};
34793468
extra->q_img = clCreateImage(context, CL_MEM_READ_ONLY, &img_format_q, &img_desc_q, NULL, &err);
34803469

34813470
tensor->extra = extra;
@@ -6275,8 +6264,10 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co
62756264
cl_ulong offset1 = extra1->offset + src1->view_offs;
62766265
cl_ulong offsetd = extrad->offset + dst->view_offs;
62776266

6267+
#ifdef GGML_OPENCL_SOA_Q
62786268
ggml_tensor_extra_cl_q4_0 * extra0_q4_0 = (ggml_tensor_extra_cl_q4_0 *)src0->extra;
62796269
ggml_tensor_extra_cl_mxfp4 * extra0_mxfp4 = (ggml_tensor_extra_cl_mxfp4 *)src0->extra;
6270+
#endif
62806271

62816272
const int ne00 = src0 ? src0->ne[0] : 0;
62826273
const int ne01 = src0 ? src0->ne[1] : 0;
@@ -7112,8 +7103,10 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0,
71127103
cl_ulong offset2 = extra2->offset + src2->view_offs;
71137104
cl_ulong offsetd = extrad->offset + dst->view_offs;
71147105

7106+
#ifdef GGML_OPENCL_SOA_Q
71157107
ggml_tensor_extra_cl_q4_0 * extra0_q4_0 = (ggml_tensor_extra_cl_q4_0 *)src0->extra;
71167108
ggml_tensor_extra_cl_mxfp4 * extra0_mxfp4 = (ggml_tensor_extra_cl_mxfp4 *)src0->extra;
7109+
#endif
71177110

71187111
const int ne00 = src0->ne[0];
71197112
const int ne01 = src0->ne[1];
@@ -7171,7 +7164,7 @@ static void ggml_cl_mul_mat_id(ggml_backend_t backend, const ggml_tensor * src0,
71717164
} else {
71727165
GGML_ASSERT(false && "TODO: Unknown GPU");
71737166
}
7174-
7167+
71757168
CL_CHECK(clSetKernelArg(kernel, 0, sizeof(cl_mem), &extra0_q4_0->q));
71767169
CL_CHECK(clSetKernelArg(kernel, 1, sizeof(cl_mem), &extra0_q4_0->d));
71777170
CL_CHECK(clSetKernelArg(kernel, 2, sizeof(cl_mem), &extra1->data_device));

0 commit comments

Comments
 (0)