Skip to content

Commit 8933b05

Browse files
committed
add insightful comments
1 parent 834acb1 commit 8933b05

File tree

1 file changed

+11
-0
lines changed

1 file changed

+11
-0
lines changed

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4787,6 +4787,17 @@ static void ggml_cl_mul_mat_f16_f32_tiled(ggml_backend_t backend, const ggml_ten
47874787
CL_CHECK(clSetKernelArg(kernel, 7, sizeof(cl_mem), &extrad->data_device));
47884788
CL_CHECK(clSetKernelArg(kernel, 8, sizeof(cl_ulong), &offsetd));
47894789

4790+
// Tiling parameters. These need to be tuned for optimal performance.
4791+
// They must match the #defines in the kernel mul_mat_f16_f32.cl.
4792+
//
4793+
// OPWM / OPWN: Output tile size per Work-Group. A work-group computes a tile of size OPWM x OPWN.
4794+
// TPWM / TPWN: Threads per Work-Group along the M / N dimensions. The work-group size is TPWM x TPWN threads.
4795+
// OPTM / OPTN: Output elements per Thread. Each thread computes OPTM x OPTN elements.
4796+
//
4797+
// The following relationships must hold:
4798+
// OPWM = TPWM * OPTM
4799+
// OPWN = TPWN * OPTN
4800+
//
47904801
const int OPWM = 64;
47914802
const int OPWN = 64;
47924803
const int TPWM = 16;

0 commit comments

Comments
 (0)