Skip to content

Commit 8a291af

Browse files
committed
opencl: refine logic for selecting Adreno kernels
1 parent b29328a commit 8a291af

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1970,8 +1970,14 @@ static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buff
19701970

19711971
// The optimized gemm and gemv kernels are used for large matrices without batch.
19721972
// tensor is the quantized weights matrix.
1973-
inline bool use_adreno_kernels(const ggml_tensor *tensor) {
1974-
return tensor->ne[0] >= 512 && tensor->ne[1] >= 512 &&
1973+
inline bool use_adreno_kernels(const ggml_backend_opencl_context *backend_ctx, const ggml_tensor *tensor) {
1974+
int64_t threshold_ne0 = 512;
1975+
int64_t threshold_ne1 = 512;
1976+
if (backend_ctx->adreno_cl_compiler_version.major <= 38) {
1977+
threshold_ne0 = 128;
1978+
threshold_ne1 = 128;
1979+
}
1980+
return tensor->ne[0] >= threshold_ne0 && tensor->ne[1] >= threshold_ne1 &&
19751981
tensor->ne[2] == 1 && tensor->ne[3] == 1;
19761982
}
19771983

@@ -2049,7 +2055,7 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer,
20492055
cl_kernel kernel = backend_ctx->kernel_convert_block_q4_0;
20502056

20512057
// The optimized kernels need weights in natural order, so unshuffle.
2052-
if (use_adreno_kernels(tensor)) {
2058+
if (use_adreno_kernels(backend_ctx, tensor)) {
20532059
kernel = backend_ctx->kernel_convert_block_q4_0_noshuffle;
20542060
}
20552061
#else
@@ -2073,7 +2079,7 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer,
20732079
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
20742080
// Only do transpose for large, non batched matrix
20752081
// TODO: use preallocated images instead of sub-buffer then image
2076-
if (use_adreno_kernels(tensor)) {
2082+
if (use_adreno_kernels(backend_ctx, tensor)) {
20772083
// <----------------------------------------------------------------------------------> //
20782084
// start transpose
20792085
// <----------------------------------------------------------------------------------> //
@@ -3512,7 +3518,7 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co
35123518
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
35133519
cl_context context = backend_ctx->context;
35143520

3515-
if (ne01 && ne1 && use_adreno_kernels(src0)) {
3521+
if (ne01 && ne1 && use_adreno_kernels(backend_ctx, src0)) {
35163522

35173523
// init CL objects
35183524
// <--------------------------------------------> //

0 commit comments

Comments
 (0)