@@ -1970,8 +1970,14 @@ static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buff
1970
1970
1971
1971
// The optimized gemm and gemv kernels are used for large matrices without batch.
1972
1972
// tensor is the quantized weights matrix.
1973
- inline bool use_adreno_kernels (const ggml_tensor *tensor) {
1974
- return tensor->ne [0 ] >= 512 && tensor->ne [1 ] >= 512 &&
1973
// Returns true when `tensor` qualifies for the optimized gemm/gemv path:
// ne[0] and ne[1] must each reach a size threshold, and ne[2] and ne[3]
// must both be 1. The threshold is read from backend_ctx, so callers now
// have to pass the backend context alongside the tensor.
+ inline bool use_adreno_kernels (const ggml_backend_opencl_context *backend_ctx, const ggml_tensor *tensor) {
1974
+ int64_t threshold_ne0 = 512 ;
1975
+ int64_t threshold_ne1 = 512 ;
1976
// Compiler major versions up to and including 38 use a lower cutoff (128
// instead of 512), so smaller matrices also take the optimized path there.
// NOTE(review): only the major version is compared, and the reason for the
// lower cutoff is not visible here - confirm against the compiler notes.
+ if (backend_ctx->adreno_cl_compiler_version .major <= 38 ) {
1977
+ threshold_ne0 = 128 ;
1978
+ threshold_ne1 = 128 ;
1979
+ }
1980
// Both leading dimensions must reach their threshold; the ne[2]/ne[3]
// checks on the following line restrict this to tensors without a batch.
+ return tensor->ne [0 ] >= threshold_ne0 && tensor->ne [1 ] >= threshold_ne1 &&
1975
1981
tensor->ne [2 ] == 1 && tensor->ne [3 ] == 1 ;
1976
1982
}
1977
1983
@@ -2049,7 +2055,7 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer,
2049
2055
cl_kernel kernel = backend_ctx->kernel_convert_block_q4_0 ;
2050
2056
2051
2057
// The optimized kernels need weights in natural order, so unshuffle.
2052
- if (use_adreno_kernels (tensor)) {
2058
+ if (use_adreno_kernels (backend_ctx, tensor)) {
2053
2059
kernel = backend_ctx->kernel_convert_block_q4_0_noshuffle ;
2054
2060
}
2055
2061
#else
@@ -2073,7 +2079,7 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer,
2073
2079
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
2074
2080
// Only do transpose for large, non batched matrix
2075
2081
// TODO: use preallocated images instead of sub-buffer then image
2076
- if (use_adreno_kernels (tensor)) {
2082
+ if (use_adreno_kernels (backend_ctx, tensor)) {
2077
2083
// <----------------------------------------------------------------------------------> //
2078
2084
// start transpose
2079
2085
// <----------------------------------------------------------------------------------> //
@@ -3512,7 +3518,7 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co
3512
3518
#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
3513
3519
cl_context context = backend_ctx->context ;
3514
3520
3515
- if (ne01 && ne1 && use_adreno_kernels (src0)) {
3521
+ if (ne01 && ne1 && use_adreno_kernels (backend_ctx, src0)) {
3516
3522
3517
3523
// init CL objects
3518
3524
// <--------------------------------------------> //
0 commit comments