@@ -1970,8 +1970,14 @@ static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buff
19701970
19711971// The optimized gemm and gemv kernels are used for large, non-batched matrices.
19721972// tensor is the quantized weights matrix.
1973- inline bool use_adreno_kernels (const ggml_tensor *tensor) {
1974- return tensor->ne [0 ] >= 512 && tensor->ne [1 ] >= 512 &&
1973+ inline bool use_adreno_kernels (const ggml_backend_opencl_context *backend_ctx, const ggml_tensor *tensor) {
1974+ int64_t threshold_ne0 = 512 ;
1975+ int64_t threshold_ne1 = 512 ;
1976+ if (backend_ctx->adreno_cl_compiler_version .major <= 38 ) {
1977+ threshold_ne0 = 128 ;
1978+ threshold_ne1 = 128 ;
1979+ }
1980+ return tensor->ne [0 ] >= threshold_ne0 && tensor->ne [1 ] >= threshold_ne1 &&
19751981 tensor->ne [2 ] == 1 && tensor->ne [3 ] == 1 ;
19761982}
19771983
@@ -2049,7 +2055,7 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer,
20492055 cl_kernel kernel = backend_ctx->kernel_convert_block_q4_0 ;
20502056
20512057 // The optimized kernels need weights in natural order, so unshuffle.
2052- if (use_adreno_kernels (tensor)) {
2058+ if (use_adreno_kernels (backend_ctx, tensor)) {
20532059 kernel = backend_ctx->kernel_convert_block_q4_0_noshuffle ;
20542060 }
20552061 #else
@@ -2073,7 +2079,7 @@ static void ggml_backend_opencl_buffer_set_tensor(ggml_backend_buffer_t buffer,
20732079 #ifdef GGML_OPENCL_USE_ADRENO_KERNELS
20742080 // Only do the transpose for large, non-batched matrices
20752081 // TODO: use preallocated images instead of sub-buffer then image
2076- if (use_adreno_kernels (tensor)) {
2082+ if (use_adreno_kernels (backend_ctx, tensor)) {
20772083 // <----------------------------------------------------------------------------------> //
20782084 // start transpose
20792085 // <----------------------------------------------------------------------------------> //
@@ -3512,7 +3518,7 @@ static void ggml_cl_mul_mat(ggml_backend_t backend, const ggml_tensor * src0, co
35123518#ifdef GGML_OPENCL_USE_ADRENO_KERNELS
35133519 cl_context context = backend_ctx->context ;
35143520
3515- if (ne01 && ne1 && use_adreno_kernels (src0)) {
3521+ if (ne01 && ne1 && use_adreno_kernels (backend_ctx, src0)) {
35163522
35173523 // init CL objects
35183524 // <--------------------------------------------> //
0 commit comments