@@ -64,24 +64,30 @@ enum ADRENO_GPU_GEN {
6464 X1E,
6565};
6666
// Tag for the family of Adreno OpenCL compiler a parsed version belongs to.
// Deliberately an unscoped enum: call sites refer to the enumerators both as
// bare `E031` / `DX` and as `ADRENO_CL_COMPILER_TYPE::E031`.
enum ADRENO_CL_COMPILER_TYPE {
    // Used when the driver version string carries the "E031" marker.
    // This is the zero enumerator, so value-initialization yields E031.
    E031 = 0,
    // Alternate compiler family selected by the fallback lookup in
    // get_adreno_cl_compiler_version (presumably a "DX" marker -- confirm there).
    DX = 1,
};
71+
6772struct ggml_cl_version {
6873 cl_uint major = 0 ;
6974 cl_uint minor = 0 ;
7075};
7176
7277struct ggml_cl_compiler_version {
78+ ADRENO_CL_COMPILER_TYPE type;
7379 int major = -1 ;
7480 int minor = -1 ;
7581 int patch = -1 ;
7682
77- bool same (int x, int y, int z) const {
78- return major == x && minor == y && patch == z;
83+ bool same (ADRENO_CL_COMPILER_TYPE t, int x, int y, int z) const {
84+ return major == x && minor == y && patch == z && type == t ;
7985 }
80- bool newer_than (int x, int y, int z) const {
81- return major*10000 + minor*100 + patch > x*10000 + y*100 + z;
86+ bool newer_than (ADRENO_CL_COMPILER_TYPE t, int x, int y, int z) const {
87+ return major*10000 + minor*100 + patch > x*10000 + y*100 + z && type == t ;
8288 }
83- bool newer_than_or_same (int x, int y, int z) const {
84- return same (x, y, z) || newer_than (x, y, z);
89+ bool newer_than_or_same (ADRENO_CL_COMPILER_TYPE t, int x, int y, int z) const {
90+ return same (t, x, y, z) || newer_than (t, x, y, z);
8591 }
8692};
8793
@@ -191,6 +197,7 @@ static ADRENO_GPU_GEN get_adreno_gpu_gen(const char *device_name) {
191197
192198static ggml_cl_compiler_version get_adreno_cl_compiler_version (const char *driver_version) {
193199 std::string driver_ver_str (driver_version);
200+ ADRENO_CL_COMPILER_TYPE type = ADRENO_CL_COMPILER_TYPE::E031 ;
194201 size_t compiler_ver_pos = driver_ver_str.find (" E031" );
195202 size_t compiler_ver_len = 13 ;
196203 size_t compiler_major_offset = 5 ;
@@ -202,6 +209,7 @@ static ggml_cl_compiler_version get_adreno_cl_compiler_version(const char *drive
202209 if (compiler_ver_pos == std::string::npos) {
203210 return {};
204211 }
212+ type = ADRENO_CL_COMPILER_TYPE::DX;
205213 compiler_ver_len = 11 ;
206214 compiler_major_offset = 3 ;
207215 }
@@ -210,7 +218,7 @@ static ggml_cl_compiler_version get_adreno_cl_compiler_version(const char *drive
210218 int major = std::atoi (compiler_ver_str.substr (compiler_major_offset, 2 ).c_str ());
211219 int minor = std::atoi (compiler_ver_str.substr (compiler_minor_offset, 2 ).c_str ());
212220 int patch = std::atoi (compiler_ver_str.substr (compiler_patch_offset, 2 ).c_str ());
213- return { major, minor, patch };
221+ return { type, major, minor, patch };
214222}
215223
216224// backend device context
@@ -643,7 +651,9 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
643651 // mul_mv_q4_0_f32_1d_8x_flat
644652 // This kernel does not compile on Adreno cl compiler 38.01. Skip it for
645653 // those compiler versions since it is anyway not used for Adreno.
646- if (backend_ctx->gpu_family != ADRENO || backend_ctx->adreno_cl_compiler_version .newer_than_or_same (38 , 11 , 0 )) {
654+ if (backend_ctx->gpu_family != ADRENO ||
655+ backend_ctx->adreno_cl_compiler_version .newer_than_or_same (E031 , 38 , 11 , 0 ) ||
656+ backend_ctx->adreno_cl_compiler_version .type == DX) {
647657#ifdef GGML_OPENCL_EMBED_KERNELS
648658 const std::string kernel_src {
649659 #include " mul_mv_q4_0_f32_1d_8x_flat.cl.h"
@@ -661,7 +671,9 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
661671 // mul_mv_q4_0_f32_1d_16x_flat
662672 // This kernel does not compile on Adreno cl compiler 38.01. Skip it for
663673 // those compiler versions since it is anyway not used for Adreno.
664- if (backend_ctx->gpu_family != ADRENO || backend_ctx->adreno_cl_compiler_version .newer_than_or_same (38 , 11 , 0 )) {
674+ if (backend_ctx->gpu_family != ADRENO ||
675+ backend_ctx->adreno_cl_compiler_version .newer_than_or_same (E031 , 38 , 11 , 0 ) ||
676+ backend_ctx->adreno_cl_compiler_version .type == DX) {
665677#ifdef GGML_OPENCL_EMBED_KERNELS
666678 const std::string kernel_src {
667679 #include " mul_mv_q4_0_f32_1d_16x_flat.cl.h"
@@ -1983,7 +1995,8 @@ static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buff
19831995inline bool use_adreno_kernels (const ggml_backend_opencl_context *backend_ctx, const ggml_tensor *tensor) {
19841996 int64_t threshold_ne0 = 512 ;
19851997 int64_t threshold_ne1 = 512 ;
1986- if (!backend_ctx->adreno_cl_compiler_version .newer_than_or_same (38 , 11 , 0 )) {
1998+ if (!backend_ctx->adreno_cl_compiler_version .newer_than_or_same (E031 , 38 , 11 , 0 ) &&
1999+ backend_ctx->adreno_cl_compiler_version .type != DX) {
19872000 threshold_ne0 = 128 ;
19882001 threshold_ne1 = 128 ;
19892002 }
0 commit comments