@@ -64,24 +64,30 @@ enum ADRENO_GPU_GEN {
64
64
X1E,
65
65
};
66
66
67
// Family of Adreno OpenCL compiler a driver reports.
//   E031 - the default family; its version is located via the "E031" marker
//          in the driver version string.
//   DX   - the alternative family, selected by the fallback lookup when the
//          E031 marker is absent (presumably a "DX" marker - confirm against
//          get_adreno_cl_compiler_version).
enum ADRENO_CL_COMPILER_TYPE {
    E031 = 0,
    DX   = 1,
};
71
+
67
72
struct ggml_cl_version {
68
73
cl_uint major = 0 ;
69
74
cl_uint minor = 0 ;
70
75
};
71
76
72
77
struct ggml_cl_compiler_version {
78
+ ADRENO_CL_COMPILER_TYPE type;
73
79
int major = -1 ;
74
80
int minor = -1 ;
75
81
int patch = -1 ;
76
82
77
- bool same (int x, int y, int z) const {
78
- return major == x && minor == y && patch == z;
83
+ bool same (ADRENO_CL_COMPILER_TYPE t, int x, int y, int z) const {
84
+ return major == x && minor == y && patch == z && type == t ;
79
85
}
80
- bool newer_than (int x, int y, int z) const {
81
- return major*10000 + minor*100 + patch > x*10000 + y*100 + z;
86
+ bool newer_than (ADRENO_CL_COMPILER_TYPE t, int x, int y, int z) const {
87
+ return major*10000 + minor*100 + patch > x*10000 + y*100 + z && type == t ;
82
88
}
83
- bool newer_than_or_same (int x, int y, int z) const {
84
- return same (x, y, z) || newer_than (x, y, z);
89
+ bool newer_than_or_same (ADRENO_CL_COMPILER_TYPE t, int x, int y, int z) const {
90
+ return same (t, x, y, z) || newer_than (t, x, y, z);
85
91
}
86
92
};
87
93
@@ -191,6 +197,7 @@ static ADRENO_GPU_GEN get_adreno_gpu_gen(const char *device_name) {
191
197
192
198
static ggml_cl_compiler_version get_adreno_cl_compiler_version (const char *driver_version) {
193
199
std::string driver_ver_str (driver_version);
200
+ ADRENO_CL_COMPILER_TYPE type = ADRENO_CL_COMPILER_TYPE::E031 ;
194
201
size_t compiler_ver_pos = driver_ver_str.find (" E031" );
195
202
size_t compiler_ver_len = 13 ;
196
203
size_t compiler_major_offset = 5 ;
@@ -202,6 +209,7 @@ static ggml_cl_compiler_version get_adreno_cl_compiler_version(const char *drive
202
209
if (compiler_ver_pos == std::string::npos) {
203
210
return {};
204
211
}
212
+ type = ADRENO_CL_COMPILER_TYPE::DX;
205
213
compiler_ver_len = 11 ;
206
214
compiler_major_offset = 3 ;
207
215
}
@@ -210,7 +218,7 @@ static ggml_cl_compiler_version get_adreno_cl_compiler_version(const char *drive
210
218
int major = std::atoi (compiler_ver_str.substr (compiler_major_offset, 2 ).c_str ());
211
219
int minor = std::atoi (compiler_ver_str.substr (compiler_minor_offset, 2 ).c_str ());
212
220
int patch = std::atoi (compiler_ver_str.substr (compiler_patch_offset, 2 ).c_str ());
213
- return { major, minor, patch };
221
+ return { type, major, minor, patch };
214
222
}
215
223
216
224
// backend device context
@@ -643,7 +651,9 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
643
651
// mul_mv_q4_0_f32_1d_8x_flat
644
652
// This kernel does not compile on Adreno cl compiler 38.01. Skip it for
645
653
// those compiler versions since it is anyway not used for Adreno.
646
- if (backend_ctx->gpu_family != ADRENO || backend_ctx->adreno_cl_compiler_version .newer_than_or_same (38 , 11 , 0 )) {
654
+ if (backend_ctx->gpu_family != ADRENO ||
655
+ backend_ctx->adreno_cl_compiler_version .newer_than_or_same (E031 , 38 , 11 , 0 ) ||
656
+ backend_ctx->adreno_cl_compiler_version .type == DX) {
647
657
#ifdef GGML_OPENCL_EMBED_KERNELS
648
658
const std::string kernel_src {
649
659
#include " mul_mv_q4_0_f32_1d_8x_flat.cl.h"
@@ -661,7 +671,9 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
661
671
// mul_mv_q4_0_f32_1d_16x_flat
662
672
// This kernel does not compile on Adreno cl compiler 38.01. Skip it for
663
673
// those compiler versions since it is anyway not used for Adreno.
664
- if (backend_ctx->gpu_family != ADRENO || backend_ctx->adreno_cl_compiler_version .newer_than_or_same (38 , 11 , 0 )) {
674
+ if (backend_ctx->gpu_family != ADRENO ||
675
+ backend_ctx->adreno_cl_compiler_version .newer_than_or_same (E031 , 38 , 11 , 0 ) ||
676
+ backend_ctx->adreno_cl_compiler_version .type == DX) {
665
677
#ifdef GGML_OPENCL_EMBED_KERNELS
666
678
const std::string kernel_src {
667
679
#include " mul_mv_q4_0_f32_1d_16x_flat.cl.h"
@@ -1983,7 +1995,8 @@ static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buff
1983
1995
inline bool use_adreno_kernels (const ggml_backend_opencl_context *backend_ctx, const ggml_tensor *tensor) {
1984
1996
int64_t threshold_ne0 = 512 ;
1985
1997
int64_t threshold_ne1 = 512 ;
1986
- if (!backend_ctx->adreno_cl_compiler_version .newer_than_or_same (38 , 11 , 0 )) {
1998
+ if (!backend_ctx->adreno_cl_compiler_version .newer_than_or_same (E031 , 38 , 11 , 0 ) &&
1999
+ backend_ctx->adreno_cl_compiler_version .type != DX) {
1987
2000
threshold_ne0 = 128 ;
1988
2001
threshold_ne1 = 128 ;
1989
2002
}
0 commit comments