Skip to content

Commit fbd3b4b

Browse files
committed
opencl: consider Adreno CL compiler on Windows
1 parent a4a0c24 commit fbd3b4b

File tree

1 file changed

+23
-10
lines changed

1 file changed

+23
-10
lines changed

ggml/src/ggml-opencl/ggml-opencl.cpp

Lines changed: 23 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -64,24 +64,30 @@ enum ADRENO_GPU_GEN {
6464
X1E,
6565
};
6666

67+
enum ADRENO_CL_COMPILER_TYPE {
68+
E031,
69+
DX,
70+
};
71+
6772
struct ggml_cl_version {
6873
cl_uint major = 0;
6974
cl_uint minor = 0;
7075
};
7176

7277
struct ggml_cl_compiler_version {
78+
ADRENO_CL_COMPILER_TYPE type;
7379
int major = -1;
7480
int minor = -1;
7581
int patch = -1;
7682

77-
bool same(int x, int y, int z) const {
78-
return major == x && minor == y && patch == z;
83+
bool same(ADRENO_CL_COMPILER_TYPE t, int x, int y, int z) const {
84+
return major == x && minor == y && patch == z && type == t;
7985
}
80-
bool newer_than(int x, int y, int z) const {
81-
return major*10000 + minor*100 + patch > x*10000 + y*100 + z;
86+
bool newer_than(ADRENO_CL_COMPILER_TYPE t, int x, int y, int z) const {
87+
return major*10000 + minor*100 + patch > x*10000 + y*100 + z && type == t;
8288
}
83-
bool newer_than_or_same(int x, int y, int z) const {
84-
return same(x, y, z) || newer_than(x, y, z);
89+
bool newer_than_or_same(ADRENO_CL_COMPILER_TYPE t, int x, int y, int z) const {
90+
return same(t, x, y, z) || newer_than(t, x, y, z);
8591
}
8692
};
8793

@@ -191,6 +197,7 @@ static ADRENO_GPU_GEN get_adreno_gpu_gen(const char *device_name) {
191197

192198
static ggml_cl_compiler_version get_adreno_cl_compiler_version(const char *driver_version) {
193199
std::string driver_ver_str(driver_version);
200+
ADRENO_CL_COMPILER_TYPE type = ADRENO_CL_COMPILER_TYPE::E031;
194201
size_t compiler_ver_pos = driver_ver_str.find("E031");
195202
size_t compiler_ver_len = 13;
196203
size_t compiler_major_offset = 5;
@@ -202,6 +209,7 @@ static ggml_cl_compiler_version get_adreno_cl_compiler_version(const char *drive
202209
if (compiler_ver_pos == std::string::npos) {
203210
return {};
204211
}
212+
type = ADRENO_CL_COMPILER_TYPE::DX;
205213
compiler_ver_len = 11;
206214
compiler_major_offset = 3;
207215
}
@@ -210,7 +218,7 @@ static ggml_cl_compiler_version get_adreno_cl_compiler_version(const char *drive
210218
int major = std::atoi(compiler_ver_str.substr(compiler_major_offset, 2).c_str());
211219
int minor = std::atoi(compiler_ver_str.substr(compiler_minor_offset, 2).c_str());
212220
int patch = std::atoi(compiler_ver_str.substr(compiler_patch_offset, 2).c_str());
213-
return { major, minor, patch };
221+
return { type, major, minor, patch };
214222
}
215223

216224
// backend device context
@@ -643,7 +651,9 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
643651
// mul_mv_q4_0_f32_1d_8x_flat
644652
// This kernel does not compile on Adreno cl compiler 38.01. Skip it for
645653
// those compiler versions since it is anyway not used for Adreno.
646-
if (backend_ctx->gpu_family != ADRENO || backend_ctx->adreno_cl_compiler_version.newer_than_or_same(38, 11, 0)) {
654+
if (backend_ctx->gpu_family != ADRENO ||
655+
backend_ctx->adreno_cl_compiler_version.newer_than_or_same(E031, 38, 11, 0) ||
656+
backend_ctx->adreno_cl_compiler_version.type == DX) {
647657
#ifdef GGML_OPENCL_EMBED_KERNELS
648658
const std::string kernel_src {
649659
#include "mul_mv_q4_0_f32_1d_8x_flat.cl.h"
@@ -661,7 +671,9 @@ static void load_cl_kernels(ggml_backend_opencl_context *backend_ctx, ggml_cl_ve
661671
// mul_mv_q4_0_f32_1d_16x_flat
662672
// This kernel does not compile on Adreno cl compiler 38.01. Skip it for
663673
// those compiler versions since it is anyway not used for Adreno.
664-
if (backend_ctx->gpu_family != ADRENO || backend_ctx->adreno_cl_compiler_version.newer_than_or_same(38, 11, 0)) {
674+
if (backend_ctx->gpu_family != ADRENO ||
675+
backend_ctx->adreno_cl_compiler_version.newer_than_or_same(E031, 38, 11, 0) ||
676+
backend_ctx->adreno_cl_compiler_version.type == DX) {
665677
#ifdef GGML_OPENCL_EMBED_KERNELS
666678
const std::string kernel_src {
667679
#include "mul_mv_q4_0_f32_1d_16x_flat.cl.h"
@@ -1983,7 +1995,8 @@ static enum ggml_status ggml_backend_opencl_buffer_init_tensor(ggml_backend_buff
19831995
inline bool use_adreno_kernels(const ggml_backend_opencl_context *backend_ctx, const ggml_tensor *tensor) {
19841996
int64_t threshold_ne0 = 512;
19851997
int64_t threshold_ne1 = 512;
1986-
if (!backend_ctx->adreno_cl_compiler_version.newer_than_or_same(38, 11, 0)) {
1998+
if (!backend_ctx->adreno_cl_compiler_version.newer_than_or_same(E031, 38, 11, 0) &&
1999+
backend_ctx->adreno_cl_compiler_version.type != DX) {
19872000
threshold_ne0 = 128;
19882001
threshold_ne1 = 128;
19892002
}

0 commit comments

Comments
 (0)