1- #define  CL_TARGET_OPENCL_VERSION  220 
1+ #define  CL_TARGET_OPENCL_VERSION  GGML_OPENCL_TARGET_VERSION 
22#define  CL_USE_DEPRECATED_OPENCL_1_2_APIS 
33
44//  suppress warnings in CL headers for GCC and Clang
2525#include  < vector> 
2626#include  < string> 
2727#include  < cmath> 
28+ #include  < memory> 
29+ #include  < charconv> 
2830
2931#undef  MIN
3032#undef  MAX
@@ -62,6 +64,97 @@ enum ADRENO_GPU_GEN {
6264    X1E,
6365};
6466
67+ struct  ggml_cl_version  {
68+     cl_uint major = 0 ;
69+     cl_uint minor = 0 ;
70+ };
71+ 
72+ //  Parses a version string of form "XX.YY ". On an error returns ggml_cl_version with all zeroes.
73+ static  ggml_cl_version parse_cl_version (std::string_view str) {
74+     size_t  major_str_begin = 0 ;
75+     size_t  major_str_end   = str.find (" ." 
76+     if  (major_str_end == std::string::npos) {
77+         return  {};
78+     }
79+ 
80+     size_t  minor_str_begin = major_str_end + 1 ;
81+     size_t  minor_str_end   = str.find ("  " 
82+     if  (minor_str_end == std::string::npos) {
83+         return  {};
84+     }
85+ 
86+     cl_uint version_major;
87+     if  (std::from_chars (str.data () + major_str_begin, str.data () + major_str_end, version_major).ec  != std::errc{}) {
88+         return  {};
89+     }
90+ 
91+     cl_uint version_minor;
92+     if  (std::from_chars (str.data () + minor_str_begin, str.data () + minor_str_end, version_minor).ec  != std::errc{}) {
93+         return  {};
94+     }
95+     return  { version_major, version_minor };
96+ }
97+ 
98+ //  Returns OpenCL platform's version. On an error returns ggml_cl_version with all zeroes.
99+ static  ggml_cl_version get_opencl_platform_version (cl_platform_id platform) {
100+     size_t  param_size;
101+     CL_CHECK (clGetPlatformInfo (platform, CL_PLATFORM_VERSION, 0 , nullptr , ¶m_size));
102+     std::unique_ptr<char []> param_storage (new  char [param_size]);
103+     CL_CHECK (clGetPlatformInfo (platform, CL_PLATFORM_VERSION, param_size, param_storage.get (), nullptr ));
104+ 
105+     auto               param_value    = std::string_view (param_storage.get (), param_size);
106+     const  std::string version_prefix = " OpenCL " //  Suffix: "XX.YY <platform-specific-info>"
107+     if  (param_value.find (version_prefix) != 0 ) {
108+         return  {};
109+     }
110+     param_value.remove_prefix (version_prefix.length ());
111+     return  parse_cl_version (param_value);
112+ }
113+ 
114+ //  Return a version to use in OpenCL C compilation. On an error returns ggml_cl_version with all zeroes.
115+ static  ggml_cl_version get_opencl_c_version (ggml_cl_version platform_version, cl_device_id device) {
116+     size_t  param_size;
117+ 
118+ #if  CL_TARGET_OPENCL_VERSION >= 300
119+     if  (platform_version.major  >= 3 ) {
120+         CL_CHECK (clGetDeviceInfo (device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, 0 , nullptr , ¶m_size));
121+         if  (!param_size) {
122+             return  {};
123+         }
124+ 
125+         std::unique_ptr<cl_name_version[]> versions (new  cl_name_version[param_size]);
126+         CL_CHECK (clGetDeviceInfo (device, CL_DEVICE_OPENCL_C_ALL_VERSIONS, param_size, versions.get (), nullptr ));
127+         unsigned  versions_count = param_size / sizeof (cl_name_version);
128+ 
129+         cl_version version_max = 0 ;
130+         for  (unsigned  i = 0 ; i < versions_count; i++) {
131+             version_max = std::max<cl_version>(versions[i].version , version_max);
132+         }
133+ 
134+         return  { CL_VERSION_MAJOR (version_max), CL_VERSION_MINOR (version_max) };
135+     }
136+ #else 
137+     GGML_UNUSED (platform_version);
138+ #endif   //  CL_TARGET_OPENCL_VERSION >= 300
139+ 
140+     CL_CHECK (clGetDeviceInfo (device, CL_DEVICE_OPENCL_C_VERSION, 0 , nullptr , ¶m_size));
141+     if  (!param_size) {
142+         return  {};
143+     }
144+ 
145+     std::unique_ptr<char []> param_storage (new  char [param_size]);
146+     CL_CHECK (clGetDeviceInfo (device, CL_DEVICE_OPENCL_C_VERSION, param_size, param_storage.get (), nullptr ));
147+     auto  param_value = std::string_view (param_storage.get (), param_size);
148+ 
149+     const  std::string version_prefix = " OpenCL C " //  Suffix: "XX.YY <platform-specific-info>"
150+     if  (param_value.find (version_prefix) != 0 ) {
151+         return  {};
152+     }
153+     param_value.remove_prefix (version_prefix.length ());
154+ 
155+     return  parse_cl_version (param_value);
156+ }
157+ 
65158static  ADRENO_GPU_GEN get_adreno_gpu_gen (const  char  *device_name) {
66159    if  (strstr (device_name, " 730" 
67160        strstr (device_name, " 740" 
@@ -470,16 +563,11 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
470563    //  A local ref of cl_device_id for convenience
471564    cl_device_id device = backend_ctx->device ;
472565
566+     ggml_cl_version platform_version = get_opencl_platform_version (default_device->platform ->id );
567+ 
473568    //  Check device OpenCL version, OpenCL 2.0 or above is required
474-     size_t  device_ver_str_size;
475-     clGetDeviceInfo (device, CL_DEVICE_VERSION, 0 , NULL , &device_ver_str_size);
476-     char  *device_ver_buffer = (char  *)alloca (device_ver_str_size + 1 );
477-     clGetDeviceInfo (device, CL_DEVICE_VERSION, device_ver_str_size, device_ver_buffer, NULL );
478-     device_ver_buffer[device_ver_str_size] = ' \0 ' 
479-     GGML_LOG_INFO (" ggml_opencl: device OpenCL version: %s\n " 
480- 
481-     if  (strstr (device_ver_buffer, " OpenCL 2" NULL  &&
482-         strstr (device_ver_buffer, " OpenCL 3" NULL ) {
569+     ggml_cl_version opencl_c_version = get_opencl_c_version (platform_version, device);
570+     if  (opencl_c_version.major  < 2 ) {
483571        GGML_LOG_ERROR (" ggml_opencl: OpenCL 2.0 or above is required\n " 
484572        return  backend_ctx;
485573    }
@@ -516,8 +604,7 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
516604
517605    //  If OpenCL 3.0 is supported, then check for cl_khr_subgroups, which becomes
518606    //  optional in OpenCL 3.0 (cl_khr_subgroup is mandatory in OpenCL 2.x)
519-     if  (strstr (device_ver_buffer, " OpenCL 3" 
520-         strstr (ext_buffer, " cl_khr_subgroups" NULL  &&
607+     if  (opencl_c_version.major  == 3  && strstr (ext_buffer, " cl_khr_subgroups" NULL  &&
521608        strstr (ext_buffer, " cl_intel_subgroups" NULL ) {
522609        GGML_LOG_ERROR (" ggml_opencl: device does not support subgroups (cl_khr_subgroups or cl_intel_subgroups) " 
523610            " (note that subgroups is an optional feature in OpenCL 3.0)\n " 
@@ -581,9 +668,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
581668    const  std::string kernel_src = read_file (" ggml-opencl.cl" 
582669#endif 
583670
584-     std::string compile_opts =
585-         " -cl-std=CL2.0 -cl-mad-enable -cl-unsafe-math-optimizations " 
586-         " -cl-finite-math-only -cl-fast-relaxed-math " 
671+     auto  opencl_c_std =
672+         std::string (" CL" std::to_string (opencl_c_version.major ) + " ." std::to_string (opencl_c_version.minor );
673+ 
674+     std::string compile_opts = std::string (" -cl-std=" 
675+                                "  -cl-mad-enable -cl-unsafe-math-optimizations" 
676+                                "  -cl-finite-math-only -cl-fast-relaxed-math" 
587677    backend_ctx->program  = build_program_from_source (context, device, kernel_src.c_str (), compile_opts);
588678
589679    //  Non matmul kernels.
@@ -693,10 +783,10 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
693783    CL_CHECK ((backend_ctx->kernel_transpose_16  = clCreateKernel (backend_ctx->program_transpose_16 , " kernel_transpose_16" 
694784
695785    //  Gemv general
696-     std::string CL_gemv_compile_opts =
697-         "  -cl-std=CL2.0  " 
698-         "  -cl-mad-enable  " 
699-         "  -DSIMDGROUP_WIDTH= "  +  std::to_string (backend_ctx->adreno_wave_size );
786+     std::string CL_gemv_compile_opts =  std::string ( " -cl-std= " ) + opencl_c_std + 
787+                                         "  -cl-mad-enable  " 
788+                                         "  -DSIMDGROUP_WIDTH= "  + 
789+                                         std::to_string (backend_ctx->adreno_wave_size );
700790    if  (has_vector_subgroup_broadcast) {
701791        CL_gemv_compile_opts += "  -DVECTOR_SUB_GROUP_BROADCAT " 
702792    }
@@ -713,12 +803,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
713803    CL_CHECK ((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_general  = clCreateKernel (backend_ctx->program_CL_gemv_general , " kernel_gemv_noshuffle" 
714804
715805    //  Gemv 2048, 16384
716-     CL_gemv_compile_opts =
717-         "  -cl-std=CL2.0  " 
718-         "  -cl-mad-enable  " 
719-         "  -DLINE_STRIDE_A=2048  " 
720-         "  -DBLOCK_STRIDE_A=16384  " 
721-         "  -DSIMDGROUP_WIDTH= "  +  std::to_string (backend_ctx->adreno_wave_size );
806+     CL_gemv_compile_opts =  std::string ( " -cl-std= " ) + opencl_c_std + 
807+                             "  -cl-mad-enable  " 
808+                             "  -DLINE_STRIDE_A=2048  " 
809+                             "  -DBLOCK_STRIDE_A=16384  " 
810+                             "  -DSIMDGROUP_WIDTH= "  + 
811+                             std::to_string (backend_ctx->adreno_wave_size );
722812    if  (has_vector_subgroup_broadcast) {
723813        CL_gemv_compile_opts += "  -DVECTOR_SUB_GROUP_BROADCAT " 
724814    }
@@ -735,12 +825,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
735825    CL_CHECK ((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_4096_1_4096  = clCreateKernel (backend_ctx->program_CL_gemv_4096_1_4096 , " kernel_gemv_noshuffle" 
736826
737827    //  Gemv 2048, 16384
738-     CL_gemv_compile_opts =
739-         "  -cl-std=CL2.0  " 
740-         "  -cl-mad-enable  " 
741-         "  -DLINE_STRIDE_A=2048  " 
742-         "  -DBLOCK_STRIDE_A=16384  " 
743-         "  -DSIMDGROUP_WIDTH= "  +  std::to_string (backend_ctx->adreno_wave_size );
828+     CL_gemv_compile_opts =  std::string ( " -cl-std= " ) + opencl_c_std + 
829+                             "  -cl-mad-enable  " 
830+                             "  -DLINE_STRIDE_A=2048  " 
831+                             "  -DBLOCK_STRIDE_A=16384  " 
832+                             "  -DSIMDGROUP_WIDTH= "  + 
833+                             std::to_string (backend_ctx->adreno_wave_size );
744834    if  (has_vector_subgroup_broadcast) {
745835        CL_gemv_compile_opts += "  -DVECTOR_SUB_GROUP_BROADCAT " 
746836    }
@@ -750,12 +840,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
750840    CL_CHECK ((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_4096_1_11008  = clCreateKernel (backend_ctx->program_CL_gemv_4096_1_11008 , " kernel_gemv_noshuffle" 
751841
752842    //  Gemv 5504, 44032
753-     CL_gemv_compile_opts =
754-         "  -cl-std=CL2.0  " 
755-         "  -cl-mad-enable  " 
756-         "  -DLINE_STRIDE_A=5504  " 
757-         "  -DBLOCK_STRIDE_A=44032  " 
758-         "  -DSIMDGROUP_WIDTH= "  +  std::to_string (backend_ctx->adreno_wave_size );
843+     CL_gemv_compile_opts =  std::string ( " -cl-std= " ) + opencl_c_std + 
844+                             "  -cl-mad-enable  " 
845+                             "  -DLINE_STRIDE_A=5504  " 
846+                             "  -DBLOCK_STRIDE_A=44032  " 
847+                             "  -DSIMDGROUP_WIDTH= "  + 
848+                             std::to_string (backend_ctx->adreno_wave_size );
759849    if  (has_vector_subgroup_broadcast) {
760850        CL_gemv_compile_opts += "  -DVECTOR_SUB_GROUP_BROADCAT " 
761851    }
@@ -765,12 +855,12 @@ static ggml_backend_opencl_context * ggml_cl2_init(ggml_backend_dev_t dev) {
765855    CL_CHECK ((backend_ctx->CL_mul_mat_vec_q4_0_f32_1d_4x_flat_11008_1_4096  = clCreateKernel (backend_ctx->program_CL_gemv_11008_1_4096 , " kernel_gemv_noshuffle" 
766856
767857    //  Gemv 16000, 128000
768-     CL_gemv_compile_opts =
769-         "  -cl-std=CL2.0  " 
770-         "  -cl-mad-enable  " 
771-         "  -DLINE_STRIDE_A=16000  " 
772-         "  -DBLOCK_STRIDE_A=128000  " 
773-         "  -DSIMDGROUP_WIDTH= "  +  std::to_string (backend_ctx->adreno_wave_size );
858+     CL_gemv_compile_opts =  std::string ( " -cl-std= " ) + opencl_c_std + 
859+                             "  -cl-mad-enable  " 
860+                             "  -DLINE_STRIDE_A=16000  " 
861+                             "  -DBLOCK_STRIDE_A=128000  " 
862+                             "  -DSIMDGROUP_WIDTH= "  + 
863+                             std::to_string (backend_ctx->adreno_wave_size );
774864    if  (has_vector_subgroup_broadcast) {
775865        CL_gemv_compile_opts += "  -DVECTOR_SUB_GROUP_BROADCAT " 
776866    }
0 commit comments