@@ -34,25 +34,25 @@ struct ggml_kleidiai_context {
3434 ggml_kleidiai_kernels * kernels;
3535} static ctx = { NULL };
3636
// One-time setup of the file-level `ctx`: builds a CPU feature mask and stores the
// kernels returned by ggml_kleidiai_select_kernels() in ctx.kernels.
// This diff drops the `n_threads` parameter; SME is now opt-in through an environment
// variable instead of being gated on the thread count on Apple builds.
37- static void init_kleidiai_context (int n_threads ) {
37+ static void init_kleidiai_context (void ) {
// Function-local flag: the body below runs only on the first call.
3838 static bool initialized = false ;
3939
4040 if (!initialized) {
41- GGML_ASSERT (n_threads > 0 );
42-
4341 initialized = true ;
// NOTE(review): the spaces inside the literal look like an artifact of this diff
// rendering; the variable is presumably named GGML_KLEIDIAI_SME - confirm against the real file.
42+ const char *env_var = getenv (" GGML_KLEIDIAI_SME" );
// SME stays disabled unless the environment variable is set to a non-zero integer.
43+ int sme_enabled = 0 ;
4444
// Baseline feature mask from runtime CPU detection: dotprod, int8 matmul, SVE.
4545 cpu_feature features = (ggml_cpu_has_dotprod () ? CPU_FEATURE_DOTPROD : CPU_FEATURE_NONE) |
4646 (ggml_cpu_has_matmul_int8 () ? CPU_FEATURE_I8MM : CPU_FEATURE_NONE) |
4747 (ggml_cpu_has_sve () ? CPU_FEATURE_SVE : CPU_FEATURE_NONE);
4848
49- #if defined(__APPLE__)
50- if (n_threads == 1 ) {
49+ if (env_var) {
// atoi() yields 0 when the text is not a number, so non-numeric values keep SME off.
50+ sme_enabled = atoi (env_var);
51+ }
52+
// SME is added only when requested via the environment AND reported by the CPU.
53+ if (sme_enabled != 0 ) {
5154 features |= ggml_cpu_has_sme () ? CPU_FEATURE_SME : CPU_FEATURE_NONE;
5255 }
53- #else
54- features |= ggml_cpu_has_sme () ? CPU_FEATURE_SME : CPU_FEATURE_NONE;
55- #endif
5656 ctx.kernels = ggml_kleidiai_select_kernels (features);
5757 }
5858}
@@ -162,6 +162,8 @@ class tensor_traits : public ggml::cpu::tensor_traits {
162162 ctx.kernels ->rhs_info .pack_func (1 , n, k, nr, kr, sr, k_q4_0_block_size, (const uint8_t *)data, NULL , tensor->data , 0 , &params);
163163
164164 return 0 ;
165+
166+ GGML_UNUSED (data_size);
165167 }
166168};
167169
@@ -223,7 +225,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
223225 op->src [0 ]->type == GGML_TYPE_Q4_0 &&
224226 op->src [0 ]->buffer &&
225227 (ggml_n_dims (op->src [0 ]) == 2 ) &&
226- op->src [0 ]->buffer ->buft == ggml_backend_cpu_kleidiai_buffer_type (- 1 ) && ctx.kernels
228+ op->src [0 ]->buffer ->buft == ggml_backend_cpu_kleidiai_buffer_type () && ctx.kernels
227229 ) {
228230 if (op->src [1 ]->buffer && !ggml_backend_buft_is_host (op->src [1 ]->buffer ->buft )) {
229231 return false ;
@@ -237,7 +239,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
237239
238240 ggml::cpu::tensor_traits * get_tensor_traits (const struct ggml_tensor * op) override {
239241 if (op->op == GGML_OP_MUL_MAT) {
240- if (op->src [0 ]->buffer && op->src [0 ]->buffer ->buft == ggml_backend_cpu_kleidiai_buffer_type (- 1 )) {
242+ if (op->src [0 ]->buffer && op->src [0 ]->buffer ->buft == ggml_backend_cpu_kleidiai_buffer_type ()) {
241243 return (ggml::cpu::tensor_traits *) op->src [0 ]->extra ;
242244 }
243245 }
@@ -246,7 +248,7 @@ class extra_buffer_type : ggml::cpu::extra_buffer_type {
246248};
247249} // namespace ggml::cpu::kleidiai
248250
249- ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type (int n_threads ) {
251+ ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type (void ) {
250252 static ggml::cpu::kleidiai::extra_buffer_type ctx;
251253 static struct ggml_backend_buffer_type ggml_backend_cpu_buffer_type_kleidiai = {
252254 /* .iface = */ {
@@ -261,7 +263,7 @@ ggml_backend_buffer_type_t ggml_backend_cpu_kleidiai_buffer_type(int n_threads)
261263 /* .context = */ &ctx,
262264 };
263265
264- init_kleidiai_context (n_threads );
266+ init_kleidiai_context ();
265267
266268 return &ggml_backend_cpu_buffer_type_kleidiai;
267269}
0 commit comments