@@ -473,10 +473,10 @@ struct ggml_threadpool {
473
473
// Per-worker state: one element of the threadpool's `workers` array
// (the surrounding hunks access it as `threadpool->workers[...]`).
struct ggml_compute_state {
474
474
// Fields in this section are compiled only when GGML_USE_OPENMP is not defined.
#ifndef GGML_USE_OPENMP
475
475
// NOTE(review): presumably the handle of the thread backing this worker - confirm.
ggml_thread_t thrd ;
476
- bool cpumask [GGML_MAX_N_THREADS ];
477
476
// NOTE(review): meaning of last_graph / pending is not visible in this diff - confirm against their users.
int last_graph ;
478
477
bool pending ;
479
478
#endif
479
// cpumask now sits outside the #ifndef section, so it also exists in OpenMP builds.
// The OpenMP hunks fill it with ggml_thread_cpumask_next() and, when
// ggml_thread_cpumask_is_valid() accepts it, pass it to ggml_thread_apply_affinity().
+ bool cpumask [GGML_MAX_N_THREADS ];
480
480
// NOTE(review): presumably a back-pointer to the owning threadpool - confirm.
struct ggml_threadpool * threadpool ;
481
481
// NOTE(review): presumably this worker's index within the workers array - confirm.
int ith ;
482
482
};
@@ -3081,7 +3081,14 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
3081
3081
3082
3082
threadpool -> workers = workers ;
3083
3083
3084
- #ifndef GGML_USE_OPENMP
3084
+ #ifdef GGML_USE_OPENMP
3085
+ int32_t cpumask_iter = 0 ;
3086
+
3087
+ // Compute CPU masks for each thread
3088
+ for (int j = 0 ; j < tpp -> n_threads ; j ++ ) {
3089
+ ggml_thread_cpumask_next (tpp -> cpumask , workers [j ].cpumask , tpp -> strict_cpu , & cpumask_iter );
3090
+ }
3091
+ #else // GGML_USE_OPENMP
3085
3092
ggml_mutex_init (& threadpool -> mutex );
3086
3093
ggml_cond_init (& threadpool -> cond );
3087
3094
@@ -3154,7 +3161,14 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
3154
3161
atomic_store_explicit (& threadpool -> n_threads_cur , n_threads , memory_order_relaxed );
3155
3162
}
3156
3163
3157
- ggml_graph_compute_thread (& threadpool -> workers [omp_get_thread_num ()]);
3164
+ // Apply thread CPU mask and priority
3165
+ int ith = omp_get_thread_num ();
3166
+
3167
+ ggml_thread_apply_priority (threadpool -> prio );
3168
+ if (ggml_thread_cpumask_is_valid (threadpool -> workers [ith ].cpumask )) {
3169
+ ggml_thread_apply_affinity (threadpool -> workers [ith ].cpumask );
3170
+ }
3171
+ ggml_graph_compute_thread (& threadpool -> workers [ith ]);
3158
3172
}
3159
3173
} else {
3160
3174
atomic_store_explicit (& threadpool -> n_threads_cur , 1 , memory_order_relaxed );
0 commit comments