Skip to content

Commit 95a1ab2

Browse files
wishstudio, pwilkin
authored and committed
ggml-cpu: Respect cpumask settings (ggml-org#16164)
1 parent 5af320a commit 95a1ab2

File tree

1 file changed

+17
-3
lines changed

1 file changed

+17
-3
lines changed

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 17 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -473,10 +473,10 @@ struct ggml_threadpool {
473473
struct ggml_compute_state {
474474
#ifndef GGML_USE_OPENMP
475475
ggml_thread_t thrd;
476-
bool cpumask[GGML_MAX_N_THREADS];
477476
int last_graph;
478477
bool pending;
479478
#endif
479+
bool cpumask[GGML_MAX_N_THREADS];
480480
struct ggml_threadpool * threadpool;
481481
int ith;
482482
};
@@ -3103,7 +3103,14 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
31033103

31043104
threadpool->workers = workers;
31053105

3106-
#ifndef GGML_USE_OPENMP
3106+
#ifdef GGML_USE_OPENMP
3107+
int32_t cpumask_iter = 0;
3108+
3109+
// Compute CPU masks for each thread
3110+
for (int j = 0; j < tpp->n_threads; j++) {
3111+
ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
3112+
}
3113+
#else // GGML_USE_OPENMP
31073114
ggml_mutex_init(&threadpool->mutex);
31083115
ggml_cond_init(&threadpool->cond);
31093116

@@ -3176,7 +3183,14 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
31763183
atomic_store_explicit(&threadpool->n_threads_cur, n_threads, memory_order_relaxed);
31773184
}
31783185

3179-
ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
3186+
// Apply thread CPU mask and priority
3187+
int ith = omp_get_thread_num();
3188+
3189+
ggml_thread_apply_priority(threadpool->prio);
3190+
if (ggml_thread_cpumask_is_valid(threadpool->workers[ith].cpumask)) {
3191+
ggml_thread_apply_affinity(threadpool->workers[ith].cpumask);
3192+
}
3193+
ggml_graph_compute_thread(&threadpool->workers[ith]);
31803194
}
31813195
} else {
31823196
atomic_store_explicit(&threadpool->n_threads_cur, 1, memory_order_relaxed);

0 commit comments

Comments
 (0)