Skip to content

Commit 49ac51f

Browse files
max-krasnyansky authored and
fmz committed
threadpool: simplify threadpool init logic and fix main thread affinity application
Most of the init code is now exactly the same between threadpool and openmp.
1 parent 8008463 commit 49ac51f

File tree

1 file changed

+25
-48
lines changed

1 file changed

+25
-48
lines changed

ggml/src/ggml.c

Lines changed: 25 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -19191,7 +19191,6 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
1919119191
struct ggml_compute_threadpool * threadpool = state->threadpool;
1919219192

1919319193
ggml_thread_apply_thread_priority(threadpool->prio);
19194-
1919519194
if (ggml_thread_cpumask_is_valid(state->cpumask)) {
1919619195
ggml_thread_apply_affinity(state->cpumask);
1919719196
}
@@ -19296,51 +19295,35 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
1929619295
threadpool->ec = GGML_STATUS_SUCCESS;
1929719296
}
1929819297

19299-
#ifndef GGML_USE_OPENMP
19300-
ggml_mutex_init(&threadpool->mutex);
19301-
ggml_cond_init(&threadpool->cond);
19302-
#endif // GGML_USE_OPENMP
19298+
// Allocate and init workers state
19299+
const size_t workers_size = sizeof(struct ggml_compute_state) * tpp->n_threads;
19300+
struct ggml_compute_state * workers = GGML_ALIGNED_MALLOC(workers_size);
1930319301

19304-
struct ggml_compute_state * workers =
19305-
GGML_ALIGNED_MALLOC(sizeof(struct ggml_compute_state) * tpp->n_threads);
19302+
memset(workers, 0, workers_size);
19303+
for (int j = 0; j < tpp->n_threads; j++) {
19304+
workers[j].threadpool = threadpool;
19305+
workers[j].ith = j;
19306+
}
1930619307

1930719308
threadpool->workers = workers;
1930819309

19309-
#ifdef GGML_USE_OPENMP
19310-
for (int j = 0; j < tpp->n_threads; j++) {
19311-
workers[j] = (struct ggml_compute_state) {
19312-
.threadpool = threadpool,
19313-
.ith = j
19314-
};
19315-
}
19316-
#else // Not using OPENMP
19317-
int32_t cpumask_iter = 0;
19310+
#ifndef GGML_USE_OPENMP
19311+
ggml_mutex_init(&threadpool->mutex);
19312+
ggml_cond_init(&threadpool->cond);
1931819313

19319-
ggml_thread_apply_process_priority(tpp->prio);
19320-
ggml_thread_apply_thread_priority(tpp->prio);
19314+
// Spin the threads for all workers, and update CPU placements.
19315+
// Place the main thread last (towards the higher numbered CPU cores).
1932119316

19322-
for (int j = 0; j < tpp->n_threads; j++) {
19323-
workers[j] = (struct ggml_compute_state) {
19324-
.thrd = 0,
19325-
.threadpool = threadpool,
19326-
.ith = j,
19327-
.last_graph = 0,
19328-
.pending = false
19329-
};
19317+
int32_t cpumask_iter = 0;
1933019318

19319+
for (int j = 1; j < tpp->n_threads; j++) {
1933119320
ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
1933219321

19333-
// Spin threads for all secondary workers
19334-
if (j > 0) {
19335-
int32_t rc = ggml_thread_create(
19336-
&workers[j].thrd,
19337-
NULL,
19338-
ggml_graph_compute_secondary_thread,
19339-
&workers[j]
19340-
);
19341-
GGML_ASSERT(rc == 0);
19342-
}
19322+
int32_t rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_secondary_thread, &workers[j]);
19323+
GGML_ASSERT(rc == 0);
1934319324
}
19325+
19326+
ggml_thread_cpumask_next(tpp->cpumask, workers[0].cpumask, tpp->strict_cpu, &cpumask_iter);
1934419327
#endif // GGML_USE_OPENMP
1934519328

1934619329
return threadpool;
@@ -19391,22 +19374,16 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
1939119374
threadpool->n_threads_cur = n_threads;
1939219375
}
1939319376

19394-
struct ggml_compute_state worker = {
19395-
.ith = omp_get_thread_num(),
19396-
.threadpool = threadpool,
19397-
};
19398-
ggml_graph_compute_thread(&worker);
19377+
ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
1939919378
}
1940019379
} else {
19401-
struct ggml_compute_state worker = {
19402-
.ith = 0,
19403-
.threadpool = threadpool,
19404-
};
19405-
ggml_graph_compute_thread(&worker);
19380+
ggml_graph_compute_thread(&threadpool->workers[0]);
1940619381
}
1940719382
#else
19408-
// Update main thread affinity to match the current threadpool
19409-
if (!ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
19383+
// Update main thread prio and affinity to match the current threadpool
19384+
ggml_thread_apply_process_priority(threadpool->prio);
19385+
ggml_thread_apply_thread_priority(threadpool->prio);
19386+
if (ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
1941019387
ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
1941119388
}
1941219389

0 commit comments

Comments
 (0)