@@ -19191,7 +19191,6 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
19191
19191
struct ggml_compute_threadpool * threadpool = state->threadpool;
19192
19192
19193
19193
ggml_thread_apply_thread_priority(threadpool->prio);
19194
-
19195
19194
if (ggml_thread_cpumask_is_valid(state->cpumask)) {
19196
19195
ggml_thread_apply_affinity(state->cpumask);
19197
19196
}
@@ -19296,51 +19295,35 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
19296
19295
threadpool->ec = GGML_STATUS_SUCCESS;
19297
19296
}
19298
19297
19299
- #ifndef GGML_USE_OPENMP
19300
- ggml_mutex_init(&threadpool->mutex);
19301
- ggml_cond_init(&threadpool->cond);
19302
- #endif // GGML_USE_OPENMP
19298
+ // Allocate and init workers state
19299
+ const size_t workers_size = sizeof(struct ggml_compute_state) * tpp->n_threads;
19300
+ struct ggml_compute_state * workers = GGML_ALIGNED_MALLOC(workers_size);
19303
19301
19304
- struct ggml_compute_state * workers =
19305
- GGML_ALIGNED_MALLOC(sizeof(struct ggml_compute_state) * tpp->n_threads);
19302
+ memset(workers, 0, workers_size);
19303
+ for (int j = 0; j < tpp->n_threads; j++) {
19304
+ workers[j].threadpool = threadpool;
19305
+ workers[j].ith = j;
19306
+ }
19306
19307
19307
19308
threadpool->workers = workers;
19308
19309
19309
- #ifdef GGML_USE_OPENMP
19310
- for (int j = 0; j < tpp->n_threads; j++) {
19311
- workers[j] = (struct ggml_compute_state) {
19312
- .threadpool = threadpool,
19313
- .ith = j
19314
- };
19315
- }
19316
- #else // Not using OPENMP
19317
- int32_t cpumask_iter = 0;
19310
+ #ifndef GGML_USE_OPENMP
19311
+ ggml_mutex_init(&threadpool->mutex);
19312
+ ggml_cond_init(&threadpool->cond);
19318
19313
19319
- ggml_thread_apply_process_priority(tpp->prio);
19320
- ggml_thread_apply_thread_priority(tpp->prio);
19314
+ // Spin up threads for the secondary workers (j >= 1), and update CPU placements.
19315
+ // Place the main thread last (towards the higher numbered CPU cores).
19321
19316
19322
- for (int j = 0; j < tpp->n_threads; j++) {
19323
- workers[j] = (struct ggml_compute_state) {
19324
- .thrd = 0,
19325
- .threadpool = threadpool,
19326
- .ith = j,
19327
- .last_graph = 0,
19328
- .pending = false
19329
- };
19317
+ int32_t cpumask_iter = 0;
19330
19318
19319
+ for (int j = 1; j < tpp->n_threads; j++) {
19331
19320
ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
19332
19321
19333
- // Spin threads for all secondary workers
19334
- if (j > 0) {
19335
- int32_t rc = ggml_thread_create(
19336
- &workers[j].thrd,
19337
- NULL,
19338
- ggml_graph_compute_secondary_thread,
19339
- &workers[j]
19340
- );
19341
- GGML_ASSERT(rc == 0);
19342
- }
19322
+ int32_t rc = ggml_thread_create(&workers[j].thrd, NULL, ggml_graph_compute_secondary_thread, &workers[j]);
19323
+ GGML_ASSERT(rc == 0);
19343
19324
}
19325
+
19326
+ ggml_thread_cpumask_next(tpp->cpumask, workers[0].cpumask, tpp->strict_cpu, &cpumask_iter);
19344
19327
#endif // GGML_USE_OPENMP
19345
19328
19346
19329
return threadpool;
@@ -19391,22 +19374,16 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
19391
19374
threadpool->n_threads_cur = n_threads;
19392
19375
}
19393
19376
19394
- struct ggml_compute_state worker = {
19395
- .ith = omp_get_thread_num(),
19396
- .threadpool = threadpool,
19397
- };
19398
- ggml_graph_compute_thread(&worker);
19377
+ ggml_graph_compute_thread(&threadpool->workers[omp_get_thread_num()]);
19399
19378
}
19400
19379
} else {
19401
- struct ggml_compute_state worker = {
19402
- .ith = 0,
19403
- .threadpool = threadpool,
19404
- };
19405
- ggml_graph_compute_thread(&worker);
19380
+ ggml_graph_compute_thread(&threadpool->workers[0]);
19406
19381
}
19407
19382
#else
19408
- // Update main thread affinity to match the current threadpool
19409
- if (!ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
19383
+ // Update main thread prio and affinity to match the current threadpool
19384
+ ggml_thread_apply_process_priority(threadpool->prio);
19385
+ ggml_thread_apply_thread_priority(threadpool->prio);
19386
+ if (ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
19410
19387
ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
19411
19388
}
19412
19389
0 commit comments