@@ -1987,7 +1987,6 @@ struct ggml_compute_state {
1987
1987
#ifndef GGML_USE_OPENMP
1988
1988
ggml_thread_t thrd;
1989
1989
bool cpumask[GGML_MAX_N_THREADS];
1990
- bool mask_specified;
1991
1990
int last_graph;
1992
1991
bool pending;
1993
1992
#endif
@@ -18828,11 +18827,14 @@ static bool ggml_thread_apply_thread_priority(int32_t prio) {
18828
18827
18829
18828
#endif
18830
18829
18831
- static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask, bool strict, int32_t* iter) {
18832
- if (!global_mask) {
18833
- memset(local_mask, 1, GGML_MAX_N_THREADS);
18834
- return;
18830
+ static bool ggml_thread_cpumask_is_valid(const bool * mask) {
18831
+ for (int i = 0; i < GGML_MAX_N_THREADS; i++) {
18832
+ if (mask[i]) { return true; }
18835
18833
}
18834
+ return false;
18835
+ }
18836
+
18837
+ static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask, bool strict, int32_t* iter) {
18836
18838
if (!strict) {
18837
18839
memcpy(local_mask, global_mask, GGML_MAX_N_THREADS);
18838
18840
return;
@@ -19189,8 +19191,10 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
19189
19191
struct ggml_compute_threadpool * threadpool = state->threadpool;
19190
19192
19191
19193
ggml_thread_apply_thread_priority(threadpool->prio);
19192
- if (state->mask_specified)
19194
+
19195
+ if (ggml_thread_cpumask_is_valid(state->cpumask)) {
19193
19196
ggml_thread_apply_affinity(state->cpumask);
19197
+ }
19194
19198
19195
19199
while (true) {
19196
19200
// Check if we need to sleep
@@ -19249,17 +19253,27 @@ static void ggml_graph_compute_kickoff(struct ggml_compute_threadpool * threadpo
19249
19253
19250
19254
#endif // GGML_USE_OPENMP
19251
19255
19256
+ void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
19257
+ p->n_threads = n_threads;
19258
+ p->prio = 0; // default priority (usually means normal or inherited)
19259
+ p->poll = 50; // hybrid-polling enabled
19260
+ p->strict_cpu = false; // no strict placement (all threads share same cpumask)
19261
+ p->paused = false; // threads are ready to go
19262
+ memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
19263
+ }
19264
+
19265
+ struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
19266
+ struct ggml_threadpool_params p;
19267
+ ggml_threadpool_params_init(&p, n_threads);
19268
+ return p;
19269
+ }
19270
+
19252
19271
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
19253
19272
if (p0->n_threads != p1->n_threads ) return false;
19254
19273
if (p0->prio != p1->prio ) return false;
19255
19274
if (p0->poll != p1->poll ) return false;
19256
19275
if (p0->strict_cpu != p1->strict_cpu ) return false;
19257
- if (p0->mask_specified != p1->mask_specified) return false;
19258
- if (p0->mask_specified) {
19259
- return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
19260
- }
19261
-
19262
- return true;
19276
+ return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
19263
19277
}
19264
19278
19265
19279
static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
@@ -19312,16 +19326,13 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
19312
19326
for (int j = 0; j < tpp->n_threads; j++) {
19313
19327
workers[j] = (struct ggml_compute_state) {
19314
19328
.thrd = 0,
19315
- .mask_specified = tpp->mask_specified,
19316
19329
.threadpool = threadpool,
19317
19330
.ith = j,
19318
19331
.last_graph = 0,
19319
19332
.pending = false
19320
19333
};
19321
19334
19322
- if (tpp->mask_specified) {
19323
- ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
19324
- }
19335
+ ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
19325
19336
19326
19337
// Spin threads for all secondary workers
19327
19338
if (j > 0) {
@@ -19357,15 +19368,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
19357
19368
GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %d\n", n_threads);
19358
19369
disposable_threadpool = true;
19359
19370
19360
- struct ggml_threadpool_params ttp = {
19361
- .mask_specified = false,
19362
- .n_threads = n_threads,
19363
- .prio = 0,
19364
- .poll = 1,
19365
- .strict_cpu = false,
19366
- .paused = false
19367
- };
19368
-
19371
+ struct ggml_threadpool_params ttp = ggml_threadpool_params_default(n_threads);
19369
19372
threadpool = ggml_create_threadpool_impl(&ttp, cgraph, cplan);
19370
19373
} else {
19371
19374
// Reset some of the parameters that need resetting
@@ -19407,7 +19410,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
19407
19410
}
19408
19411
#else
19409
19412
// Update main thread affinity to match the current threadpool
19410
- if (threadpool->workers[0].mask_specified ) {
19413
+ if (!ggml_thread_cpumask_is_valid( threadpool->workers[0].cpumask) ) {
19411
19414
ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
19412
19415
}
19413
19416
0 commit comments