@@ -1987,7 +1987,6 @@ struct ggml_compute_state {
1987
1987
#ifndef GGML_USE_OPENMP
1988
1988
ggml_thread_t thrd;
1989
1989
bool cpumask[GGML_MAX_N_THREADS];
1990
- bool mask_specified;
1991
1990
int last_graph;
1992
1991
bool pending;
1993
1992
#endif
@@ -18815,11 +18814,14 @@ static bool ggml_thread_apply_thread_priority(int32_t prio) {
18815
18814
18816
18815
#endif
18817
18816
18818
- static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask, bool strict, int32_t* iter) {
18819
- if (!global_mask) {
18820
- memset(local_mask, 1, GGML_MAX_N_THREADS);
18821
- return;
18817
+ static bool ggml_thread_cpumask_is_valid(const bool * mask) { // returns true if at least one of the first GGML_MAX_N_THREADS entries of mask is set
18818
+ for (int i = 0; i < GGML_MAX_N_THREADS; i++) {
18819
+ if (mask[i]) { return true; } // one set entry is enough, stop scanning
18822
18820
}
18821
+ return false; // every entry was false, i.e. the mask is all-zero
18822
+ }
18823
+
18824
+ static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask, bool strict, int32_t* iter) {
18823
18825
if (!strict) {
18824
18826
memcpy(local_mask, global_mask, GGML_MAX_N_THREADS);
18825
18827
return;
@@ -19176,8 +19178,10 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
19176
19178
struct ggml_compute_threadpool * threadpool = state->threadpool;
19177
19179
19178
19180
ggml_thread_apply_thread_priority(threadpool->prio);
19179
- if (state->mask_specified)
19181
+
19182
+ if (ggml_thread_cpumask_is_valid(state->cpumask)) {
19180
19183
ggml_thread_apply_affinity(state->cpumask);
19184
+ }
19181
19185
19182
19186
while (true) {
19183
19187
// Check if we need to sleep
@@ -19236,17 +19240,27 @@ static void ggml_graph_compute_kickoff(struct ggml_compute_threadpool * threadpo
19236
19240
19237
19241
#endif // GGML_USE_OPENMP
19238
19242
19243
+ void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) { // fills *p in place: n_threads from the caller, every other field set to the defaults below
19244
+ p->n_threads = n_threads;
19245
+ p->prio = 0; // default priority (usually means normal or inherited)
19246
+ p->poll = 50; // hybrid-polling enabled
19247
+ p->strict_cpu = false; // no strict placement (all threads share same cpumask)
19248
+ p->paused = false; // threads are ready to go
19249
+ memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
19250
+ }
19251
+
19252
+ struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) { // by-value variant: returns a params struct filled by ggml_threadpool_params_init
19253
+ struct ggml_threadpool_params p;
19254
+ ggml_threadpool_params_init(&p, n_threads); // assigns each field this file sets explicitly (n_threads, prio, poll, strict_cpu, paused, cpumask)
19255
+ return p;
19256
+ }
19257
+
19239
19258
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) { // true when n_threads, prio, poll, strict_cpu and cpumask are all equal; `paused` is not compared here
19240
19259
if (p0->n_threads != p1->n_threads ) return false;
19241
19260
if (p0->prio != p1->prio ) return false;
19242
19261
if (p0->poll != p1->poll ) return false;
19243
19262
if (p0->strict_cpu != p1->strict_cpu ) return false;
19244
- if (p0->mask_specified != p1->mask_specified) return false;
19245
- if (p0->mask_specified) {
19246
- return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
19247
- }
19248
-
19249
- return true;
19263
+ return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0; // masks are now always compared byte-for-byte; two all-zero masks compare equal
19250
19264
}
19251
19265
19252
19266
static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
@@ -19299,16 +19313,13 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
19299
19313
for (int j = 0; j < tpp->n_threads; j++) {
19300
19314
workers[j] = (struct ggml_compute_state) {
19301
19315
.thrd = 0,
19302
- .mask_specified = tpp->mask_specified,
19303
19316
.threadpool = threadpool,
19304
19317
.ith = j,
19305
19318
.last_graph = 0,
19306
19319
.pending = false
19307
19320
};
19308
19321
19309
- if (tpp->mask_specified) {
19310
- ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
19311
- }
19322
+ ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
19312
19323
19313
19324
// Spin threads for all secondary workers
19314
19325
if (j > 0) {
@@ -19344,15 +19355,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
19344
19355
GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %d\n", n_threads);
19345
19356
disposable_threadpool = true;
19346
19357
19347
- struct ggml_threadpool_params ttp = {
19348
- .mask_specified = false,
19349
- .n_threads = n_threads,
19350
- .prio = 0,
19351
- .poll = 1,
19352
- .strict_cpu = false,
19353
- .paused = false
19354
- };
19355
-
19358
+ struct ggml_threadpool_params ttp = ggml_threadpool_params_default(n_threads);
19356
19359
threadpool = ggml_create_threadpool_impl(&ttp, cgraph, cplan);
19357
19360
} else {
19358
19361
// Reset some of the parameters that need resetting
@@ -19394,7 +19397,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
19394
19397
}
19395
19398
#else
19396
19399
// Update main thread affinity to match the current threadpool
19397
- if (threadpool->workers[0].mask_specified ) {
19400
+ if (ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) { // apply affinity only when worker 0 has at least one CPU selected; an all-zero mask keeps the current affinity
19398
19401
ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
19399
19402
}
19400
19403
0 commit comments