Skip to content

Commit 50d3500

Browse files
threadpool: remove the need for explicit cpumask_specified param
An all-zero cpumask means "use the default (usually inherited) CPU affinity mask".
1 parent 2bf6c55 commit 50d3500

File tree

4 files changed

+41
-43
lines changed

4 files changed

+41
-43
lines changed

common/common.cpp

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -295,13 +295,7 @@ void postprocess_cpu_params(cpu_params& cpuparams, const cpu_params* role_model)
295295
}
296296
}
297297

298-
if (n_set == 0) {
299-
// You hit the jackpot!
300-
memset(&cpuparams.cpumask[0], 1, GGML_MAX_N_THREADS);
301-
n_set = GGML_MAX_N_THREADS;
302-
}
303-
304-
if (n_set < cpuparams.n_threads) {
298+
if (n_set && n_set < cpuparams.n_threads) {
305299
// Not enough set bits, may experience performance issues.
306300
fprintf(stderr, "warn: Not enough set bits in CPU mask (%d) to satisfy requested thread count: %d\n", n_set, cpuparams.n_threads);
307301
}
@@ -2593,16 +2587,15 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
25932587
struct ggml_threadpool_params ggml_threadpool_params_from_cpu_params(const cpu_params & params) {
25942588
struct ggml_threadpool_params tpp;
25952589

2596-
tpp.mask_specified = params.mask_valid;
2590+
ggml_threadpool_params_init(&tpp, params.n_threads); // setup the defaults
2591+
25972592
if (params.mask_valid) {
25982593
std::memcpy(&tpp.cpumask, &params.cpumask, GGML_MAX_N_THREADS);
25992594
}
26002595

2601-
tpp.n_threads = params.n_threads;
26022596
tpp.prio = params.priority;
26032597
tpp.poll = params.poll;
26042598
tpp.strict_cpu = params.strict_cpu;
2605-
tpp.paused = false;
26062599

26072600
return tpp;
26082601
}

examples/llama-bench/llama-bench.cpp

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1462,14 +1462,13 @@ int main(int argc, char ** argv) {
14621462

14631463
llama_kv_cache_clear(ctx);
14641464

1465-
struct ggml_threadpool_params tpp;
1466-
tpp.n_threads = t.n_threads;
1467-
tpp.mask_specified = params.cpuparams.mask_valid;
1465+
struct ggml_threadpool_params tpp = ggml_threadpool_params_default(t.n_threads);
14681466
tpp.strict_cpu = params.cpuparams.strict_cpu;
14691467
tpp.prio = params.cpuparams.priority;
14701468
tpp.poll = params.cpuparams.poll;
1471-
1472-
std::memcpy(&tpp.cpumask[0], &params.cpuparams.cpumask[0], GGML_MAX_N_THREADS);
1469+
if (params.cpuparams.mask_valid) {
1470+
std::memcpy(&tpp.cpumask[0], &params.cpuparams.cpumask[0], GGML_MAX_N_THREADS);
1471+
}
14731472

14741473
struct ggml_compute_threadpool* threadpool = ggml_create_threadpool(&tpp);
14751474
if (!threadpool) {

ggml/include/ggml.h

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -626,9 +626,10 @@ extern "C" {
626626
// If it returns true, the computation is aborted
627627
typedef bool (*ggml_abort_callback)(void * data);
628628

629+
// Threadpool params
630+
// Use ggml_threadpool_params_default() or ggml_threadpool_params_init() to populate the defaults
629631
struct ggml_threadpool_params {
630-
bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores
631-
bool mask_specified; // mask is non-empty
632+
bool cpumask[GGML_MAX_N_THREADS]; // mask of cpu cores (all-zeros means use default affinity settings)
632633
int n_threads; // number of threads
633634
int32_t prio; // thread priority
634635
uint32_t poll; // polling level (0 - no polling, 100 - aggressive polling)
@@ -2024,6 +2025,8 @@ extern "C" {
20242025
GGML_API size_t ggml_graph_overhead(void);
20252026
GGML_API size_t ggml_graph_overhead_custom(size_t size, bool grads);
20262027

2028+
GGML_API struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads);
2029+
GGML_API void ggml_threadpool_params_init(struct ggml_threadpool_params *p, int n_threads);
20272030
GGML_API bool ggml_threadpool_params_match (const struct ggml_threadpool_params *p0, const struct ggml_threadpool_params *p1);
20282031
GGML_API struct ggml_compute_threadpool* ggml_create_threadpool (struct ggml_threadpool_params * params);
20292032
GGML_API void ggml_release_threadpool (struct ggml_compute_threadpool * threadpool);

ggml/src/ggml.c

Lines changed: 29 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1987,7 +1987,6 @@ struct ggml_compute_state {
19871987
#ifndef GGML_USE_OPENMP
19881988
ggml_thread_t thrd;
19891989
bool cpumask[GGML_MAX_N_THREADS];
1990-
bool mask_specified;
19911990
int last_graph;
19921991
bool pending;
19931992
#endif
@@ -18815,11 +18814,14 @@ static bool ggml_thread_apply_thread_priority(int32_t prio) {
1881518814

1881618815
#endif
1881718816

18818-
static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask, bool strict, int32_t* iter) {
18819-
if (!global_mask) {
18820-
memset(local_mask, 1, GGML_MAX_N_THREADS);
18821-
return;
18817+
static bool ggml_thread_cpumask_is_valid(const bool * mask) {
18818+
for (int i = 0; i < GGML_MAX_N_THREADS; i++) {
18819+
if (mask[i]) { return true; }
1882218820
}
18821+
return false;
18822+
}
18823+
18824+
static void ggml_thread_cpumask_next(const bool * global_mask, bool * local_mask, bool strict, int32_t* iter) {
1882318825
if (!strict) {
1882418826
memcpy(local_mask, global_mask, GGML_MAX_N_THREADS);
1882518827
return;
@@ -19176,8 +19178,10 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
1917619178
struct ggml_compute_threadpool * threadpool = state->threadpool;
1917719179

1917819180
ggml_thread_apply_thread_priority(threadpool->prio);
19179-
if (state->mask_specified)
19181+
19182+
if (ggml_thread_cpumask_is_valid(state->cpumask)) {
1918019183
ggml_thread_apply_affinity(state->cpumask);
19184+
}
1918119185

1918219186
while (true) {
1918319187
// Check if we need to sleep
@@ -19236,17 +19240,27 @@ static void ggml_graph_compute_kickoff(struct ggml_compute_threadpool * threadpo
1923619240

1923719241
#endif // GGML_USE_OPENMP
1923819242

19243+
void ggml_threadpool_params_init(struct ggml_threadpool_params * p, int n_threads) {
19244+
p->n_threads = n_threads;
19245+
p->prio = 0; // default priority (usually means normal or inherited)
19246+
p->poll = 50; // hybrid-polling enabled
19247+
p->strict_cpu = false; // no strict placement (all threads share same cpumask)
19248+
p->paused = false; // threads are ready to go
19249+
memset(p->cpumask, 0, GGML_MAX_N_THREADS); // all-zero means use the default affinity (usually inherited)
19250+
}
19251+
19252+
struct ggml_threadpool_params ggml_threadpool_params_default(int n_threads) {
19253+
struct ggml_threadpool_params p;
19254+
ggml_threadpool_params_init(&p, n_threads);
19255+
return p;
19256+
}
19257+
1923919258
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
1924019259
if (p0->n_threads != p1->n_threads ) return false;
1924119260
if (p0->prio != p1->prio ) return false;
1924219261
if (p0->poll != p1->poll ) return false;
1924319262
if (p0->strict_cpu != p1->strict_cpu ) return false;
19244-
if (p0->mask_specified != p1->mask_specified) return false;
19245-
if (p0->mask_specified) {
19246-
return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
19247-
}
19248-
19249-
return true;
19263+
return memcmp(p0->cpumask, p1->cpumask, GGML_MAX_N_THREADS) == 0;
1925019264
}
1925119265

1925219266
static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
@@ -19299,16 +19313,13 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
1929919313
for (int j = 0; j < tpp->n_threads; j++) {
1930019314
workers[j] = (struct ggml_compute_state) {
1930119315
.thrd = 0,
19302-
.mask_specified = tpp->mask_specified,
1930319316
.threadpool = threadpool,
1930419317
.ith = j,
1930519318
.last_graph = 0,
1930619319
.pending = false
1930719320
};
1930819321

19309-
if (tpp->mask_specified) {
19310-
ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
19311-
}
19322+
ggml_thread_cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
1931219323

1931319324
// Spin threads for all secondary workers
1931419325
if (j > 0) {
@@ -19344,15 +19355,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
1934419355
GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %d\n", n_threads);
1934519356
disposable_threadpool = true;
1934619357

19347-
struct ggml_threadpool_params ttp = {
19348-
.mask_specified = false,
19349-
.n_threads = n_threads,
19350-
.prio = 0,
19351-
.poll = 1,
19352-
.strict_cpu = false,
19353-
.paused = false
19354-
};
19355-
19358+
struct ggml_threadpool_params ttp = ggml_threadpool_params_default(n_threads);
1935619359
threadpool = ggml_create_threadpool_impl(&ttp, cgraph, cplan);
1935719360
} else {
1935819361
// Reset some of the parameters that need resetting
@@ -19394,7 +19397,7 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
1939419397
}
1939519398
#else
1939619399
// Update main thread affinity to match the current threadpool
19397-
if (threadpool->workers[0].mask_specified) {
19400+
if (ggml_thread_cpumask_is_valid(threadpool->workers[0].cpumask)) {
1939819401
ggml_thread_apply_affinity(threadpool->workers[0].cpumask);
1939919402
}
1940019403

0 commit comments

Comments
 (0)