@@ -1964,16 +1964,16 @@ struct ggml_compute_threadpool {
1964
1964
atomic_int n_barrier_passed;
1965
1965
atomic_int current_chunk; // currently processing chunk during Mat_Mul, shared between all the threads.
1966
1966
1967
- volatile bool stop; // Used for stopping the threadpool altogether
1968
- volatile bool pause; // Used for pausing the threadpool or individual threads
1967
+ // these are atomic as an annotation for thread-sanitizer
1968
+ atomic_bool stop; // Used for stopping the threadpool altogether
1969
+ atomic_bool pause; // Used for pausing the threadpool or individual threads
1969
1970
1970
1971
struct ggml_compute_state * workers; // per thread state
1971
1972
int32_t n_threads_max; // number of threads in the pool
1972
1973
int32_t n_threads_cur; // number of threads used in the current graph
1973
1974
1974
- int32_t prio; // Scheduling priority
1975
- bool disposable; // Doesn't initialize a conv-var
1976
- uint32_t poll; // Polling level (0 - no polling)
1975
+ int32_t prio; // Scheduling priority
1976
+ uint32_t poll; // Polling level (0 - no polling)
1977
1977
1978
1978
ggml_abort_callback abort_callback; // abort ggml_graph_compute when true
1979
1979
void * abort_callback_data;
@@ -18847,15 +18847,13 @@ void ggml_release_threadpool(struct ggml_compute_threadpool* threadpool) {
18847
18847
struct ggml_compute_state* workers = threadpool->workers;
18848
18848
const int32_t n_threads = threadpool->n_threads_max;
18849
18849
18850
- if (!threadpool->disposable) {
18851
- ggml_mutex_lock(&threadpool->mutex);
18852
- }
18850
+ ggml_mutex_lock(&threadpool->mutex);
18851
+
18853
18852
threadpool->stop = true;
18854
18853
threadpool->pause = false;
18855
- if (!threadpool->disposable) {
18856
- ggml_cond_broadcast(&threadpool->cond);
18857
- ggml_mutex_unlock(&threadpool->mutex);
18858
- }
18854
+
18855
+ ggml_cond_broadcast(&threadpool->cond);
18856
+ ggml_mutex_unlock(&threadpool->mutex);
18859
18857
18860
18858
for (int32_t j = 1; j < n_threads; j++) {
18861
18859
int32_t rc = ggml_thread_join(workers[j].thrd, NULL);
@@ -18865,10 +18863,8 @@ void ggml_release_threadpool(struct ggml_compute_threadpool* threadpool) {
18865
18863
18866
18864
GGML_ALIGNED_FREE(workers);
18867
18865
18868
- if (!threadpool->disposable) {
18869
- ggml_mutex_destroy(&threadpool->mutex);
18870
- ggml_cond_destroy(&threadpool->cond);
18871
- }
18866
+ ggml_mutex_destroy(&threadpool->mutex);
18867
+ ggml_cond_destroy(&threadpool->cond);
18872
18868
#endif // GGML_USE_OPENMP
18873
18869
18874
18870
GGML_ALIGNED_FREE(threadpool);
@@ -18891,7 +18887,6 @@ static void __ggml_resume_threadpool(struct ggml_compute_threadpool * threadpool
18891
18887
18892
18888
void ggml_pause_threadpool(struct ggml_compute_threadpool * threadpool) {
18893
18889
#ifndef GGML_USE_OPENMP
18894
- GGML_ASSERT(!threadpool->disposable);
18895
18890
ggml_mutex_lock(&threadpool->mutex);
18896
18891
if (!threadpool->pause) {
18897
18892
__ggml_pause_threadpool(threadpool);
@@ -18904,7 +18899,6 @@ void ggml_pause_threadpool(struct ggml_compute_threadpool * threadpool) {
18904
18899
18905
18900
void ggml_resume_threadpool(struct ggml_compute_threadpool * threadpool) {
18906
18901
#ifndef GGML_USE_OPENMP
18907
- GGML_ASSERT(!threadpool->disposable);
18908
18902
ggml_mutex_lock(&threadpool->mutex);
18909
18903
if (threadpool->pause) {
18910
18904
__ggml_resume_threadpool(threadpool);
@@ -18921,7 +18915,7 @@ struct ggml_cplan ggml_graph_plan(
18921
18915
struct ggml_compute_threadpool * threadpool) {
18922
18916
18923
18917
if (threadpool == NULL) {
18924
- GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool\n");
18918
+ GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %u \n", n_threads );
18925
18919
}
18926
18920
if (n_threads <= 0) {
18927
18921
n_threads = threadpool ? threadpool->n_threads_max : GGML_DEFAULT_N_THREADS;
@@ -19130,7 +19124,8 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
19130
19124
19131
19125
static inline bool ggml_graph_compute_ready(struct ggml_compute_state * state) {
19132
19126
struct ggml_compute_threadpool * threadpool = state->threadpool;
19133
- if (threadpool->stop || threadpool->pause || state->pending) { return true; }
19127
+
19128
+ if (state->pending || threadpool->stop || threadpool->pause) { return true; }
19134
19129
19135
19130
// check for new graph/work
19136
19131
int new_graph = atomic_load_explicit(&threadpool->n_graph, memory_order_relaxed);
@@ -19179,8 +19174,6 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
19179
19174
struct ggml_compute_state * state = (struct ggml_compute_state *) data;
19180
19175
struct ggml_compute_threadpool * threadpool = state->threadpool;
19181
19176
19182
- GGML_ASSERT(!threadpool->disposable);
19183
-
19184
19177
__thread_priority(threadpool->prio);
19185
19178
if (state->mask_specified)
19186
19179
__thread_affinity(state->cpumask);
@@ -19196,6 +19189,7 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
19196
19189
GGML_PRINT_DEBUG("thread #%d resuming after wait\n", state->ith);
19197
19190
ggml_mutex_unlock_shared(&threadpool->mutex);
19198
19191
}
19192
+
19199
19193
// This needs to be checked for after the cond_wait
19200
19194
if (threadpool->stop) break;
19201
19195
@@ -19220,6 +19214,25 @@ static thread_ret_t ggml_graph_compute_secondary_thread(void* data) {
19220
19214
return (thread_ret_t) 0;
19221
19215
}
19222
19216
19217
+ // Start processing a new graph: bump threadpool->n_graph and wake the workers (resuming the pool first if it is paused)
19218
+ static void ggml_graph_compute_kickoff(struct ggml_compute_threadpool * threadpool)
19219
+ {
19220
+ // Always take the mutex here because the worker threads are doing hybrid poll/wait
19221
+
19222
+ ggml_mutex_lock(&threadpool->mutex);
19223
+ // Advance the graph counter; ggml_graph_compute_ready loads n_graph when checking for new graph/work
19224
+ atomic_fetch_add_explicit(&threadpool->n_graph, 1, memory_order_relaxed);
19225
+
19226
+ if (threadpool->pause) {
19227
+ // Pool is paused: resume it (resume does the cond broadcast)
19228
+ __ggml_resume_threadpool(threadpool);
19229
+ } else {
19230
+ ggml_cond_broadcast(&threadpool->cond); // not paused: broadcast on the pool's condition variable ourselves
19231
+ }
19232
+
19233
+ ggml_mutex_unlock(&threadpool->mutex);
19234
+ }
19235
+
19223
19236
#endif // GGML_USE_OPENMP
19224
19237
19225
19238
bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, const struct ggml_threadpool_params * p1) {
@@ -19237,7 +19250,6 @@ bool ggml_threadpool_params_match(const struct ggml_threadpool_params * p0, cons
19237
19250
19238
19251
static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
19239
19252
struct ggml_threadpool_params * tpp,
19240
- bool disposable,
19241
19253
struct ggml_cgraph * cgraph,
19242
19254
struct ggml_cplan * cplan) {
19243
19255
@@ -19251,11 +19263,10 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
19251
19263
threadpool->n_barrier_passed = 0;
19252
19264
threadpool->current_chunk = 0;
19253
19265
threadpool->stop = false;
19254
- threadpool->pause = disposable ? false : tpp->paused;
19266
+ threadpool->pause = tpp->paused;
19255
19267
threadpool->workers = NULL;
19256
19268
threadpool->n_threads_max = tpp->n_threads;
19257
- threadpool->n_threads_cur = disposable ? tpp->n_threads : 0;
19258
- threadpool->disposable = disposable;
19269
+ threadpool->n_threads_cur = tpp->n_threads;
19259
19270
threadpool->poll = tpp->poll;
19260
19271
threadpool->prio = tpp->prio;
19261
19272
@@ -19265,10 +19276,8 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
19265
19276
}
19266
19277
19267
19278
#ifndef GGML_USE_OPENMP
19268
- if (!disposable) {
19269
- ggml_mutex_init(&threadpool->mutex);
19270
- ggml_cond_init(&threadpool->cond);
19271
- }
19279
+ ggml_mutex_init(&threadpool->mutex);
19280
+ ggml_cond_init(&threadpool->cond);
19272
19281
#endif // GGML_USE_OPENMP
19273
19282
19274
19283
struct ggml_compute_state * workers =
@@ -19303,14 +19312,12 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
19303
19312
__cpumask_next(tpp->cpumask, workers[j].cpumask, tpp->strict_cpu, &cpumask_iter);
19304
19313
}
19305
19314
19306
- // Disposable threadpools need to have a valid cplan and cgraph immediately.
19307
- thread_ret_t (*thread_entrypoint)(void*) = disposable ? ggml_graph_compute_thread : ggml_graph_compute_secondary_thread;
19308
19315
// Spin threads for all secondary workers
19309
19316
if (j > 0) {
19310
19317
int32_t rc = ggml_thread_create(
19311
19318
&workers[j].thrd,
19312
19319
NULL,
19313
- thread_entrypoint ,
19320
+ ggml_graph_compute_secondary_thread ,
19314
19321
&workers[j]
19315
19322
);
19316
19323
GGML_ASSERT(rc == 0);
@@ -19322,7 +19329,7 @@ static struct ggml_compute_threadpool * ggml_create_threadpool_impl(
19322
19329
}
19323
19330
19324
19331
struct ggml_compute_threadpool * ggml_create_threadpool(struct ggml_threadpool_params * tpp) {
19325
- return ggml_create_threadpool_impl(tpp, false, NULL, NULL);
19332
+ return ggml_create_threadpool_impl(tpp, NULL, NULL); // cgraph and cplan are both passed as NULL here
19326
19333
}
19327
19334
19328
19335
enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cplan * cplan) {
@@ -19336,35 +19343,35 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
19336
19343
bool disposable_threadpool = false;
19337
19344
19338
19345
if (threadpool == NULL) {
19339
- GGML_PRINT_DEBUG("NOTE: No threadpool was specified in this cplan . Will create a disposable threadpool\n");
19346
+ GGML_PRINT_DEBUG("Threadpool is not specified. Will create a disposable threadpool : n_threads %u \n", n_threads );
19340
19347
disposable_threadpool = true;
19341
19348
19342
19349
struct ggml_threadpool_params ttp = {
19343
19350
.mask_specified = false,
19344
19351
.n_threads = n_threads,
19345
19352
.prio = 0,
19346
- .poll = false ,
19353
+ .poll = 1 ,
19347
19354
.strict_cpu = false,
19348
19355
.paused = false
19349
19356
};
19350
19357
19351
- threadpool = ggml_create_threadpool_impl(&ttp, true, cgraph, cplan);
19358
+ threadpool = ggml_create_threadpool_impl(&ttp, cgraph, cplan);
19352
19359
} else {
19353
- if (n_threads > threadpool->n_threads_max) {
19354
- GGML_PRINT("WARNING: cplan is requesting more threads than the threadpool contains. Expect a bad time!\n");
19355
- }
19356
- // Not a disposable threadpool:
19357
- // Reset some of the paramters that need resetting
19360
+ // Reset some of the parameters that need resetting
19358
19361
// No worker threads should be accessing the parameters below at this stage
19359
- threadpool->cgraph = cgraph;
19360
- threadpool->cplan = cplan;
19361
- threadpool->n_threads_cur = n_threads;
19362
+ threadpool->cgraph = cgraph;
19363
+ threadpool->cplan = cplan;
19364
+ threadpool->n_threads_cur = n_threads;
19362
19365
threadpool->n_barrier = 0;
19363
19366
threadpool->n_barrier_passed = 0;
19364
19367
threadpool->current_chunk = 0;
19365
19368
threadpool->ec = GGML_STATUS_SUCCESS;
19366
19369
}
19367
19370
19371
+ if (n_threads > threadpool->n_threads_max) {
19372
+ GGML_PRINT("WARNING: cplan is requesting more threads than the threadpool contains. Expect a bad time!\n");
19373
+ }
19374
+
19368
19375
#ifdef GGML_USE_OPENMP
19369
19376
if (n_threads > 1) {
19370
19377
#pragma omp parallel num_threads(n_threads)
@@ -19390,26 +19397,15 @@ enum ggml_status ggml_graph_compute(struct ggml_cgraph * cgraph, struct ggml_cpl
19390
19397
ggml_graph_compute_thread(&worker);
19391
19398
}
19392
19399
#else
19393
- if (!disposable_threadpool) {
19394
- // Update main thread affinity to match the current threadpool
19395
- if (threadpool->workers[0].mask_specified) {
19396
- __thread_affinity(threadpool->workers[0].cpumask);
19397
- }
19398
-
19399
- // always take the mutex here because the worker threads are doing hybrid poll/wait
19400
-
19401
- ggml_mutex_lock(&threadpool->mutex);
19402
- atomic_fetch_add_explicit(&threadpool->n_graph, 1, memory_order_relaxed);
19403
- if (threadpool->pause) {
19404
- // resume does cond broadcast
19405
- __ggml_resume_threadpool(threadpool);
19406
- } else {
19407
- ggml_cond_broadcast(&threadpool->cond);
19408
- }
19409
- ggml_mutex_unlock(&threadpool->mutex);
19400
+ // Update main thread affinity to match the current threadpool
19401
+ if (threadpool->workers[0].mask_specified) {
19402
+ __thread_affinity(threadpool->workers[0].cpumask);
19410
19403
}
19411
19404
19412
- // this is a work thread too
19405
+ // Kick all threads to start the new graph
19406
+ ggml_graph_compute_kickoff(threadpool);
19407
+
19408
+ // This is a work thread too
19413
19409
ggml_graph_compute_thread(&threadpool->workers[0]);
19414
19410
#endif
19415
19411
0 commit comments