Skip to content

Commit 8ef4b95

Browse files
committed
cont : reuse current_chunk from ggml_threadpool
ggml-ci
1 parent 169774a commit 8ef4b95

File tree

3 files changed

+14
-1
lines changed

3 files changed

+14
-1
lines changed

ggml/src/ggml-cpu/ggml-cpu-impl.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -503,6 +503,9 @@ static __m256 __lasx_xvreplfr2vr_s(const float val) {
503503
// TODO: move to ggml-threading
504504
void ggml_barrier(struct ggml_threadpool * tp);
505505

506+
// Atomically stores `value` into the threadpool's `current_chunk` counter.
void ggml_threadpool_chunk_set(struct ggml_threadpool * threadpool, int value);
507+
// Atomically adds `value` to the threadpool's `current_chunk` counter and
// returns the value the counter held before the addition.
int ggml_threadpool_chunk_add(struct ggml_threadpool * threadpool, int value);
508+
506509
#ifdef __cplusplus
507510
}
508511
#endif

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,6 +559,14 @@ void ggml_barrier(struct ggml_threadpool * tp) {
559559
#endif
560560
}
561561

562+
// Overwrite the threadpool's shared `current_chunk` counter with `value`.
// The store is atomic and uses release ordering.
void ggml_threadpool_chunk_set(struct ggml_threadpool * threadpool, int value) {
563+
atomic_store_explicit(&threadpool->current_chunk, value, memory_order_release);
564+
}
565+
566+
// Atomically add `value` to the threadpool's shared `current_chunk` counter
// (acquire-release ordering) and return the counter's value from *before*
// the addition, as atomic_fetch_add_explicit does.
int ggml_threadpool_chunk_add(struct ggml_threadpool * threadpool, int value) {
567+
return atomic_fetch_add_explicit(&threadpool->current_chunk, value, memory_order_acq_rel);
568+
}
569+
562570
#if defined(__gnu_linux__)
563571
static cpu_set_t ggml_get_numa_affinity(void) {
564572
cpu_set_t cpuset;

ggml/src/ggml-cpu/llamafile/sgemm.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -406,6 +406,8 @@ class tinyBLAS {
406406

407407
if (params->ith == 0) {
408408
GGML_ASSERT( jj_BN * SIZE_BN + (NB_BN - jj_BN) * (SIZE_BN - 1) == xtiles);
409+
// Every thread starts at ith, so the first unprocessed chunk is nth. This saves a bit of coordination right at the start.
410+
ggml_threadpool_chunk_set(params->threadpool, params->nth);
409411
}
410412

411413
ggml_barrier(params->threadpool);
@@ -434,7 +436,7 @@ class tinyBLAS {
434436
GGML_ASSERT(jj == jj2);
435437
}
436438

437-
job += params->nth;
439+
job = ggml_threadpool_chunk_add(params->threadpool, 1);
438440
}
439441

440442
ggml_barrier(params->threadpool);

0 commit comments

Comments
 (0)