Skip to content

Commit 4adbc75

Browse files
threadpool: use relaxed order for chunk sync
A full memory barrier is overkill for this, since each thread works on a different chunk.
1 parent 32704bb commit 4adbc75

File tree

1 file changed

+6
-2
lines changed

1 file changed

+6
-2
lines changed

ggml/src/ggml.c

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,10 @@ typedef enum {
8888
// Stores val into *ptr by calling the Win32 InterlockedExchange primitive.
// The previous value returned by InterlockedExchange is discarded.
static void atomic_store(atomic_int * ptr, LONG val) {
8989
InterlockedExchange(ptr, val);
9090
}
91+
// Variant of atomic_store that also takes a memory order argument.
//   ptr - atomic integer to write
//   val - value to store
//   mo  - requested memory order; accepted but not used by the body yet,
//         so the store is the same InterlockedExchange call whatever order is passed.
static void atomic_store_explicit(atomic_int * ptr, LONG val, memory_order mo) {
92+
// TODO: add support for explicit memory order
93+
InterlockedExchange(ptr, val);
94+
}
9195
// Reads *ptr by issuing InterlockedCompareExchange with both the exchange value
// and the comparand set to 0: when *ptr is 0 it is rewritten with 0, otherwise it
// is left untouched, so the stored value never changes. The result of that call
// is returned to the caller.
static LONG atomic_load(atomic_int * ptr) {
9296
return InterlockedCompareExchange(ptr, 0, 0);
9397
}
@@ -12471,7 +12475,7 @@ UseGgmlGemm1:;
1247112475

1247212476
if (ith == 0) {
1247312477
// Every thread starts at ith, so the first unprocessed chunk is nth. This saves a bit of coordination right at the start.
12474-
atomic_store(&params->threadpool->current_chunk, nth);
12478+
atomic_store_explicit(&params->threadpool->current_chunk, nth, memory_order_relaxed);
1247512479
}
1247612480

1247712481
ggml_barrier(params->threadpool);
@@ -12582,7 +12586,7 @@ UseGgmlGemm2:;
1258212586
break;
1258312587
}
1258412588

12585-
current_chunk = atomic_fetch_add(&params->threadpool->current_chunk, 1);
12589+
current_chunk = atomic_fetch_add_explicit(&params->threadpool->current_chunk, 1, memory_order_relaxed);
1258612590
}
1258712591
}
1258812592

0 commit comments

Comments
 (0)