Skip to content

Commit ab80f55

Browse files
committed
Fix the cgraph NULL-pointer issue when running with llama-bench
1 parent e5cb47d commit ab80f55

File tree

2 files changed

+8
-8
lines changed

2 files changed

+8
-8
lines changed

ggml/src/ggml-cpu/ggml-cpu-impl.h

Lines changed: 5 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -506,7 +506,11 @@ static __m256 __lasx_xvreplfr2vr_s(const float val) {
506506

507507
// TODO: move to ggml-threading
508508
void ggml_barrier(struct ggml_threadpool * tp);
509-
#define GGML_BARRIER_NODE_LAST -1
509+
enum ggml_barrier_node_index {
510+
GGML_BARRIER_NODE_PING = 0,
511+
GGML_BARRIER_NODE_PONG = 1,
512+
GGML_BARRIER_NODE_LAST = 2,
513+
};
510514
void ggml_barrier_numa_aware(struct ggml_threadpool * tp, int ith, int node_n);
511515

512516
#ifdef __cplusplus

ggml/src/ggml-cpu/ggml-cpu.c

Lines changed: 3 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -598,10 +598,6 @@ void ggml_barrier_numa_aware(struct ggml_threadpool * tp, int ith, int node_n) {
598598
return;
599599
}
600600

601-
if (node_n == GGML_BARRIER_NODE_LAST) {
602-
node_n = tp->cgraph->n_nodes;
603-
}
604-
605601
int node = get_node_from_cpu(ith, cores_per_numa);
606602

607603
int n_passed = atomic_load_explicit(tp->n_barrier_passed_node[node], memory_order_relaxed);
@@ -3002,7 +2998,7 @@ static thread_ret_t ggml_graph_compute_thread(void * data) {
30022998
}
30032999
#endif
30043000

3005-
ggml_barrier_numa_aware(state->threadpool, state->ith, node_n);
3001+
ggml_barrier_numa_aware(state->threadpool, state->ith, node_n % GGML_BARRIER_NODE_LAST);
30063002

30073003
#ifdef GGML_USE_NUMA_MIGRATE_DEBUG
30083004
if (log_time) {
@@ -3191,8 +3187,8 @@ static struct ggml_threadpool * ggml_threadpool_new_impl(
31913187
*threadpool->n_barrier_passed_node[node] = 0;
31923188
}
31933189

3194-
threadpool->n_barrier_passed_last = (atomic_int *)malloc((threadpool->cgraph->n_nodes + 1) * sizeof(atomic_int));
3195-
for (int i = 0; i < threadpool->cgraph->n_nodes + 1; i++) {
3190+
threadpool->n_barrier_passed_last = (atomic_int *)malloc(GGML_BARRIER_NODE_LAST * sizeof(atomic_int));
3191+
for (int i = 0; i < GGML_BARRIER_NODE_LAST; i++) {
31963192
threadpool->n_barrier_passed_last[i] = 0;
31973193
}
31983194
#endif

0 commit comments

Comments
 (0)