Commit b7b6caf

graph : avoid set_max_nodes in llm_graph_result
ggml-ci
1 parent 3d28b3b commit b7b6caf

3 files changed, +7 −7 lines changed

include/llama.h

Lines changed: 1 addition & 1 deletion
@@ -1389,7 +1389,7 @@ extern "C" {
 
         int32_t n_p_eval;
         int32_t n_eval;
-        int32_t n_reused;
+        int32_t n_reused; // number of times a ggml compute graph had been reused
     };
 
     struct llama_perf_sampler_data {

src/llama-graph.h

Lines changed: 2 additions & 2 deletions
@@ -478,8 +478,8 @@ class llm_graph_result : public llm_graph_result_i {
     ggml_cgraph * get_gf() override { return gf; }
     ggml_context * get_ctx() override { return ctx_compute.get(); }
 
-    void set_max_nodes(int64_t max_nodes) {
-        this->max_nodes = max_nodes;
+    int64_t get_max_nodes() const {
+        return max_nodes;
     }
 
     void reset() override {

src/llama-kv-cache-unified.cpp

Lines changed: 4 additions & 4 deletions
@@ -68,8 +68,6 @@ llama_kv_cache_unified::llama_kv_cache_unified(
 
     cells.resize(kv_size);
 
-    gf_res.reset(new llm_graph_result(32768)); // note: the max nodes will be updated later
-
     for (uint32_t il = 0; il < n_layer_cache; il++) {
         if (filter && !filter(il)) {
             LLAMA_LOG_DEBUG("%s: layer %3d: skipped\n", __func__, il);
@@ -471,6 +469,10 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
 
     auto * sched = lctx->get_sched();
 
+    if (!gf_res || gf_res->get_max_nodes() != lctx->graph_max_nodes()) {
+        gf_res.reset(new llm_graph_result(lctx->graph_max_nodes()));
+    }
+
     if (do_shift) {
         if (!get_can_shift()) {
             GGML_ABORT("The current KV cache / model configuration does not support K-shift");
@@ -484,7 +486,6 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
 
         auto * res = gf_res.get();
 
-        res->set_max_nodes(lctx->graph_max_nodes());
         res->reset();
 
         auto * gf = build_graph_shift(res, lctx);
@@ -531,7 +532,6 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
 
         auto * res = gf_res.get();
 
-        res->set_max_nodes(lctx->graph_max_nodes());
         res->reset();
 
         auto * gf = build_graph_defrag(res, lctx, dinfo);
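
The overall change: instead of constructing the cached llm_graph_result with a placeholder size and patching it later via set_max_nodes(), the KV cache now queries get_max_nodes() and recreates the result object whenever the required node count differs from what it was built with. Below is a minimal, self-contained C++ sketch of that pattern; the names (graph_result, cache, update, required_nodes) are hypothetical stand-ins, not the actual llama.cpp types.

#include <cstdint>
#include <cstdio>
#include <memory>

class graph_result {
public:
    explicit graph_result(int64_t max_nodes) : max_nodes(max_nodes) {}

    // read-only accessor, analogous to llm_graph_result::get_max_nodes()
    int64_t get_max_nodes() const { return max_nodes; }

    void reset() { /* re-initialize buffers sized for max_nodes */ }

private:
    int64_t max_nodes;
};

class cache {
public:
    // called once per update with the currently required graph capacity
    void update(int64_t required_nodes) {
        // lazily (re)create the result if it is missing or sized differently,
        // instead of mutating an existing object through a setter
        if (!res || res->get_max_nodes() != required_nodes) {
            res.reset(new graph_result(required_nodes));
        }

        res->reset();
        // ... build and compute the graph using res ...
    }

private:
    std::unique_ptr<graph_result> res;
};

int main() {
    cache c;
    c.update(8192);  // first call allocates the result
    c.update(8192);  // same size: reuse the existing object
    c.update(16384); // size changed: recreate with the new capacity
    printf("done\n");
    return 0;
}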
