@@ -68,8 +68,6 @@ llama_kv_cache_unified::llama_kv_cache_unified(
 
     cells.resize(kv_size);
 
-    gf_res.reset(new llm_graph_result(32768)); // note: the max nodes will be updated later
-
     for (uint32_t il = 0; il < n_layer_cache; il++) {
         if (filter && !filter(il)) {
             LLAMA_LOG_DEBUG("%s: layer %3d: skipped\n", __func__, il);
@@ -471,6 +469,10 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
 
     auto * sched = lctx->get_sched();
 
+    if (!gf_res || gf_res->get_max_nodes() != lctx->graph_max_nodes()) {
+        gf_res.reset(new llm_graph_result(lctx->graph_max_nodes()));
+    }
+
     if (do_shift) {
         if (!get_can_shift()) {
             GGML_ABORT("The current KV cache / model configuration does not support K-shift");
@@ -484,7 +486,6 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
 
     auto * res = gf_res.get();
 
-    res->set_max_nodes(lctx->graph_max_nodes());
     res->reset();
 
     auto * gf = build_graph_shift(res, lctx);
@@ -531,7 +532,6 @@ bool llama_kv_cache_unified::update(llama_context * lctx, bool do_shift, const d
 
     auto * res = gf_res.get();
 
-    res->set_max_nodes(lctx->graph_max_nodes());
     res->reset();
 
     auto * gf = build_graph_defrag(res, lctx, dinfo);
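For context, the change replaces an eager allocation with a hard-coded node budget (32768, later patched via `set_max_nodes()`) by a lazy (re)allocation in `update()` that tracks the context's actual `graph_max_nodes()`. A minimal sketch of the same pattern, where `GraphResult`, `Context`, and `Cache` are hypothetical stand-ins for `llm_graph_result`, `llama_context`, and the KV cache, not the real llama.cpp types:

```cpp
#include <cstdint>
#include <memory>

// Hypothetical stand-in for llm_graph_result: holds a fixed node budget.
struct GraphResult {
    explicit GraphResult(uint32_t max_nodes) : max_nodes(max_nodes) {}
    uint32_t get_max_nodes() const { return max_nodes; }
    void reset() { /* clear per-graph state, keep the allocation */ }
private:
    uint32_t max_nodes;
};

// Hypothetical stand-in for llama_context.
struct Context {
    uint32_t graph_max_nodes() const { return 8192; }
};

// Hypothetical stand-in for the KV cache object.
struct Cache {
    std::unique_ptr<GraphResult> gf_res; // allocated lazily, not in the constructor

    void update(const Context & ctx) {
        // (Re)allocate only when missing or when the node budget changed,
        // mirroring the check added in llama_kv_cache_unified::update().
        if (!gf_res || gf_res->get_max_nodes() != ctx.graph_max_nodes()) {
            gf_res = std::make_unique<GraphResult>(ctx.graph_max_nodes());
        }
        gf_res->reset(); // common path: reuse the existing allocation
        // ... build and execute the graph ...
    }
};

int main() {
    Cache cache;
    Context ctx;
    cache.update(ctx); // first call allocates gf_res
    cache.update(ctx); // same budget: reuses the existing gf_res
}
```

Allocating at the point of use removes both the placeholder 32768 allocation in the constructor and the per-call `set_max_nodes()` fixups before `reset()`; the guard keeps reallocation to the cases where the budget actually changes.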