@@ -256,7 +256,7 @@ void llama_context::init() {
     {
         llama_ubatch ubatch_pp = { true, n_tokens, n_tokens / n_seqs, n_seqs, &token, nullptr, nullptr, nullptr, nullptr, nullptr };
         auto ctx = graph_init();
-        auto res_pp = graph_build(ctx, ubatch_pp, true);
+        auto res_pp = graph_build(ctx.get(), ubatch_pp, true);
         auto & gf_pp = res_pp.gf;
         if (!ggml_backend_sched_reserve(sched.get(), gf_pp)) {
             LLAMA_LOG_ERROR("%s: failed to allocate compute pp buffers\n", __func__);
@@ -271,7 +271,7 @@ void llama_context::init() {
     {
         llama_ubatch ubatch_tg = { true, 1, 1, n_seqs, &token, nullptr, nullptr, nullptr, nullptr, nullptr };
         auto ctx = graph_init();
-        auto res_tg = graph_build(ctx, ubatch_tg, true);
+        auto res_tg = graph_build(ctx.get(), ubatch_tg, true);
         auto & gf_tg = res_tg.gf;
         if (!ggml_backend_sched_reserve(sched.get(), gf_tg)) {
             LLAMA_LOG_ERROR("%s: failed to allocate compute tg buffers\n", __func__);
@@ -285,7 +285,7 @@ void llama_context::init() {
     {
         llama_ubatch ubatch_pp = { true, n_tokens, n_tokens / n_seqs, n_seqs, &token, nullptr, nullptr, nullptr, nullptr, nullptr };
         auto ctx = graph_init();
-        auto res_pp = graph_build(ctx, ubatch_pp, true);
+        auto res_pp = graph_build(ctx.get(), ubatch_pp, true);
         auto & gf_pp = res_pp.gf;
         if (!ggml_backend_sched_reserve(sched.get(), gf_pp)) {
             LLAMA_LOG_ERROR("%s: failed to allocate compute pp buffers\n", __func__);
@@ -573,7 +573,7 @@ ggml_context_ptr llama_context::graph_init() {
 }
 
 llama_graph_result llama_context::graph_build(
-        ggml_context_ptr & ctx,
+        ggml_context * ctx,
         const llama_ubatch & ubatch,
         bool worst_case) {
     return model.build_graph(ctx, *this, cparams, ubatch, worst_case);
@@ -1720,7 +1720,7 @@ int llama_context_kv_self::encode(llama_batch & inp_batch) {
     ggml_backend_sched_set_eval_callback(sched.get(), cparams.cb_eval, cparams.cb_eval_user_data);
 
     auto ctx = graph_init();
-    auto res = graph_build(ctx, ubatch, false);
+    auto res = graph_build(ctx.get(), ubatch, false);
 
     auto * gf = res.gf;
 
@@ -2000,7 +2000,7 @@ int llama_context_kv_self::decode(llama_batch & inp_batch) {
         llama_ubatch ubatch = { true, n_tokens, n_tokens / n_seqs, n_seqs, &token, nullptr, nullptr, nullptr, nullptr, nullptr };
 
         auto ctx = graph_init();
-        auto res = graph_build(ctx, ubatch, true);
+        auto res = graph_build(ctx.get(), ubatch, true);
 
         // initialize scheduler with the worst-case graph
         ggml_backend_sched_reset(sched.get());
@@ -2015,7 +2015,7 @@ int llama_context_kv_self::decode(llama_batch & inp_batch) {
     ggml_backend_sched_set_eval_callback(sched.get(), cparams.cb_eval, cparams.cb_eval_user_data);
 
     auto ctx = graph_init();
-    auto res = graph_build(ctx, ubatch, false);
+    auto res = graph_build(ctx.get(), ubatch, false);
 
     auto * gf = res.gf;
 
@@ -2483,11 +2483,10 @@ void llama_context_kv_self::kv_self_update() {
         ggml_backend_sched_reset(sched.get());
 
         auto ctx = graph_init();
-        auto * ctx0 = ctx.get();
 
-        ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
+        ggml_cgraph * gf = ggml_new_graph_custom(ctx.get(), model.max_nodes(), false);
 
-        build_kv_self_shift(ctx0, gf);
+        build_kv_self_shift(ctx.get(), gf);
 
         ggml_backend_sched_alloc_graph(sched.get(), gf);
 
@@ -2512,11 +2511,10 @@ void llama_context_kv_self::kv_self_update() {
         ggml_backend_sched_reset(sched.get());
 
         auto ctx = graph_init();
-        auto * ctx0 = ctx.get();
 
-        ggml_cgraph * gf = ggml_new_graph_custom(ctx0, model.max_nodes(), false);
+        ggml_cgraph * gf = ggml_new_graph_custom(ctx.get(), model.max_nodes(), false);
 
-        build_kv_self_defrag(ctx0, gf);
+        build_kv_self_defrag(ctx.get(), gf);
 
         ggml_backend_sched_alloc_graph(sched.get(), gf);
 
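The signature change above replaces the `ggml_context_ptr &` parameter of `graph_build()` with a plain `ggml_context *`: `graph_init()` still returns the owning smart pointer, callers keep it alive on their stack, and they now hand the builder only the raw pointer via `ctx.get()`, so the builder no longer depends on how the context is owned. Below is a minimal, self-contained sketch of that pattern; the types and names (`graph_ctx`, `graph_ctx_ptr`, `build_graph`) are illustrative stand-ins, not the llama.cpp/ggml API.

// Sketch of passing a raw, non-owning pointer instead of a smart-pointer reference.
#include <cstdio>
#include <memory>

struct graph_ctx {                                       // stand-in for ggml_context
    int n_nodes = 0;
};

using graph_ctx_ptr = std::unique_ptr<graph_ctx>;        // stand-in for ggml_context_ptr

// Taking `graph_ctx *` rather than `graph_ctx_ptr &` keeps the builder agnostic of the
// owner type; it only requires the context to outlive the call.
int build_graph(graph_ctx * ctx, int n_tokens) {
    ctx->n_nodes += n_tokens;                            // mutates the context, never frees it
    return ctx->n_nodes;
}

int main() {
    graph_ctx_ptr ctx = std::make_unique<graph_ctx>();   // caller owns the context (like graph_init())
    int n = build_graph(ctx.get(), 32);                  // pass the raw pointer (like ctx.get())
    std::printf("nodes: %d\n", n);
    return 0;                                            // unique_ptr releases the context here
}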