We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent dec6ce2 commit b135927 (Copy full SHA for b135927)
src/llama.cpp
@@ -19694,7 +19694,7 @@ struct llama_context * llama_new_context_with_model(
19694
int n_nodes_tg = ggml_graph_n_nodes(gf_tg);
19695
19696
// reserve again with pp graph to avoid ggml-alloc reallocations during inference
19697
- gf_pp = llama_build_graph(*ctx, ubatch_pp, false);
+ gf_pp = llama_build_graph(*ctx, ubatch_pp, true);
19698
if (!ggml_backend_sched_reserve(ctx->sched, gf_pp)) {
19699
LLAMA_LOG_ERROR("%s: failed to allocate compute buffers\n", __func__);
19700
llama_free(ctx);
0 commit comments