Commit 4bf6181

add comment about not reserving graphs with zero outputs

1 parent af61b63

File tree

1 file changed: +1 −0 lines changed


src/llama-context.cpp

Lines changed: 1 addition & 0 deletions
@@ -285,6 +285,7 @@ llama_context::llama_context(
     const uint32_t n_seqs   = cparams.kv_unified ? 1 : cparams.n_seq_max;
     const uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch);

+    // avoid reserving graphs with zero outputs
     n_outputs = 1;

     LLAMA_LOG_DEBUG("%s: worst-case: n_tokens = %d, n_seqs = %d, n_outputs = %d\n", __func__, n_tokens, n_seqs, n_outputs);
