Commit acf2e89

llama : clarify comment about pp and tg graphs [no ci]
This commit clarifies the comment in `llama-context.cpp` regarding the prefill prompt (pp) and token generation (tg) graphs. The motivation for this is that I've struggled to remember what these abbreviations stand for and had to look them up more than once, so I thought it would be helpful to expand the comments to spell them out.
1 parent 446595b commit acf2e89

File tree

1 file changed (+2 −2 lines changed)

src/llama-context.cpp

Lines changed: 2 additions & 2 deletions

@@ -298,7 +298,7 @@ llama_context::llama_context(

        cross.v_embd.clear();

-        // reserve pp graph first so that buffers are only allocated once
+        // reserve pp (prefill prompt) graph first so that buffers are only allocated once
        {
            auto * gf = graph_reserve(n_tokens, n_seqs, n_tokens, mctx.get());
            if (!gf) {
@@ -309,7 +309,7 @@ llama_context::llama_context(
            n_nodes_pp = ggml_graph_n_nodes(gf);
        }

-        // reserve with tg graph to get the number of splits and nodes
+        // reserve with tg (token generation) graph to get the number of splits and nodes
        {
            auto * gf = graph_reserve(n_seqs, n_seqs, n_seqs, mctx.get());
            if (!gf) {
