
Commit 32b5eee

cont : comments [no ci]

1 parent a6b0e85 commit 32b5eee

File tree: 3 files changed, +5 -7 lines changed

include/llama.h

Lines changed: 2 additions & 2 deletions

@@ -243,14 +243,14 @@ extern "C" {
 
     typedef bool (*llama_progress_callback)(float progress, void * user_data);
 
-    // Input data for llama_decode
+    // Input data for llama_encode/llama_decode
     // A llama_batch object can contain input about one or many sequences
     // The provided arrays (i.e. token, embd, pos, etc.) must have size of n_tokens
     //
     // - token  : the token ids of the input (used when embd is NULL)
     // - embd   : token embeddings (i.e. float vector of size n_embd) (used when token is NULL)
     // - pos    : the positions of the respective token in the sequence
-    //            (if set to NULL, the token position will be tracked automatically by llama_decode)
+    //            (if set to NULL, the token position will be tracked automatically by llama_encode/llama_decode)
     // - seq_id : the sequence to which the respective token belongs
     //            (if set to NULL, the sequence ID will be assumed to be 0)
     // - logits : if zero, the logits (and/or the embeddings) for the respective token will not be output
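
In practice, the updated contract means a batch with pos == NULL (and seq_id == NULL) can be fed to either llama_encode() or llama_decode(). A minimal sketch of a caller relying on these defaults, using the public llama.h API (error handling omitted; verify against your llama.h version that llama_batch_get_one() leaves the optional arrays as NULL):

    // sketch: decode a token array without explicit positions or sequence IDs
    #include "llama.h"

    #include <vector>

    int decode_tokens(llama_context * ctx, std::vector<llama_token> & tokens) {
        // llama_batch_get_one() wraps the token array and leaves the other
        // arrays (embd, pos, seq_id, logits) as NULL, so the defaults above
        // apply: positions are tracked automatically and the seq_id is 0
        llama_batch batch = llama_batch_get_one(tokens.data(), (int32_t) tokens.size());

        return llama_decode(ctx, batch); // 0 on success
    }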

src/llama-batch.h

Lines changed: 3 additions & 3 deletions

@@ -84,7 +84,7 @@ class llama_batch_allocr {
     llama_batch_allocr();
 
     // sanitize and auto-gen missing data in the input batch
-    // memory is optional. if provided will be used to check for sequence continuity
+    // memory is optional. if provided will be used to check for sequence continuity and to determine the positions
     bool init(
             const llama_batch & batch_inp,
             const llama_vocab & vocab,
@@ -111,8 +111,8 @@ class llama_batch_allocr {
     std::vector<llama_seq_id *> seq_id;
     std::vector<int8_t>         output;
 
-    std::vector<std::set<llama_pos>> seq_pos; // the positions of each sequence
-    std::vector<std::vector<bool>>   seq_cpl; // if sequences i is coupled to sequence j
+    std::vector<std::set<llama_pos>> seq_pos; // seq_pos[s]: the set of positions in sequence s
+    std::vector<std::vector<bool>>   seq_cpl; // seq_cpl[s0][s1]: if sequence s0 is coupled to sequence s1
 
     int debug;
 };
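
To make the new indexing convention concrete, here is a self-contained sketch (plain int stands in for llama_pos; the continuity check mirrors the kind of validation init() performs, not its actual code):

    #include <cstdio>
    #include <set>
    #include <vector>

    int main() {
        // seq_pos[s]      : the set of positions occupied by sequence s
        // seq_cpl[s0][s1] : true if sequence s0 is coupled to sequence s1
        std::vector<std::set<int>>     seq_pos = { {0, 1, 2}, {0, 1, 2, 3} };
        std::vector<std::vector<bool>> seq_cpl = { {false, false},
                                                   {true,  false} }; // seq 1 is coupled to seq 0

        for (size_t s = 0; s < seq_pos.size(); ++s) {
            // a contiguous sequence covers every position between its min and max
            const auto & pos = seq_pos[s];
            const bool cont  = pos.empty() || (int) pos.size() == *pos.rbegin() - *pos.begin() + 1;

            printf("seq %zu: contiguous = %s\n", s, cont ? "yes" : "no");
        }

        return 0;
    }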

src/llama-context.cpp

Lines changed: 0 additions & 2 deletions

@@ -727,7 +727,6 @@ int llama_context::encode(const llama_batch & batch_inp) {
         return -1;
     }
 
-    // temporary allocate memory for the input batch if needed
     // note: during encode, we always pass the full sequence starting from pos = 0
     if (!batch_allocr->init(batch_inp, model.vocab, nullptr)) {
         LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
@@ -895,7 +894,6 @@ int llama_context::decode(const llama_batch & batch_inp) {
         return -1;
     }
 
-    // temporary allocate memory for the input batch if needed
     if (!batch_allocr->init(batch_inp, model.vocab, memory.get())) {
         LLAMA_LOG_ERROR("%s: failed to initialize batch\n", __func__);
         return -1;
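
The deleted comments had gone stale: batch setup, including the positions, now lives in llama_batch_allocr::init(). Note the two call sites differ only in the memory argument, which matches the updated init() contract above. A loose sketch of the position-defaulting rule this implies (not the repository's code; it assumes the internal llama_memory_i interface exposes seq_pos_max() as declared in src/llama-memory.h):

    // hypothetical helper: first auto-generated position for a sequence
    // when the batch is submitted with pos == NULL
    llama_pos first_pos(const llama_memory_i * memory, llama_seq_id seq_id) {
        if (memory == nullptr) {
            return 0; // encode path: the full sequence always starts at pos = 0
        }

        // decode path: continue right after the last position stored in
        // memory for this sequence (assuming seq_pos_max() returns -1 when
        // the sequence is empty)
        return memory->seq_pos_max(seq_id) + 1;
    }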
