
Commit 91b7792

cont : fix position auto-gen + add comments

ggml-ci

1 parent 2437143

2 files changed: +18 -4 lines

src/llama-batch.cpp

Lines changed: 15 additions & 2 deletions
```diff
@@ -366,7 +366,12 @@ bool llama_batch_allocr::init(
 
         for (int32_t i = 0; i < batch.n_tokens; i++) {
             const llama_seq_id seq_id = batch.seq_id[i][0];
-            pos[i] = p0[seq_id] + i;
+
+            pos[i] = p0[seq_id];
+
+            for (int32_t s = 0; s < batch.n_seq_id[i]; ++s) {
+                p0[batch.seq_id[i][s]] = pos[i] + 1;
+            }
         }
 
         batch.pos = pos.data();
```
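The old auto-generation added the batch-wide token index `i` to the sequence's starting position, which drifts as soon as a batch mixes tokens from more than one sequence. The new code keeps a per-sequence counter in `p0` and advances it for every sequence a token belongs to. A minimal standalone sketch of the same idea (`toy_batch` and `autogen_pos` are hypothetical names with simplified types, not the actual llama.cpp code path):

```cpp
#include <cstdint>
#include <vector>

// Hypothetical, simplified stand-ins for the real batch fields.
struct toy_batch {
    int32_t n_tokens;
    std::vector<std::vector<int32_t>> seq_id; // sequences each token belongs to
};

// Auto-generate positions: each token continues from the current position of
// its primary sequence, and every sequence it belongs to is advanced past it.
static std::vector<int32_t> autogen_pos(const toy_batch & batch, std::vector<int32_t> p0) {
    std::vector<int32_t> pos(batch.n_tokens);

    for (int32_t i = 0; i < batch.n_tokens; i++) {
        const int32_t s0 = batch.seq_id[i][0];

        pos[i] = p0[s0];

        // advance the starting position of every sequence this token is part of
        for (int32_t s : batch.seq_id[i]) {
            p0[s] = pos[i] + 1;
        }
    }

    return pos;
}
```

For example, a batch holding tokens for sequences 0, 1, 0 (in that order) now yields positions `p0[0]`, `p0[1]`, `p0[0] + 1`, whereas the old `p0[seq_id] + i` would have produced `p0[0]`, `p0[1] + 1`, `p0[0] + 2`, leaving gaps in both sequences.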
```diff
@@ -397,7 +402,11 @@ bool llama_batch_allocr::init(
                 const llama_seq_id s0 = batch.seq_id[i][0];
                 const llama_seq_id s1 = batch.seq_id[i][s];
 
+                // mark that sequence s1 is coupled to s0
                 seq_cpl[s1][s0] = true;
+
+                // note: the other way around is not necessary for now
+                //seq_cpl[s0][s1] = true;
             }
         }
     }
```
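The added comments document the direction of the coupling table: when a token carries more than one sequence id, each secondary sequence `s1` is marked as coupled to the primary sequence `s0`, and only in that direction. A hedged sketch of the same bookkeeping over a simplified boolean table (`mark_coupled` and its parameters are illustrative names, not the library's API):

```cpp
#include <cstdint>
#include <vector>

// Illustrative sketch: seq_cpl[s1][s0] == true means "sequence s1 is coupled
// to sequence s0". Only the secondary -> primary direction is recorded.
static void mark_coupled(const std::vector<std::vector<int32_t>> & seq_ids_per_token,
                         std::vector<std::vector<bool>> & seq_cpl) {
    for (const auto & ids : seq_ids_per_token) {
        const int32_t s0 = ids[0];

        for (size_t k = 1; k < ids.size(); ++k) {
            const int32_t s1 = ids[k];

            seq_cpl[s1][s0] = true;
            // the reverse direction (seq_cpl[s0][s1]) is intentionally left
            // unset, mirroring the note in the commit
        }
    }
}
```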
```diff
@@ -467,6 +476,10 @@ bool llama_batch_allocr::init(
         }
     }
 
+    //
+    // consistency checks
+    //
+
     for (int32_t s = 0; s < LLAMA_MAX_PARALLEL_SEQUENCES; ++s) {
         if (seq_pos[s].empty()) {
            continue;
```
```diff
@@ -478,7 +491,7 @@ bool llama_batch_allocr::init(
         }
 
         if (seq_pos_max(s) - seq_pos_min(s) + 1 > (int) seq_pos[s].size()) {
-            LLAMA_LOG_ERROR("%s: sequence %d is not contiguous\n", __func__, s);
+            LLAMA_LOG_ERROR("%s: sequence %d positions are not continuous\n", __func__, s);
             return false;
         }
     }
```
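The renamed error message matches what the check actually verifies: `seq_pos[s]` collects the distinct positions seen for sequence `s`, and if the span `max - min + 1` exceeds the number of distinct positions, at least one position in between is missing. A small hedged sketch of that rule over a `std::set` (assumed container; the real code uses its own `seq_pos_min`/`seq_pos_max` helpers):

```cpp
#include <cstdint>
#include <set>

// A gap-free sequence must cover every position in [min, max].
static bool positions_continuous(const std::set<int32_t> & seq_pos) {
    if (seq_pos.empty()) {
        return true; // nothing to check
    }

    const int32_t p_min = *seq_pos.begin();
    const int32_t p_max = *seq_pos.rbegin();

    // if the span is larger than the number of distinct positions, there is a hole
    return p_max - p_min + 1 <= (int32_t) seq_pos.size();
}
```

For example, positions {10, 11, 13} span 4 slots but contain only 3 entries, so such a batch would be rejected with the error above.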

src/llama-batch.h

Lines changed: 3 additions & 2 deletions
```diff
@@ -78,12 +78,13 @@ struct llama_sbatch {
     llama_sbatch(const llama_batch & batch, size_t n_embd, bool simple_split = false);
 };
 
-// temporary allocate memory for the input batch if needed
+// a helper for sanitizing and fulfilling a batch
 class llama_batch_allocr {
 public:
     llama_batch_allocr();
 
-    // optionally fulfill the batch returned by llama_batch_get_one
+    // sanitize and auto-gen missing data in the input batch
+    // memory is optional. if provided, it will be used to check for sequence continuity
     bool init(
             const llama_batch & batch_inp,
             const llama_vocab & vocab,
```
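The new doc comment says the memory argument is optional and, when present, is used to check sequence continuity. The rest of the `init` signature is truncated in this diff, so the following is a hypothetical illustration only (not the library's API) of what such a continuity rule can look like: a sequence in the incoming batch starts right after the last position already stored for it.

```cpp
#include <cstdint>

// Hypothetical illustration: with memory available, a batch entry for a
// sequence is continuous if it starts immediately after the last position
// stored for that sequence; -1 is assumed to mean "sequence empty in memory".
static bool continues_from_memory(int32_t pos_mem_max, int32_t pos_batch_min) {
    return pos_mem_max == -1 || pos_batch_min == pos_mem_max + 1;
}
```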
