@@ -251,46 +251,39 @@ bool llama_batch_allocr::init(
     // consistency checks
     //
 
-    for (uint32_t s = 0; s < n_seq_max; ++s) {
-        if (seq_pos[s].empty()) {
-            continue;
-        }
+    // TODO @ngxson : we currently can't check M-RoPE positions, as the position is increased based on image size
+    if (n_pos_per_embd == 1) {
+        for (uint32_t s = 0; s < n_seq_max; ++s) {
+            if (seq_pos[s].empty()) {
+                continue;
+            }
 
-        const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
+            const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
 
-        if (p0 >= 0) {
-            bool ok = true;
+            if (p0 >= 0) {
+                bool ok = true;
 
-            if (batch.token) {
                 if (seq_pos_min(s) != p0 + 1) {
                     ok = false;
                 }
-            } else {
-                assert(batch.embd);
 
-                // for embeddings (typically used as vision input), we allow them to have repeating positions
-                // ref: https://github.com/ggml-org/llama.cpp/issues/13694#issuecomment-2983871762
-                if (seq_pos_min(s) != p0 && seq_pos_min(s) != p0 + 1) {
-                    ok = false;
+                if (!ok) {
+                    LLAMA_LOG_ERROR(
+                        "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
+                        " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
+                        " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
+                        " it is required that the sequence positions remain consecutive: Y = X + 1\n",
+                        __func__, s, s, p0, s, seq_pos_min(s));
+
+                    return false;
                 }
             }
 
-            if (!ok) {
-                LLAMA_LOG_ERROR(
-                    "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
-                    " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
-                    " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
-                    " it is required that the sequence positions remain consecutive: Y = X + 1\n",
-                    __func__, s, s, p0, s, seq_pos_min(s));
-
+            if (seq_pos_max(s) - seq_pos_min(s) + 1 > (int) seq_pos[s].size()) {
+                LLAMA_LOG_ERROR("%s: sequence %d positions are not continuous\n", __func__, s);
                 return false;
             }
         }
-
-        if (seq_pos_max(s) - seq_pos_min(s) + 1 > (int) seq_pos[s].size()) {
-            LLAMA_LOG_ERROR("%s: sequence %d positions are not continuous\n", __func__, s);
-            return false;
-        }
     }
 
     if (memory) {
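To make the invariant concrete, here is a minimal standalone sketch of the two checks this hunk enforces (hypothetical helper names, not the llama.cpp API; the allocator's per-sequence `seq_pos` is mirrored here with a `std::set`): the batch must continue exactly at `p0 + 1`, and its positions must cover a contiguous range. For M-RoPE models (`n_pos_per_embd > 1`) the whole check is now skipped, since image embeddings advance positions by image-size-dependent amounts that would trip the `Y = X + 1` rule.

```cpp
#include <cstdint>
#include <cstdio>
#include <set>

// Standalone sketch of the two consistency checks (hypothetical helper, not the llama.cpp API).
// pos : the distinct positions a batch carries for one sequence
// p0  : the last position stored in the KV cache for that sequence, -1 if the cache is empty
static bool positions_consistent(const std::set<int32_t> & pos, int32_t p0) {
    if (pos.empty()) {
        return true; // nothing to validate
    }

    const int32_t pmin = *pos.begin();  // Y in the error message above
    const int32_t pmax = *pos.rbegin();

    // check 1: the batch must continue exactly where the cache left off (Y = X + 1)
    if (p0 >= 0 && pmin != p0 + 1) {
        fprintf(stderr, "batch starts at %d, expected %d\n", pmin, p0 + 1);
        return false;
    }

    // check 2: the positions must cover a contiguous range, with no gaps
    if (pmax - pmin + 1 > (int32_t) pos.size()) {
        fprintf(stderr, "positions are not continuous\n");
        return false;
    }

    return true;
}

int main() {
    // the cache holds positions 0..9 for this sequence, so a batch must start at 10
    positions_consistent({10, 11, 12}, 9); // ok
    positions_consistent({11, 12, 13}, 9); // fails check 1: starts at 11, not 10
    positions_consistent({10, 12, 14}, 9); // fails check 2: gaps at 11 and 13
    return 0;
}
```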
@@ -660,9 +653,6 @@ llama_ubatch llama_batch_allocr::ubatch_add(const std::vector<int32_t> & idxs, u
     const int64_t n_embd_all = batch.embd ? (int64_t) n_tokens*n_embd : 0;
     const int64_t n_pos_all  =              (int64_t) n_tokens*n_pos_cur;
 
-    // printf("ubatch_add: n_tokens=%d, n_seqs=%d, n_pos_cur=%d, n_embd_all=%lld, n_pos_all=%lld\n",
-    //     n_tokens, n_seqs, n_pos_cur, n_embd_all, n_pos_all);
-
     udata->token.resize(n_tokens);
     udata->embd .resize(n_embd_all);
     udata->pos  .resize(n_pos_all);