From 8dd61afcdf6a9e499f072db0d03521e77cca7d31 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Fri, 13 Jun 2025 17:01:14 +0300
Subject: [PATCH 1/2] batch : add LLAMA_BATCH_DEBUG environment variable

ggml-ci
---
 src/llama-batch.cpp | 33 ++++++++++++++++++++++++++++++++-
 src/llama-batch.h   |  2 ++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/src/llama-batch.cpp b/src/llama-batch.cpp
index 9066d5a9b274d..20ad4af1b1249 100644
--- a/src/llama-batch.cpp
+++ b/src/llama-batch.cpp
@@ -7,6 +7,7 @@
 #include <cassert>
 #include <cstring>
 #include <algorithm>
+#include <sstream>
 
 llama_ubatch llama_sbatch::reserve_ubatch(size_t n_ubatch, bool has_embd) {
     // clear empty sequences
@@ -283,7 +284,10 @@ llama_sbatch::llama_sbatch(const llama_batch & batch, size_t n_embd, bool simple
     );
 }
 
-llama_batch_allocr::llama_batch_allocr() = default;
+llama_batch_allocr::llama_batch_allocr() {
+    const char * LLAMA_BATCH_DEBUG = getenv("LLAMA_BATCH_DEBUG");
+    debug = LLAMA_BATCH_DEBUG ? atoi(LLAMA_BATCH_DEBUG) : 0;
+}
 
 bool llama_batch_allocr::init(const llama_batch & batch_inp, const llama_vocab & vocab, llama_pos p0) {
     clear();
@@ -356,6 +360,33 @@ bool llama_batch_allocr::init(const llama_batch &
         n_outputs += batch.logits[i] != 0;
     }
 
+    if (debug > 0) {
+        LLAMA_LOG_DEBUG("%s: input batch info (p0 = %d):\n", __func__, p0);
+        LLAMA_LOG_DEBUG("%s: n_tokens = %d\n", __func__, batch.n_tokens);
+        LLAMA_LOG_DEBUG("%s: token = %p\n", __func__, (void *) batch.token);
+        LLAMA_LOG_DEBUG("%s: embd = %p\n", __func__, (void *) batch.embd);
+        LLAMA_LOG_DEBUG("%s: pos = %p\n", __func__, (void *) batch.pos);
+        LLAMA_LOG_DEBUG("%s: n_seq_id = %p\n", __func__, (void *) batch.n_seq_id);
+        LLAMA_LOG_DEBUG("%s: seq_id = %p\n", __func__, (void *) batch.seq_id);
+        LLAMA_LOG_DEBUG("%s: logits = %p\n", __func__, (void *) batch.logits);
+        LLAMA_LOG_DEBUG("%s: n_outputs = %d\n", __func__, n_outputs);
+
+        if (debug > 1) {
+            LLAMA_LOG_DEBUG("%s: token = [\n", __func__);
+            for (int32_t i = 0; i < batch.n_tokens; ++i) {
+                std::stringstream ss;
+                for (int s = 0; s < batch.n_seq_id[i]; ++s) {
+                    ss << batch.seq_id[i][s] << " ";
+                }
+
+                LLAMA_LOG_DEBUG("%s: %4d: id = %6d (%8s), pos = %4d, n_seq_id = %2d, seq_id = [ %s], output = %d\n",
+                    __func__, i, batch.token[i], vocab.token_to_piece(batch.token[i]).c_str(),
+                    batch.pos[i], batch.n_seq_id[i], ss.str().c_str(), batch.logits[i]);
+            }
+            LLAMA_LOG_DEBUG("%s: ]\n", __func__);
+        }
+    }
+
     return true;
 }
 
diff --git a/src/llama-batch.h b/src/llama-batch.h
index 24340b00f2702..1e0be8ac2c6ce 100644
--- a/src/llama-batch.h
+++ b/src/llama-batch.h
@@ -102,4 +102,6 @@ class llama_batch_allocr {
     std::vector<int32_t>        n_seq_id;
    std::vector<llama_seq_id *> seq_id;
     std::vector<int8_t>         output;
+
+    int debug;
 };

From 3ef36cb642629e48f7e6f78d34aa920a52504724 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Fri, 13 Jun 2025 17:34:56 +0300
Subject: [PATCH 2/2] cont : improve seq_id display

---
 src/llama-batch.cpp | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/llama-batch.cpp b/src/llama-batch.cpp
index 20ad4af1b1249..bdbf766266f90 100644
--- a/src/llama-batch.cpp
+++ b/src/llama-batch.cpp
@@ -372,14 +372,32 @@ bool llama_batch_allocr::init(const llama_batch &
         LLAMA_LOG_DEBUG("%s: n_outputs = %d\n", __func__, n_outputs);
 
         if (debug > 1) {
+            int seq_id_max = 0;
+            for (int32_t i = 0; i < batch.n_tokens; ++i) {
+                for (int s = 0; s < batch.n_seq_id[i]; ++s) {
+                    seq_id_max = std::max(seq_id_max, batch.seq_id[i][s]);
+                }
+            }
+            ++seq_id_max;
+
             LLAMA_LOG_DEBUG("%s: token = [\n", __func__);
             for (int32_t i = 0; i < batch.n_tokens; ++i) {
-                std::stringstream ss;
+                std::vector<int8_t> seq_id(seq_id_max);
+
                 for (int s = 0; s < batch.n_seq_id[i]; ++s) {
-                    ss << batch.seq_id[i][s] << " ";
+                    seq_id[batch.seq_id[i][s]] = 1;
+                }
+
+                std::stringstream ss;
+                for (int s = 0; s < seq_id_max; ++s) {
+                    if (seq_id[s]) {
+                        ss << s%10;
+                    } else {
+                        ss << ".";
+                    }
                 }
 
-                LLAMA_LOG_DEBUG("%s: %4d: id = %6d (%8s), pos = %4d, n_seq_id = %2d, seq_id = [ %s], output = %d\n",
+                LLAMA_LOG_DEBUG("%s: %4d: id = %6d (%16s), pos = %4d, n_seq_id = %2d, seq_id = [%s], output = %d\n",
                     __func__, i, batch.token[i], vocab.token_to_piece(batch.token[i]).c_str(),
                     batch.pos[i], batch.n_seq_id[i], ss.str().c_str(), batch.logits[i]);
             }