From 8dd61afcdf6a9e499f072db0d03521e77cca7d31 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Fri, 13 Jun 2025 17:01:14 +0300
Subject: [PATCH 1/2] batch : add LLAMA_BATCH_DEBUG environment variable

ggml-ci
---
 src/llama-batch.cpp | 33 ++++++++++++++++++++++++++++++++-
 src/llama-batch.h   |  2 ++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/src/llama-batch.cpp b/src/llama-batch.cpp
index 9066d5a9b274d..20ad4af1b1249 100644
--- a/src/llama-batch.cpp
+++ b/src/llama-batch.cpp
@@ -7,6 +7,7 @@
 #include <cassert>
 #include <cstring>
 #include <algorithm>
+#include <sstream>
 
 llama_ubatch llama_sbatch::reserve_ubatch(size_t n_ubatch, bool has_embd) {
     // clear empty sequences
@@ -283,7 +284,10 @@ llama_sbatch::llama_sbatch(const llama_batch & batch, size_t n_embd, bool simple
     );
 }
 
-llama_batch_allocr::llama_batch_allocr() = default;
+llama_batch_allocr::llama_batch_allocr() {
+    const char * LLAMA_BATCH_DEBUG = getenv("LLAMA_BATCH_DEBUG");
+    debug = LLAMA_BATCH_DEBUG ? atoi(LLAMA_BATCH_DEBUG) : 0;
+}
 
 bool llama_batch_allocr::init(const llama_batch & batch_inp, const llama_vocab & vocab, llama_pos p0) {
     clear();
@@ -356,6 +360,33 @@ bool llama_batch_allocr::init(const llama_batch &
         n_outputs += batch.logits[i] != 0;
     }
 
+    if (debug > 0) {
+        LLAMA_LOG_DEBUG("%s: input batch info (p0 = %d):\n", __func__, p0);
+        LLAMA_LOG_DEBUG("%s: n_tokens = %d\n", __func__, batch.n_tokens);
+        LLAMA_LOG_DEBUG("%s: token = %p\n", __func__, (void *) batch.token);
+        LLAMA_LOG_DEBUG("%s: embd = %p\n", __func__, (void *) batch.embd);
+        LLAMA_LOG_DEBUG("%s: pos = %p\n", __func__, (void *) batch.pos);
+        LLAMA_LOG_DEBUG("%s: n_seq_id = %p\n", __func__, (void *) batch.n_seq_id);
+        LLAMA_LOG_DEBUG("%s: seq_id = %p\n", __func__, (void *) batch.seq_id);
+        LLAMA_LOG_DEBUG("%s: logits = %p\n", __func__, (void *) batch.logits);
+        LLAMA_LOG_DEBUG("%s: n_outputs = %d\n", __func__, n_outputs);
+
+        if (debug > 1) {
+            LLAMA_LOG_DEBUG("%s: token = [\n", __func__);
+            for (int32_t i = 0; i < batch.n_tokens; ++i) {
+                std::stringstream ss;
+                for (int s = 0; s < batch.n_seq_id[i]; ++s) {
+                    ss << batch.seq_id[i][s] << " ";
+                }
+
+                LLAMA_LOG_DEBUG("%s: %4d: id = %6d (%8s), pos = %4d, n_seq_id = %2d, seq_id = [ %s], output = %d\n",
+                    __func__, i, batch.token[i], vocab.token_to_piece(batch.token[i]).c_str(),
+                    batch.pos[i], batch.n_seq_id[i], ss.str().c_str(), batch.logits[i]);
+            }
+            LLAMA_LOG_DEBUG("%s: ]\n", __func__);
+        }
+    }
+
     return true;
 }
 
diff --git a/src/llama-batch.h b/src/llama-batch.h
index 24340b00f2702..1e0be8ac2c6ce 100644
--- a/src/llama-batch.h
+++ b/src/llama-batch.h
@@ -102,4 +102,6 @@ class llama_batch_allocr {
     std::vector<int32_t>        n_seq_id;
    std::vector<llama_seq_id *> seq_id;
     std::vector<int8_t>         output;
+
+    int debug;
 };

From 3ef36cb642629e48f7e6f78d34aa920a52504724 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Fri, 13 Jun 2025 17:34:56 +0300
Subject: [PATCH 2/2] cont : improve seq_id display

---
 src/llama-batch.cpp | 24 +++++++++++++++++++++---
 1 file changed, 21 insertions(+), 3 deletions(-)

diff --git a/src/llama-batch.cpp b/src/llama-batch.cpp
index 20ad4af1b1249..bdbf766266f90 100644
--- a/src/llama-batch.cpp
+++ b/src/llama-batch.cpp
@@ -372,14 +372,32 @@ bool llama_batch_allocr::init(const llama_batch &
         LLAMA_LOG_DEBUG("%s: n_outputs = %d\n", __func__, n_outputs);
 
         if (debug > 1) {
+            int seq_id_max = 0;
+            for (int32_t i = 0; i < batch.n_tokens; ++i) {
+                for (int s = 0; s < batch.n_seq_id[i]; ++s) {
+                    seq_id_max = std::max(seq_id_max, batch.seq_id[i][s]);
+                }
+            }
+            ++seq_id_max;
+
             LLAMA_LOG_DEBUG("%s: token = [\n", __func__);
             for (int32_t i = 0; i < batch.n_tokens; ++i) {
-                std::stringstream ss;
+                std::vector<int8_t> seq_id(seq_id_max);
+
                 for (int s = 0; s < batch.n_seq_id[i]; ++s) {
-                    ss << batch.seq_id[i][s] << " ";
+                    seq_id[batch.seq_id[i][s]] = 1;
+                }
+
+                std::stringstream ss;
+                for (int s = 0; s < seq_id_max; ++s) {
+                    if (seq_id[s]) {
+                        ss << s%10;
+                    } else {
+                        ss << ".";
+                    }
                 }
 
-                LLAMA_LOG_DEBUG("%s: %4d: id = %6d (%8s), pos = %4d, n_seq_id = %2d, seq_id = [ %s], output = %d\n",
+                LLAMA_LOG_DEBUG("%s: %4d: id = %6d (%16s), pos = %4d, n_seq_id = %2d, seq_id = [%s], output = %d\n",
                     __func__, i, batch.token[i], vocab.token_to_piece(batch.token[i]).c_str(),
                     batch.pos[i], batch.n_seq_id[i], ss.str().c_str(), batch.logits[i]);
             }