From 7c89f0f1a062fe30e160bc6d0d86f56822af5eab Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Wed, 15 Oct 2025 12:34:22 +0300
Subject: [PATCH] server : fix mtmd checkpoints

---
 tools/server/server.cpp | 6 +++---
 tools/server/utils.hpp  | 5 +++--
 2 files changed, 6 insertions(+), 5 deletions(-)

diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index 77969d24e13e1..36dc32fe3d17e 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -3812,7 +3812,7 @@ struct server_context {
                 if (slot.n_past > 0 && slot.n_past < (int) slot.prompt.tokens.size()) {
                     const auto pos_min = llama_memory_seq_pos_min(llama_get_memory(ctx), slot.id);
                     if (pos_min == -1) {
-                        SLT_ERR(slot, "n_past = %d, cache_tokens.size() = %d, seq_id = %d, pos_min = %d\n", slot.n_past, (int) slot.prompt.tokens.size(), slot.id, pos_min);
+                        SLT_ERR(slot, "n_past = %d, slot.prompt.tokens.size() = %d, seq_id = %d, pos_min = %d\n", slot.n_past, (int) slot.prompt.tokens.size(), slot.id, pos_min);
                         GGML_ABORT("pos_min == -1, but n_past > 0 - should not happen: https://github.com/ggml-org/llama.cpp/pull/13833#discussion_r2116181237");
                     }
 
@@ -3860,7 +3860,7 @@ struct server_context {
                     }
 
                     if (pos_min > pos_min_thold) {
-                        SLT_WRN(slot, "n_past = %d, cache_tokens.size() = %d, seq_id = %d, pos_min = %d, n_swa = %d\n", slot.n_past, (int) slot.prompt.tokens.size(), slot.id, pos_min, n_swa);
+                        SLT_WRN(slot, "n_past = %d, slot.prompt.tokens.size() = %d, seq_id = %d, pos_min = %d, n_swa = %d\n", slot.n_past, (int) slot.prompt.tokens.size(), slot.id, pos_min, n_swa);
 
                         // search for a context checkpoint
                         const auto it = std::find_if(
@@ -4028,7 +4028,7 @@ struct server_context {
                     }
                 }
 
-                // SLT_INF(slot, "new cache_tokens: %s\n", slot.cache_tokens.str().c_str());
+                // SLT_INF(slot, "new slot.prompt.tokens: %s\n", slot.prompt.tokens.str().c_str());
 
                 SLT_INF(slot, "prompt processing progress, n_past = %d, n_tokens = %d, progress = %f\n", slot.n_past, batch.n_tokens, (float) slot.n_past / slot.n_prompt_tokens());
 
diff --git a/tools/server/utils.hpp b/tools/server/utils.hpp
index fd0bc8de533cf..cc48f5a9d0ac7 100644
--- a/tools/server/utils.hpp
+++ b/tools/server/utils.hpp
@@ -1237,9 +1237,10 @@ struct server_tokens {
         // allowed to resize           ^     ^
         // disallowed to resize              ^     ^  ^
         if (n > 0) {
-            llama_token last_token = tokens[n - 1];
             // make sure we never remove tokens in the middle of an image
-            if (last_token == LLAMA_TOKEN_NULL) {
+            // note that the case where we keep a full image at the end is allowed:
+            //   tokens[n - 1] == LLAMA_TOKEN_NULL && tokens[n] != LLAMA_TOKEN_NULL
+            if (tokens[n - 1] == LLAMA_TOKEN_NULL && tokens[n] == LLAMA_TOKEN_NULL) {
                 find_chunk(n - 1); // will throw an error if the token is not begin-of-chunk
             }
         }
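
Reviewer note (appended after the diff, not part of the commit): in
server_tokens, image chunks occupy runs of LLAMA_TOKEN_NULL placeholder
positions, so truncating to the first n tokens is only dangerous when the
cut lands inside such a run. Below is a minimal standalone sketch of that
boundary check, assuming a plain std::vector in place of server_tokens and
a hypothetical cut_splits_image() helper. It is a conservative
approximation: the patched code instead delegates the ambiguous case to
find_chunk(n - 1), which throws unless position n - 1 is the beginning of
a chunk.

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    using llama_token = int32_t;
    constexpr llama_token LLAMA_TOKEN_NULL = -1; // placeholder used for image positions

    // hypothetical helper: would keeping only the first n tokens cut
    // through a run of image placeholders?
    static bool cut_splits_image(const std::vector<llama_token> & tokens, std::size_t n) {
        if (n == 0 || n >= tokens.size()) {
            return false; // dropping everything or keeping everything is always safe
        }
        // keeping a full image at the end is allowed:
        //   tokens[n - 1] == LLAMA_TOKEN_NULL && tokens[n] != LLAMA_TOKEN_NULL
        // only a cut with placeholders on both sides can lie inside one image
        // (unlike the patch, this also flags a cut at the very start of an image)
        return tokens[n - 1] == LLAMA_TOKEN_NULL && tokens[n] == LLAMA_TOKEN_NULL;
    }

For example, with tokens = [T, T, N, N, N, T] (N = LLAMA_TOKEN_NULL, one
three-position image), n = 5 keeps the full image and is allowed, while
n = 4 cuts through the middle of the image and must be rejected; in the
patch that rejection happens inside find_chunk(3), which throws because
position 3 is not the beginning of a chunk.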