
Commit 85cc1ae

context : print graph stats for memory-less contexts (ggml-org#15586)
ggml-ci
1 parent 1d8d83d

File tree

1 file changed (+12 −10 lines)


src/llama-context.cpp

Lines changed: 12 additions & 10 deletions
@@ -280,7 +280,7 @@ llama_context::llama_context(
     }
 
     // reserve worst-case graph
-    if (!hparams.vocab_only && memory) {
+    if (!hparams.vocab_only) {
         const uint32_t n_seqs   = cparams.kv_unified ? 1 : cparams.n_seq_max;
         const uint32_t n_tokens = std::min(cparams.n_ctx, cparams.n_ubatch);
 
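The substantive change is this guard: worst-case graph reservation, and the graph stats it logs (the n_splits_*/n_nodes_* counters visible in the next hunk), previously required a memory module, so memory-less contexts (e.g. models with no KV cache) never printed them. A minimal sketch of the control-flow difference, using simplified stand-in types rather than llama.cpp's real API:

    #include <memory>

    struct memory_module { /* stand-in for the KV cache / recurrent state */ };

    struct context {
        bool vocab_only = false;
        std::unique_ptr<memory_module> memory; // null for memory-less models

        void reserve_worst_case_graph() {
            // before: if (!vocab_only && memory) -> skipped when memory == nullptr,
            //         so memory-less contexts never logged their graph stats
            // after:  if (!vocab_only)           -> stats are computed either way
            if (!vocab_only) {
                // ... build worst-case graphs, record node/split counts, log them ...
            }
        }
    };

    int main() {
        context ctx;                    // no memory module attached
        ctx.reserve_worst_case_graph(); // now runs for memory-less contexts too
    }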
@@ -292,11 +292,13 @@ llama_context::llama_context(
         int n_splits_tg = -1;
         int n_nodes_tg  = -1;
 
-        // simulate full KV cache
-
-        const auto mctx = memory->init_full();
-        if (!mctx) {
-            throw std::runtime_error("failed to initialize KV cache");
+        llama_memory_context_ptr mctx;
+        if (memory) {
+            LLAMA_LOG_DEBUG("%s: reserving full memory module\n", __func__);
+            mctx = memory->init_full();
+            if (!mctx) {
+                throw std::runtime_error("failed to initialize memory module");
+            }
         }
 
         cross.v_embd.clear();
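With the guard relaxed, mctx can legitimately remain null: init_full() is only invoked when a memory module exists, and the reservation code downstream must tolerate a null context. A hedged sketch of that pattern; the types and the reserve_graph helper below are simplified assumptions, not llama.cpp's actual interfaces:

    #include <cstdio>
    #include <memory>
    #include <stdexcept>

    struct memory_context { };
    using memory_context_ptr = std::unique_ptr<memory_context>;

    struct memory_module {
        // simulate a fully occupied memory state for worst-case sizing
        memory_context_ptr init_full() { return std::make_unique<memory_context>(); }
    };

    // hypothetical stand-in for graph reservation; accepts nullptr
    void reserve_graph(const memory_context * mctx) {
        std::printf("reserving graph, mctx = %p\n", (const void *) mctx);
    }

    int main() {
        std::unique_ptr<memory_module> memory; // null: a memory-less context

        memory_context_ptr mctx;
        if (memory) {
            mctx = memory->init_full();
            if (!mctx) {
                throw std::runtime_error("failed to initialize memory module");
            }
        }

        reserve_graph(mctx.get()); // reached with nullptr when there is no memory
    }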
@@ -1056,7 +1058,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
         const auto * res = process_ubatch(ubatch, LLM_GRAPH_TYPE_DECODER, mctx.get(), status);
 
         if (!res) {
-            // the last ubatch failed or was aborted -> remove all positions of that ubatch from the KV cache
+            // the last ubatch failed or was aborted -> remove all positions of that ubatch from the memory module
             llama_pos pos_min[LLAMA_MAX_SEQ];
             for (int s = 0; s < LLAMA_MAX_SEQ; ++s) {
                 pos_min[s] = std::numeric_limits<llama_pos>::max();
@@ -1073,7 +1075,7 @@ int llama_context::decode(const llama_batch & batch_inp) {
                     continue;
                 }
 
-                LLAMA_LOG_WARN("%s: removing KV cache entries for seq_id = %d, pos = [%d, +inf)\n", __func__, s, pos_min[s]);
+                LLAMA_LOG_WARN("%s: removing memory module entries for seq_id = %d, pos = [%d, +inf)\n", __func__, s, pos_min[s]);
 
                 memory->seq_rm(s, pos_min[s], -1);
             }
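The two decode-path hunks only rename "KV cache" to "memory module" in a comment and a warning, but the surrounding logic is worth spelling out: when a ubatch fails or is aborted, every position it touched must be evicted so the memory module is not left with partially written entries. A self-contained sketch of the min-position tracking; the ubatch layout and the seq_rm signature are simplified assumptions:

    #include <algorithm>
    #include <cstdint>
    #include <cstdio>
    #include <limits>
    #include <vector>

    constexpr int MAX_SEQ = 4; // stand-in for LLAMA_MAX_SEQ
    using pos_t = int32_t;     // stand-in for llama_pos

    struct token { int seq_id; pos_t pos; };

    // hypothetical stand-in for memory->seq_rm(s, p0, -1):
    // removes positions [p0, +inf) of sequence s
    void seq_rm(int s, pos_t p0) {
        std::printf("removing entries for seq_id = %d, pos = [%d, +inf)\n", s, p0);
    }

    int main() {
        // the tokens of a ubatch that failed or was aborted
        std::vector<token> ubatch = { {0, 10}, {0, 11}, {2, 7} };

        // track the smallest position each sequence touched
        pos_t pos_min[MAX_SEQ];
        for (int s = 0; s < MAX_SEQ; ++s) {
            pos_min[s] = std::numeric_limits<pos_t>::max();
        }
        for (const auto & t : ubatch) {
            pos_min[t.seq_id] = std::min(pos_min[t.seq_id], t.pos);
        }

        // evict everything from the first touched position onward
        for (int s = 0; s < MAX_SEQ; ++s) {
            if (pos_min[s] == std::numeric_limits<pos_t>::max()) {
                continue; // sequence not present in this ubatch
            }
            seq_rm(s, pos_min[s]);
        }
    }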
@@ -1857,7 +1859,7 @@ size_t llama_context::state_write_data(llama_io_write_i & io) {
     }
 
     if (memory != nullptr) {
-        LLAMA_LOG_DEBUG("%s: - writing KV self\n", __func__);
+        LLAMA_LOG_DEBUG("%s: - writing memory module\n", __func__);
         memory->state_write(io);
     }
 
@@ -1943,7 +1945,7 @@ size_t llama_context::state_read_data(llama_io_read_i & io) {
     }
 
     if (memory) {
-        LLAMA_LOG_DEBUG("%s: - reading KV self\n", __func__);
+        LLAMA_LOG_DEBUG("%s: - reading memory module\n", __func__);
 
         memory->state_read(io);
     }
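The serialization hunks likewise only retitle the debug logs, but they illustrate the same null-tolerant convention: memory-module state is written and read only when a module exists, so memory-less contexts round-trip session state without it. A minimal sketch of that symmetry, with the io type and state methods as simplified assumptions:

    #include <cstdio>
    #include <memory>

    struct io_buffer { /* stand-in for llama_io_write_i / llama_io_read_i */ };

    struct memory_module {
        void state_write(io_buffer &) { std::printf("writing memory module\n"); }
        void state_read (io_buffer &) { std::printf("reading memory module\n"); }
    };

    int main() {
        std::unique_ptr<memory_module> memory; // may be null for memory-less models
        io_buffer io;

        if (memory) {
            memory->state_write(io); // skipped entirely when there is no memory
        }
        if (memory) {
            memory->state_read(io);  // the read path mirrors the write path
        }
    }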
