Skip to content

Commit 7b19b12

Browse files
print bufts + memory use for non-GPU
1 parent c47ead4 commit 7b19b12

File tree

2 files changed

+14
-21
lines changed

2 files changed

+14
-21
lines changed

src/llama-context.cpp

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#include "llama-context.h"
22

3-
#include "ggml-backend.h"
43
#include "llama-impl.h"
54
#include "llama-batch.h"
65
#include "llama-io.h"
@@ -2790,7 +2789,7 @@ void llama_memory_breakdown_print(const struct llama_context * ctx) {
27902789
table_data.reserve(devices.size());
27912790
const std::string template_header = "%s: | %s | %s %s %s %s %s %s %s |\n";
27922791
const std::string template_gpu = "%s: | %s | %s = %s + (%s = %s + %s + %s) + %s |\n";
2793-
const std::string template_host = "%s: | %s | %s %s %s = %s + %s + %s %s |\n";
2792+
const std::string template_other = "%s: | %s | %s %s %s = %s + %s + %s %s |\n";
27942793

27952794
table_data.push_back({template_header, "memory breakdown [MiB]", "total", "free", "self", "model", "context", "compute", "unaccounted"});
27962795

@@ -2835,34 +2834,29 @@ void llama_memory_breakdown_print(const struct llama_context * ctx) {
28352834
seen_host_buffer_types.insert(buft);
28362835
}
28372836

2838-
// "free" host memory is poorly defined, instead track only memory that we know is being used:
2839-
llama_memory_breakdown_data mb_host = {0, 0, 0};
2840-
28412837
// consolidate all memory buffers not on any of the models GPU devices as host memory:
28422838
for (const auto & buft_mb : memory_breakdown) {
28432839
ggml_backend_buffer_type_t buft = buft_mb.first;
28442840
const llama_memory_breakdown_data & mb = buft_mb.second;
28452841
if (seen_host_buffer_types.count(buft) == 1) {
28462842
continue;
28472843
}
2848-
mb_host.model += mb.model;
2849-
mb_host.context += mb.context;
2850-
mb_host.compute += mb.compute;
2844+
const std::string name = ggml_backend_buft_name(buft);
2845+
const size_t self = mb.model + mb.context + mb.compute;
2846+
table_data.push_back({
2847+
template_other,
2848+
" - " + name,
2849+
"", // total
2850+
"", // free
2851+
std::to_string(self / MiB),
2852+
std::to_string(mb.model / MiB),
2853+
std::to_string(mb.context / MiB),
2854+
std::to_string(mb.compute / MiB),
2855+
""}); // unaccounted
2856+
seen_host_buffer_types.insert(buft);
28512857
seen_host_buffer_types.insert(buft);
28522858
}
28532859

2854-
const size_t self_host = mb_host.model + mb_host.context + mb_host.compute;
2855-
table_data.push_back({
2856-
template_host,
2857-
" - Host",
2858-
"", // total
2859-
"", // free
2860-
std::to_string(self_host / MiB),
2861-
std::to_string(mb_host.model / MiB),
2862-
std::to_string(mb_host.context / MiB),
2863-
std::to_string(mb_host.compute / MiB),
2864-
""}); // unaccounted
2865-
28662860
for (size_t j = 1; j < table_data[0].size(); j++) {
28672861
size_t max_len = 0;
28682862
for (const auto & td : table_data) {

src/llama-memory-recurrent.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
#include "llama-memory-recurrent.h"
22

3-
#include "ggml-alloc.h"
43
#include "llama-impl.h"
54
#include "llama-io.h"
65
#include "llama-batch.h"

0 commit comments

Comments (0)