From bd6f1dd09a6e707369d7a0bdd99e95eb166df429 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Thu, 17 Apr 2025 16:19:20 +0800 Subject: [PATCH 1/4] Add the n_graph_splits metric to the performance test output parameters in llama-bench --- examples/llama-bench/llama-bench.cpp | 37 ++++++++++++++++++++++------ src/llama-context.cpp | 7 ++++++ src/llama-context.h | 2 ++ 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index cbcbfcee861ee..3ed57359b6d25 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -21,6 +21,7 @@ #include "common.h" #include "ggml.h" #include "llama.h" +#include "llama-context.h" #ifdef _WIN32 # define WIN32_LEAN_AND_MEAN @@ -872,6 +873,21 @@ static std::vector get_cmd_params_instances(const cmd_param return instances; } +/** + * @brief Remove the input model path information and keep only the model name. + * + * @param path The input model path information. + * @return Full name of the model. 
+ */ +static std::string get_modelfile_name(const std::string & path) { + size_t index = path.find_last_of('/'); + if (index != std::string::npos) { + std::string filename = path.substr(index + 1); + return filename; + } else { + return path; + } +} struct test { static const std::string build_commit; @@ -879,6 +895,7 @@ struct test { const std::string cpu_info; const std::string gpu_info; std::string model_filename; + int n_graph_splits; std::string model_type; uint64_t model_size; uint64_t model_n_params; @@ -907,7 +924,7 @@ struct test { cpu_info(get_cpu_info()), gpu_info(get_gpu_info()) { - model_filename = inst.model; + model_filename = get_modelfile_name(inst.model); char buf[128]; llama_model_desc(lmodel, buf, sizeof(buf)); model_type = buf; @@ -936,7 +953,7 @@ struct test { std::strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&t)); test_time = buf; - (void) ctx; + n_graph_splits = ctx->get_graph_splits(); } uint64_t avg_ns() const { return ::avg(samples_ns); } @@ -970,11 +987,11 @@ struct test { static const std::vector & get_fields() { static const std::vector fields = { "build_commit", "build_number", "cpu_info", "gpu_info", "backends", "model_filename", - "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "n_threads", - "cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers", - "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "use_mmap", - "embeddings", "n_prompt", "n_gen", "test_time", "avg_ns", "stddev_ns", - "avg_ts", "stddev_ts", + "n_graph_splits", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", + "n_threads", "cpu_mask", "cpu_strict", "poll", "type_k", "type_v", + "n_gpu_layers", "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", + "use_mmap", "embeddings", "n_prompt", "n_gen", "test_time", "avg_ns", + "stddev_ns", "avg_ts", "stddev_ts", }; return fields; } @@ -985,7 +1002,7 @@ struct test { if (field == "build_number" || field == "n_batch" || field == 
"n_ubatch" || field == "n_threads" || field == "poll" || field == "model_size" || field == "model_n_params" || field == "n_gpu_layers" || field == "main_gpu" || field == "n_prompt" || field == "n_gen" || field == "avg_ns" || - field == "stddev_ns") { + field == "stddev_ns" || field == "n_graph_splits") { return INT; } if (field == "f16_kv" || field == "no_kv_offload" || field == "cpu_strict" || field == "flash_attn" || @@ -1020,6 +1037,7 @@ struct test { gpu_info, get_backend(), model_filename, + std::to_string(n_graph_splits), model_type, std::to_string(model_size), std::to_string(model_n_params), @@ -1196,6 +1214,9 @@ struct markdown_printer : public printer { if (field == "n_gpu_layers") { return 3; } + if (field == "n_graph_splits") { + return 3; + } if (field == "n_threads") { return 7; } diff --git a/src/llama-context.cpp b/src/llama-context.cpp index 983385f86d494..cb4802ca9b195 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -2216,6 +2216,13 @@ void llama_context::perf_reset() { t_p_eval_us = n_p_eval = 0; } +/** + * @brief Get the number of graph splits. 
+ */ +int llama_context::get_graph_splits() const{ + return ggml_backend_sched_get_n_splits(sched.get()); +} + // // interface implementation // diff --git a/src/llama-context.h b/src/llama-context.h index 04facb544cb1a..b0a8c6e703134 100644 --- a/src/llama-context.h +++ b/src/llama-context.h @@ -127,6 +127,8 @@ struct llama_context { llama_perf_context_data perf_get_data() const; void perf_reset(); + + int get_graph_splits() const; private: // From b54eaa63dd9feece067ef4e636ef3a1fd2b44871 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Thu, 17 Apr 2025 17:29:53 +0800 Subject: [PATCH 2/4] Adaptation of model name extraction for different systems --- examples/llama-bench/llama-bench.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 3ed57359b6d25..92eef230407d7 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -22,6 +22,7 @@ #include "ggml.h" #include "llama.h" #include "llama-context.h" +#include <filesystem> #ifdef _WIN32 # define WIN32_LEAN_AND_MEAN @@ -879,14 +880,11 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param * @param path The input model path information. * @return Full name of the model.
*/ -static std::string get_modelfile_name(const std::string & path) { - size_t index = path.find_last_of('/'); - if (index != std::string::npos) { - std::string filename = path.substr(index + 1); - return filename; - } else { - return path; - } +static std::string get_modelfile_name(const std::string & path_str) { + namespace fs = std::filesystem; + fs::path path = path_str; + + return path.filename(); } struct test { From 3138f017ae8aa6032d7cae4117a1fd36278de742 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Thu, 17 Apr 2025 17:34:29 +0800 Subject: [PATCH 3/4] delete trailing whitespace --- examples/llama-bench/llama-bench.cpp | 2 +- src/llama-context.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 92eef230407d7..3bfd3d9bf5296 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -883,7 +883,7 @@ static std::vector get_cmd_params_instances(const cmd_param static std::string get_modelfile_name(const std::string & path_str) { namespace fs = std::filesystem; fs::path path = path_str; - + return path.filename(); } diff --git a/src/llama-context.h b/src/llama-context.h index b0a8c6e703134..39783132d72c3 100644 --- a/src/llama-context.h +++ b/src/llama-context.h @@ -127,7 +127,7 @@ struct llama_context { llama_perf_context_data perf_get_data() const; void perf_reset(); - + int get_graph_splits() const; private: From feb049e12c10069c02f0635da426c5347a888857 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Fri, 18 Apr 2025 14:30:58 +0800 Subject: [PATCH 4/4] Undo model name changes --- examples/llama-bench/llama-bench.cpp | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 3bfd3d9bf5296..b86093630c392 100644 --- a/examples/llama-bench/llama-bench.cpp +++ 
b/examples/llama-bench/llama-bench.cpp @@ -874,18 +874,6 @@ static std::vector get_cmd_params_instances(const cmd_param return instances; } -/** - * @brief Remove the input model path information and keep only the model name. - * - * @param path The input model path information. - * @return Full name of the model. - */ -static std::string get_modelfile_name(const std::string & path_str) { - namespace fs = std::filesystem; - fs::path path = path_str; - - return path.filename(); -} struct test { static const std::string build_commit; @@ -922,7 +910,7 @@ struct test { cpu_info(get_cpu_info()), gpu_info(get_gpu_info()) { - model_filename = get_modelfile_name(inst.model); + model_filename = inst.model; char buf[128]; llama_model_desc(lmodel, buf, sizeof(buf)); model_type = buf;