From bd6f1dd09a6e707369d7a0bdd99e95eb166df429 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Thu, 17 Apr 2025 16:19:20 +0800 Subject: [PATCH 1/4] Add the n_graph_splits metric to the performance test output parameters in llama-bench --- examples/llama-bench/llama-bench.cpp | 37 ++++++++++++++++++++++------ src/llama-context.cpp | 7 ++++++ src/llama-context.h | 2 ++ 3 files changed, 38 insertions(+), 8 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index cbcbfcee861ee..3ed57359b6d25 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -21,6 +21,7 @@ #include "common.h" #include "ggml.h" #include "llama.h" +#include "llama-context.h" #ifdef _WIN32 # define WIN32_LEAN_AND_MEAN @@ -872,6 +873,21 @@ static std::vector get_cmd_params_instances(const cmd_param return instances; } +/** + * @brief Remove the input model path information and keep only the model name. + * + * @param path The input model path information. + * @return Full name of the model. 
+ */ +static std::string get_modelfile_name(const std::string & path) { + size_t index = path.find_last_of('/'); + if (index != std::string::npos) { + std::string filename = path.substr(index + 1); + return filename; + } else { + return path; + } +} struct test { static const std::string build_commit; @@ -879,6 +895,7 @@ struct test { const std::string cpu_info; const std::string gpu_info; std::string model_filename; + int n_graph_splits; std::string model_type; uint64_t model_size; uint64_t model_n_params; @@ -907,7 +924,7 @@ struct test { cpu_info(get_cpu_info()), gpu_info(get_gpu_info()) { - model_filename = inst.model; + model_filename = get_modelfile_name(inst.model); char buf[128]; llama_model_desc(lmodel, buf, sizeof(buf)); model_type = buf; @@ -936,7 +953,7 @@ struct test { std::strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&t)); test_time = buf; - (void) ctx; + n_graph_splits = ctx->get_graph_splits(); } uint64_t avg_ns() const { return ::avg(samples_ns); } @@ -970,11 +987,11 @@ struct test { static const std::vector & get_fields() { static const std::vector fields = { "build_commit", "build_number", "cpu_info", "gpu_info", "backends", "model_filename", - "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "n_threads", - "cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers", - "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "use_mmap", - "embeddings", "n_prompt", "n_gen", "test_time", "avg_ns", "stddev_ns", - "avg_ts", "stddev_ts", + "n_graph_splits", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", + "n_threads", "cpu_mask", "cpu_strict", "poll", "type_k", "type_v", + "n_gpu_layers", "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", + "use_mmap", "embeddings", "n_prompt", "n_gen", "test_time", "avg_ns", + "stddev_ns", "avg_ts", "stddev_ts", }; return fields; } @@ -985,7 +1002,7 @@ struct test { if (field == "build_number" || field == "n_batch" || field == 
"n_ubatch" || field == "n_threads" || field == "poll" || field == "model_size" || field == "model_n_params" || field == "n_gpu_layers" || field == "main_gpu" || field == "n_prompt" || field == "n_gen" || field == "avg_ns" || - field == "stddev_ns") { + field == "stddev_ns" || field == "n_graph_splits") { return INT; } if (field == "f16_kv" || field == "no_kv_offload" || field == "cpu_strict" || field == "flash_attn" || @@ -1020,6 +1037,7 @@ struct test { gpu_info, get_backend(), model_filename, + std::to_string(n_graph_splits), model_type, std::to_string(model_size), std::to_string(model_n_params), @@ -1196,6 +1214,9 @@ struct markdown_printer : public printer { if (field == "n_gpu_layers") { return 3; } + if (field == "n_graph_splits") { + return 3; + } if (field == "n_threads") { return 7; } diff --git a/src/llama-context.cpp b/src/llama-context.cpp index 983385f86d494..cb4802ca9b195 100644 --- a/src/llama-context.cpp +++ b/src/llama-context.cpp @@ -2216,6 +2216,13 @@ void llama_context::perf_reset() { t_p_eval_us = n_p_eval = 0; } +/** + * @brief Get the number of graph splits. 
+ */ +int llama_context::get_graph_splits() const{ + return ggml_backend_sched_get_n_splits(sched.get()); +} + // // interface implementation // diff --git a/src/llama-context.h b/src/llama-context.h index 04facb544cb1a..b0a8c6e703134 100644 --- a/src/llama-context.h +++ b/src/llama-context.h @@ -127,6 +127,8 @@ struct llama_context { llama_perf_context_data perf_get_data() const; void perf_reset(); + + int get_graph_splits() const; private: // From b54eaa63dd9feece067ef4e636ef3a1fd2b44871 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Thu, 17 Apr 2025 17:29:53 +0800 Subject: [PATCH 2/4] Adaptation of model name extraction for different systems --- examples/llama-bench/llama-bench.cpp | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 3ed57359b6d25..92eef230407d7 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -22,6 +22,7 @@ #include "ggml.h" #include "llama.h" #include "llama-context.h" +#include <filesystem> #ifdef _WIN32 # define WIN32_LEAN_AND_MEAN @@ -879,14 +880,11 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param * @param path The input model path information. * @return Full name of the model.
*/ -static std::string get_modelfile_name(const std::string & path) { - size_t index = path.find_last_of('/'); - if (index != std::string::npos) { - std::string filename = path.substr(index + 1); - return filename; - } else { - return path; - } +static std::string get_modelfile_name(const std::string & path_str) { + namespace fs = std::filesystem; + fs::path path = path_str; + + return path.filename(); } struct test { From 3138f017ae8aa6032d7cae4117a1fd36278de742 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Thu, 17 Apr 2025 17:34:29 +0800 Subject: [PATCH 3/4] delete trailing whitespace --- examples/llama-bench/llama-bench.cpp | 2 +- src/llama-context.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 92eef230407d7..3bfd3d9bf5296 100644 --- a/examples/llama-bench/llama-bench.cpp +++ b/examples/llama-bench/llama-bench.cpp @@ -883,7 +883,7 @@ static std::vector get_cmd_params_instances(const cmd_param static std::string get_modelfile_name(const std::string & path_str) { namespace fs = std::filesystem; fs::path path = path_str; - + return path.filename(); } diff --git a/src/llama-context.h b/src/llama-context.h index b0a8c6e703134..39783132d72c3 100644 --- a/src/llama-context.h +++ b/src/llama-context.h @@ -127,7 +127,7 @@ struct llama_context { llama_perf_context_data perf_get_data() const; void perf_reset(); - + int get_graph_splits() const; private: From feb049e12c10069c02f0635da426c5347a888857 Mon Sep 17 00:00:00 2001 From: bachelor-dou <15529241576@163.com> Date: Fri, 18 Apr 2025 14:30:58 +0800 Subject: [PATCH 4/4] Undo model name changes --- examples/llama-bench/llama-bench.cpp | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/examples/llama-bench/llama-bench.cpp b/examples/llama-bench/llama-bench.cpp index 3bfd3d9bf5296..b86093630c392 100644 --- a/examples/llama-bench/llama-bench.cpp +++ 
b/examples/llama-bench/llama-bench.cpp @@ -874,18 +874,6 @@ static std::vector get_cmd_params_instances(const cmd_param return instances; } -/** - * @brief Remove the input model path information and keep only the model name. - * - * @param path The input model path information. - * @return Full name of the model. - */ -static std::string get_modelfile_name(const std::string & path_str) { - namespace fs = std::filesystem; - fs::path path = path_str; - - return path.filename(); -} struct test { static const std::string build_commit; @@ -922,7 +910,7 @@ struct test { cpu_info(get_cpu_info()), gpu_info(get_gpu_info()) { - model_filename = get_modelfile_name(inst.model); + model_filename = inst.model; char buf[128]; llama_model_desc(lmodel, buf, sizeof(buf)); model_type = buf;