Skip to content

Commit d719fee

Browse files
committed
Add the n_graph_splits metric to the performance test output parameters in llama-bench
1 parent 12b1750 commit d719fee

File tree

3 files changed

+38
-8
lines changed

3 files changed

+38
-8
lines changed

examples/llama-bench/llama-bench.cpp

Lines changed: 29 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "common.h"
2222
#include "ggml.h"
2323
#include "llama.h"
24+
#include "llama-context.h"
2425

2526
#ifdef _WIN32
2627
# define WIN32_LEAN_AND_MEAN
@@ -872,13 +873,29 @@ static std::vector<cmd_params_instance> get_cmd_params_instances(const cmd_param
872873

873874
return instances;
874875
}
876+
/**
 * @brief Strip the directory portion from a model path, keeping only the file name.
 *
 * @param path The input model path (e.g. "models/7B/ggml-model-q4_0.gguf").
 * @return The file name component of the path, or the path unchanged when it
 *         contains no directory separator.
 */
static std::string get_modelfile_name(const std::string & path) {
    // Accept both POSIX '/' and Windows '\' separators: llama-bench also
    // builds for _WIN32, where paths commonly use backslashes.
    const size_t index = path.find_last_of("/\\");
    if (index == std::string::npos) {
        return path;
    }
    return path.substr(index + 1);
}
875891

876892
struct test {
877893
static const std::string build_commit;
878894
static const int build_number;
879895
const std::string cpu_info;
880896
const std::string gpu_info;
881897
std::string model_filename;
898+
int n_graph_splits;
882899
std::string model_type;
883900
uint64_t model_size;
884901
uint64_t model_n_params;
@@ -907,7 +924,7 @@ struct test {
907924
cpu_info(get_cpu_info()),
908925
gpu_info(get_gpu_info()) {
909926

910-
model_filename = inst.model;
927+
model_filename = get_modelfile_name(inst.model);
911928
char buf[128];
912929
llama_model_desc(lmodel, buf, sizeof(buf));
913930
model_type = buf;
@@ -936,7 +953,7 @@ struct test {
936953
std::strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&t));
937954
test_time = buf;
938955

939-
(void) ctx;
956+
n_graph_splits = ctx->get_graph_splits();
940957
}
941958

942959
uint64_t avg_ns() const { return ::avg(samples_ns); }
@@ -970,11 +987,11 @@ struct test {
970987
static const std::vector<std::string> & get_fields() {
971988
static const std::vector<std::string> fields = {
972989
"build_commit", "build_number", "cpu_info", "gpu_info", "backends", "model_filename",
973-
"model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "n_threads",
974-
"cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers",
975-
"split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "use_mmap",
976-
"embeddings", "n_prompt", "n_gen", "test_time", "avg_ns", "stddev_ns",
977-
"avg_ts", "stddev_ts",
990+
"n_graph_splits", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch",
991+
"n_threads", "cpu_mask", "cpu_strict", "poll", "type_k", "type_v",
992+
"n_gpu_layers", "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split",
993+
"use_mmap", "embeddings", "n_prompt", "n_gen", "test_time", "avg_ns",
994+
"stddev_ns", "avg_ts", "stddev_ts",
978995
};
979996
return fields;
980997
}
@@ -985,7 +1002,7 @@ struct test {
9851002
if (field == "build_number" || field == "n_batch" || field == "n_ubatch" || field == "n_threads" ||
9861003
field == "poll" || field == "model_size" || field == "model_n_params" || field == "n_gpu_layers" ||
9871004
field == "main_gpu" || field == "n_prompt" || field == "n_gen" || field == "avg_ns" ||
988-
field == "stddev_ns") {
1005+
field == "stddev_ns" || field == "n_graph_splits") {
9891006
return INT;
9901007
}
9911008
if (field == "f16_kv" || field == "no_kv_offload" || field == "cpu_strict" || field == "flash_attn" ||
@@ -1020,6 +1037,7 @@ struct test {
10201037
gpu_info,
10211038
get_backend(),
10221039
model_filename,
1040+
std::to_string(n_graph_splits),
10231041
model_type,
10241042
std::to_string(model_size),
10251043
std::to_string(model_n_params),
@@ -1196,6 +1214,9 @@ struct markdown_printer : public printer {
11961214
if (field == "n_gpu_layers") {
11971215
return 3;
11981216
}
1217+
if (field == "n_graph_splits") {
1218+
return 3;
1219+
}
11991220
if (field == "n_threads") {
12001221
return 7;
12011222
}

src/llama-context.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2216,6 +2216,13 @@ void llama_context::perf_reset() {
22162216
t_p_eval_us = n_p_eval = 0;
22172217
}
22182218

2219+
/**
2220+
* @brief Get the number of graph splits.
2221+
*/
2222+
int llama_context::get_graph_splits() const{
2223+
return ggml_backend_sched_get_n_splits(sched.get());
2224+
}
2225+
22192226
//
22202227
// interface implementation
22212228
//

src/llama-context.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,8 @@ struct llama_context {
127127

128128
llama_perf_context_data perf_get_data() const;
129129
void perf_reset();
130+
131+
int get_graph_splits() const;
130132

131133
private:
132134
//

0 commit comments

Comments (0)