examples/llama-bench/llama-bench.cpp (14 additions, 7 deletions)
@@ -21,6 +21,8 @@
 #include "common.h"
 #include "ggml.h"
 #include "llama.h"
+#include "llama-context.h"
+#include <filesystem>
 
 #ifdef _WIN32
 #  define WIN32_LEAN_AND_MEAN
@@ -879,6 +881,7 @@ struct test {
     const std::string cpu_info;
     const std::string gpu_info;
     std::string model_filename;
+    int n_graph_splits;
     std::string model_type;
     uint64_t model_size;
     uint64_t model_n_params;
@@ -936,7 +939,7 @@ struct test {
         std::strftime(buf, sizeof(buf), "%FT%TZ", gmtime(&t));
         test_time = buf;
 
-        (void) ctx;
+        n_graph_splits = ctx->get_graph_splits();
     }
 
     uint64_t avg_ns() const { return ::avg(samples_ns); }
@@ -970,11 +973,11 @@ struct test {
     static const std::vector<std::string> & get_fields() {
         static const std::vector<std::string> fields = {
             "build_commit", "build_number", "cpu_info", "gpu_info", "backends", "model_filename",
-            "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch", "n_threads",
-            "cpu_mask", "cpu_strict", "poll", "type_k", "type_v", "n_gpu_layers",
-            "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split", "use_mmap",
-            "embeddings", "n_prompt", "n_gen", "test_time", "avg_ns", "stddev_ns",
-            "avg_ts", "stddev_ts",
+            "n_graph_splits", "model_type", "model_size", "model_n_params", "n_batch", "n_ubatch",
+            "n_threads", "cpu_mask", "cpu_strict", "poll", "type_k", "type_v",
+            "n_gpu_layers", "split_mode", "main_gpu", "no_kv_offload", "flash_attn", "tensor_split",
+            "use_mmap", "embeddings", "n_prompt", "n_gen", "test_time", "avg_ns",
+            "stddev_ns", "avg_ts", "stddev_ts",
         };
         return fields;
     }
@@ -985,7 +988,7 @@ struct test {
         if (field == "build_number" || field == "n_batch" || field == "n_ubatch" || field == "n_threads" ||
             field == "poll" || field == "model_size" || field == "model_n_params" || field == "n_gpu_layers" ||
             field == "main_gpu" || field == "n_prompt" || field == "n_gen" || field == "avg_ns" ||
-            field == "stddev_ns") {
+            field == "stddev_ns" || field == "n_graph_splits") {
             return INT;
         }
         if (field == "f16_kv" || field == "no_kv_offload" || field == "cpu_strict" || field == "flash_attn" ||
@@ -1020,6 +1023,7 @@ struct test {
             gpu_info,
             get_backend(),
             model_filename,
+            std::to_string(n_graph_splits),
             model_type,
             std::to_string(model_size),
             std::to_string(model_n_params),
@@ -1196,6 +1200,9 @@ struct markdown_printer : public printer {
         if (field == "n_gpu_layers") {
             return 3;
         }
+        if (field == "n_graph_splits") {
+            return 3;
+        }
         if (field == "n_threads") {
             return 7;
         }
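Note that `get_fields()` and `get_values()` are parallel arrays, so the new `n_graph_splits` entry has to appear at the same index in both (right after `model_filename` in the two hunks above). A minimal sketch of how a consumer could rely on that invariant to read one column out of a result row; `get_int_field` is a hypothetical helper, not part of this PR:

#include <algorithm>
#include <string>
#include <vector>

// Hypothetical helper: look a column up by name and parse it as an integer.
// This only works because get_fields() and get_values() stay index-aligned.
static int get_int_field(const std::vector<std::string> & fields,
                         const std::vector<std::string> & values,
                         const std::string & name) {
    auto it = std::find(fields.begin(), fields.end(), name);
    if (it == fields.end() || values.size() != fields.size()) {
        return -1; // column missing or row malformed
    }
    return std::stoi(values[it - fields.begin()]);
}

// usage (hypothetical, with a `test` instance t from llama-bench):
//   int n = get_int_field(test::get_fields(), t.get_values(), "n_graph_splits");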
src/llama-context.cpp (7 additions, 0 deletions)
@@ -2216,6 +2216,13 @@ void llama_context::perf_reset() {
     t_p_eval_us = n_p_eval = 0;
 }
 
+/**
+ * @brief Get the number of graph splits.
+ */
+int llama_context::get_graph_splits() const {
+    return ggml_backend_sched_get_n_splits(sched.get());
+}
+
 //
 // interface implementation
 //
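Here `sched` is the context's `ggml_backend_sched_t`, and `ggml_backend_sched_get_n_splits()` is the existing ggml scheduler API being wrapped: when a graph is scheduled, contiguous runs of nodes assigned to the same backend form one split, and each additional split implies cross-backend tensor copies per evaluation. A standalone sketch of the same query outside `llama_context`, under the assumption that the caller already owns a scheduler and a built compute graph:

#include "ggml.h"
#include "ggml-backend.h"

// Sketch: compute a graph through the scheduler, then ask how many backend
// splits it was partitioned into. A count of 1 means the whole graph ran on
// a single backend; higher counts indicate cross-backend boundaries.
int compute_and_count_splits(ggml_backend_sched_t sched, struct ggml_cgraph * graph) {
    if (ggml_backend_sched_graph_compute(sched, graph) != GGML_STATUS_SUCCESS) {
        return -1; // computation failed
    }
    return ggml_backend_sched_get_n_splits(sched);
}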
src/llama-context.h (2 additions, 0 deletions)
@@ -128,6 +128,8 @@ struct llama_context {
     llama_perf_context_data perf_get_data() const;
     void perf_reset();
 
+    int get_graph_splits() const;
+
 private:
     //
     // output
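Since the declaration sits above `private:`, the accessor is callable from any code that includes the internal header, which is exactly what llama-bench now does. A hedged usage sketch; `ctx` is assumed to be a live context on which at least one eval has already run, since the split count reflects the most recently scheduled graph:

#include <cstdio>
#include "llama-context.h"

// Sketch: report the split count after decoding. More than one split means
// the compute graph crossed a backend boundary (e.g. partial GPU offload),
// which adds synchronization and copy overhead on every evaluation.
void report_graph_splits(llama_context * ctx) {
    const int n_splits = ctx->get_graph_splits();
    fprintf(stderr, "compute graph was scheduled as %d split(s)\n", n_splits);
}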