@@ -4164,19 +4164,20 @@ def llama_log_set(
41644164# //
41654165# // Performance utils
41664166# //
4167- # // NOTE: Used by llama.cpp examples, avoid using in third-party apps. Instead, do your own performance measurements.
4167+ # // NOTE: Used by llama.cpp examples/tools, avoid using in third-party apps. Instead, do your own performance measurements.
41684168# //
41694169
41704170
41714171# struct llama_perf_context_data {
4172- # double t_start_ms;
4173- # double t_load_ms;
4174- # double t_p_eval_ms;
4175- # double t_eval_ms;
4176- #
4177- # int32_t n_p_eval;
4178- # int32_t n_eval;
4179- # int32_t n_reused; // number of times a ggml compute graph had been reused
4172+ # // ms == milliseconds
4173+ # double t_start_ms; // absolute start time
4174+ # double t_load_ms; // time needed for loading the model
4175+ # double t_p_eval_ms; // time needed for processing the prompt
4176+ # double t_eval_ms; // time needed for generating tokens
4177+
4178+ # int32_t n_p_eval; // number of prompt tokens
4179+ # int32_t n_eval; // number of generated tokens
4180+ # int32_t n_reused; // number of times a ggml compute graph had been reused
41804181# };
41814182class llama_perf_context_data (ctypes .Structure ):
41824183 _fields_ = [
@@ -4191,9 +4192,8 @@ class llama_perf_context_data(ctypes.Structure):
41914192
41924193
41934194# struct llama_perf_sampler_data {
4194- # double t_sample_ms;
4195- #
4196- # int32_t n_sample;
4195+ # double t_sample_ms; // time needed for sampling in ms
4196+ # int32_t n_sample; // number of sampled tokens
41974197# };
41984198class llama_perf_sampler_data (ctypes .Structure ):
41994199 _fields_ = [
@@ -4263,6 +4263,17 @@ def llama_perf_sampler_reset(chain: llama_sampler_p, /):
42634263 ...
42644264
42654265
# // print a breakdown of per-device memory use via LLAMA_LOG:
# LLAMA_API void llama_memory_breakdown_print(const struct llama_context * ctx);
@ctypes_function(
    "llama_memory_breakdown_print",
    [llama_context_p_ctypes],
    None,
)
def llama_memory_breakdown_print(ctx: llama_context_p, /):
    """Print a per-device breakdown of memory use for `ctx` via LLAMA_LOG.

    Thin ctypes binding for the native ``llama_memory_breakdown_print``;
    output goes to llama.cpp's logger (see ``llama_log_set``), nothing is
    returned. The body is a stub — the decorator dispatches to the shared
    library.
    """
    ...
4275+
4276+
42664277# //
42674278# // training
42684279# //
0 commit comments