@@ -2907,9 +2907,15 @@ struct llama_model {
29072907 // for quantize-stats only
29082908 std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
29092909
2910- int64_t t_load_us = 0;
2910+ int64_t t_load_us = 0;
29112911 int64_t t_start_us = 0;
29122912
2913+ // total number of parameters in the model
2914+ uint64_t n_elements = 0;
2915+
2916+ // total size of all the tensors in the model in bytes
2917+ size_t n_bytes = 0;
2918+
29132919 // keep track of loaded lora adapters
29142920 std::set<struct llama_lora_adapter *> lora_adapters;
29152921
@@ -4275,8 +4281,8 @@ struct llama_model_loader {
42754281 int n_tensors = 0;
42764282 int n_created = 0;
42774283
4278- int64_t n_elements = 0;
4279- size_t n_bytes = 0;
4284+ uint64_t n_elements = 0;
4285+ size_t n_bytes = 0;
42804286
42814287 bool use_mmap = false;
42824288 bool check_tensors;
@@ -5344,6 +5350,11 @@ static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
53445350 }
53455351}
53465352
5353+ static void llm_load_stats(llama_model_loader & ml, llama_model & model) { // copies the loader's aggregate tensor statistics into the model
5354+ model.n_elements = ml.n_elements; // element count as tracked by the loader
5355+ model.n_bytes = ml.n_bytes; // byte size as tracked by the loader
5356+ }
5357+
53475358static void llm_load_arch(llama_model_loader & ml, llama_model & model) {
53485359 model.arch = ml.get_arch();
53495360 if (model.arch == LLM_ARCH_UNKNOWN) {
@@ -9256,6 +9267,7 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
92569267 throw std::runtime_error("error loading model vocabulary: " + std::string(e.what()));
92579268 }
92589269
9270+ llm_load_stats(ml, model);
92599271 llm_load_print_meta(ml, model);
92609272
92619273 if (model.vocab.type != LLAMA_VOCAB_TYPE_NONE &&
@@ -18601,6 +18613,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
1860118613 llama_model model;
1860218614 llm_load_arch(ml, model);
1860318615 llm_load_hparams(ml, model);
18616+ llm_load_stats(ml, model);
1860418617
1860518618 struct quantize_state_internal qs(model, params);
1860618619
@@ -19953,19 +19966,11 @@ int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t bu
1995319966}
1995419967
1995519968uint64_t llama_model_size(const struct llama_model * model) { // returns the byte size stored on the model (model->n_bytes)
19956- uint64_t size = 0;
19957- for (const auto & it : model->tensors_by_name) {
19958- size += ggml_nbytes(it.second);
19959- }
19960- return size;
19969+ return model->n_bytes; // replaces the removed ('-') loop that summed ggml_nbytes() over tensors_by_name; n_bytes is a size_t, widened to uint64_t on return
1996119970}
1996219971
1996319972uint64_t llama_model_n_params(const struct llama_model * model) { // returns the element count stored on the model (model->n_elements)
19964- uint64_t nparams = 0;
19965- for (const auto & it : model->tensors_by_name) {
19966- nparams += ggml_nelements(it.second);
19967- }
19968- return nparams;
19973+ return model->n_elements; // replaces the removed ('-') loop that summed ggml_nelements() over tensors_by_name
1996919974}
1997019975
1997119976struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name) {
0 commit comments