Skip to content

Commit 0a0f91d

Browse files
authored
save number of parameters and the size in llama_model
#10285
1 parent af148c9 commit 0a0f91d

File tree

1 file changed

+15
-12
lines changed

1 file changed

+15
-12
lines changed

src/llama.cpp

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2907,6 +2907,9 @@ struct llama_model {
29072907
// for quantize-stats only
29082908
std::vector<std::pair<std::string, struct ggml_tensor *>> tensors_by_name;
29092909

2910+
uint64_t n_bytes = 0;
2911+
uint64_t n_elements = 0;
2912+
29102913
int64_t t_load_us = 0;
29112914
int64_t t_start_us = 0;
29122915

@@ -5344,6 +5347,11 @@ static const char * llama_model_vocab_type_name(enum llama_vocab_type type){
53445347
}
53455348
}
53465349

5350+
// Copy the loader's accumulated totals (tensor element count and byte size,
// gathered while reading the GGUF tensor metadata) into the model, so that
// llama_model_n_params() / llama_model_size() can return them in O(1)
// without walking tensors_by_name.
static void llm_load_stats(llama_model_loader & ml, llama_model & model) {
    model.n_elements = ml.n_elements;
    model.n_bytes    = ml.n_bytes;
}
5354+
53475355
static void llm_load_arch(llama_model_loader & ml, llama_model & model) {
53485356
model.arch = ml.get_arch();
53495357
if (model.arch == LLM_ARCH_UNKNOWN) {
@@ -9252,6 +9260,8 @@ static int llama_model_load(const std::string & fname, llama_model & model, llam
92529260
throw std::runtime_error("error loading model vocabulary: " + std::string(e.what()));
92539261
}
92549262

9263+
llm_load_stats(ml, model);
9264+
92559265
llm_load_print_meta(ml, model);
92569266

92579267
if (model.vocab.type != LLAMA_VOCAB_TYPE_NONE &&
@@ -18597,6 +18607,7 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
1859718607
llama_model model;
1859818608
llm_load_arch(ml, model);
1859918609
llm_load_hparams(ml, model);
18610+
llm_load_stats(ml, model);
1860018611

1860118612
struct quantize_state_internal qs(model, params);
1860218613

@@ -19948,20 +19959,12 @@ int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t bu
1994819959
llama_model_ftype_name(model->ftype).c_str());
1994919960
}
1995019961

19951-
uint64_t llama_model_size(const struct llama_model * model) {
19952-
uint64_t size = 0;
19953-
for (const auto & it : model->tensors_by_name) {
19954-
size += ggml_nbytes(it.second);
19955-
}
19956-
return size;
19962+
uint64_t llama_model_size(const struct llama_model *model) {
19963+
return model->n_bytes;
1995719964
}
1995819965

19959-
uint64_t llama_model_n_params(const struct llama_model * model) {
19960-
uint64_t nparams = 0;
19961-
for (const auto & it : model->tensors_by_name) {
19962-
nparams += ggml_nelements(it.second);
19963-
}
19964-
return nparams;
19966+
uint64_t llama_model_n_params(const struct llama_model *model) {
19967+
return model->n_elements;
1996519968
}
1996619969

1996719970
struct ggml_tensor * llama_get_model_tensor(struct llama_model * model, const char * name) {

0 commit comments

Comments
 (0)