#include <sstream>
#include <stdexcept>

-static const size_t kiB = 1024;
-static const size_t MiB = 1024*kiB;
-static const size_t GiB = 1024*MiB;
-
const char * llm_type_name(llm_type type) {
    switch (type) {
        case MODEL_14M: return "14M";
@@ -83,49 +79,6 @@ const char * llm_type_name(llm_type type) {
    }
}

-static std::string llama_model_ftype_name(llama_ftype ftype) {
-    if (ftype & LLAMA_FTYPE_GUESSED) {
-        return llama_model_ftype_name((enum llama_ftype) (ftype & ~LLAMA_FTYPE_GUESSED)) + " (guessed)";
-    }
-
-    switch (ftype) {
-        case LLAMA_FTYPE_ALL_F32:        return "all F32";
-        case LLAMA_FTYPE_MOSTLY_F16:     return "F16";
-        case LLAMA_FTYPE_MOSTLY_BF16:    return "BF16";
-        case LLAMA_FTYPE_MOSTLY_Q4_0:    return "Q4_0";
-        case LLAMA_FTYPE_MOSTLY_Q4_1:    return "Q4_1";
-        case LLAMA_FTYPE_MOSTLY_Q5_0:    return "Q5_0";
-        case LLAMA_FTYPE_MOSTLY_Q5_1:    return "Q5_1";
-        case LLAMA_FTYPE_MOSTLY_Q8_0:    return "Q8_0";
-        case LLAMA_FTYPE_MOSTLY_Q2_K:    return "Q2_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q2_K_S:  return "Q2_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_S:  return "Q3_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_M:  return "Q3_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q3_K_L:  return "Q3_K - Large";
-        case LLAMA_FTYPE_MOSTLY_Q4_K_S:  return "Q4_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q4_K_M:  return "Q4_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q5_K_S:  return "Q5_K - Small";
-        case LLAMA_FTYPE_MOSTLY_Q5_K_M:  return "Q5_K - Medium";
-        case LLAMA_FTYPE_MOSTLY_Q6_K:    return "Q6_K";
-        case LLAMA_FTYPE_MOSTLY_TQ1_0:   return "TQ1_0 - 1.69 bpw ternary";
-        case LLAMA_FTYPE_MOSTLY_TQ2_0:   return "TQ2_0 - 2.06 bpw ternary";
-        case LLAMA_FTYPE_MOSTLY_IQ2_XXS: return "IQ2_XXS - 2.0625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_XS:  return "IQ2_XS - 2.3125 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_S:   return "IQ2_S - 2.5 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ2_M:   return "IQ2_M - 2.7 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_XS:  return "IQ3_XS - 3.3 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_XXS: return "IQ3_XXS - 3.0625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ1_S:   return "IQ1_S - 1.5625 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ1_M:   return "IQ1_M - 1.75 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ4_NL:  return "IQ4_NL - 4.5 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ4_XS:  return "IQ4_XS - 4.25 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_S:   return "IQ3_S - 3.4375 bpw";
-        case LLAMA_FTYPE_MOSTLY_IQ3_M:   return "IQ3_S mix - 3.66 bpw";
-
-        default: return "unknown, may not work";
-    }
-}
-
static const char * llama_expert_gating_func_name(llama_expert_gating_func_type type) {
    switch (type) {
        case LLAMA_EXPERT_GATING_FUNC_TYPE_SOFTMAX: return "softmax";
@@ -142,10 +95,6 @@ std::string llama_model::type_name() const {
    return llm_type_name(type);
}

-std::string llama_model::ftype_name() const {
-    return llama_model_ftype_name(ftype);
-}
-
template <typename F>
static bool buft_supported(ggml_backend_buffer_type_t buft, ggml_backend_dev_t dev, F & fn) {
    ggml_init_params params = {
@@ -208,6 +157,10 @@ const struct ggml_tensor * llama_model::get_tensor(const char * name) const {
    return it->second;
}

+size_t llama_model::size() const {
+    return n_bytes;
+}
+
size_t llama_model::max_nodes() const {
    return std::max<size_t>(8192, tensors_by_name.size()*5);
}
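Note that the kiB/MiB/GiB constants and the "model size" log line are dropped by this commit, while the byte count itself moves behind the new `llama_model::size()` accessor. A caller could still reproduce the old human-readable size string from that accessor; the helper below is a hypothetical sketch, not part of the patch:

```cpp
#include <cstddef>
#include <cstdio>
#include <string>

// Hypothetical helper (not in this commit): formats a byte count the way the
// removed "model size" log line did, switching units at the old GiB threshold.
static std::string format_model_size(size_t n_bytes) {
    static const size_t GiB = 1024ull*1024*1024;
    char buf[64];
    if (n_bytes < GiB) {
        snprintf(buf, sizeof(buf), "%.2f MiB", n_bytes/1024.0/1024.0);
    } else {
        snprintf(buf, sizeof(buf), "%.2f GiB", n_bytes/1024.0/1024.0/1024.0);
    }
    return buf;
}
```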
@@ -1100,7 +1053,9 @@ void llama_model::load_hparams(llama_model_loader & ml) {
        default: throw std::runtime_error("unsupported model architecture");
    }

-    ftype = ml.ftype;
+    n_bytes = ml.n_bytes;
+
+    desc_str = arch_name() + " " + type_name() + " " + ml.ftype_name();

    if (hparams.f_max_alibi_bias > 0.0f) {
        hparams.use_alibi = true;
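With this change the description string is assembled once at load time and cached in `desc_str`, rather than being re-derived from `ftype` on every query. Illustratively, with example values that are assumed rather than taken from a real model file:

```cpp
#include <iostream>
#include <string>

int main() {
    // Example values only: arch_name() -> "llama", type_name() -> "7B",
    // ml.ftype_name() -> "Q4_K - Medium".
    const std::string desc_str = std::string("llama") + " " + "7B" + " " + "Q4_K - Medium";
    std::cout << desc_str << "\n";  // prints "llama 7B Q4_K - Medium"
}
```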
@@ -1820,7 +1775,11 @@ void llama_model::load_vocab(llama_model_loader & ml) {
    }
}

-void llama_model::print_meta(llama_model_loader & ml) {
+std::string llama_model::desc() const {
+    return desc_str;
+}
+
+void llama_model::print_info() const {
    const char * rope_scaling_type = LLAMA_ROPE_SCALING_TYPES.at(hparams.rope_scaling_type_train);

    auto print_f = [](const std::function<uint32_t(uint32_t)> & f, uint32_t n) {
@@ -1853,7 +1812,6 @@ void llama_model::print_meta(llama_model_loader & ml) {
    };

    // hparams
-    LLAMA_LOG_INFO("%s: format          = %s\n", __func__, llama_file_version_name(ml.fver));
    LLAMA_LOG_INFO("%s: arch            = %s\n", __func__, arch_name().c_str());
    LLAMA_LOG_INFO("%s: vocab type      = %s\n", __func__, llama_model_vocab_type_name(vocab.type));
    LLAMA_LOG_INFO("%s: n_vocab         = %u\n", __func__, hparams.n_vocab);
@@ -1897,20 +1855,14 @@ void llama_model::print_meta(llama_model_loader & ml) {
    }

    LLAMA_LOG_INFO("%s: model type      = %s\n", __func__, type_name().c_str());
-    LLAMA_LOG_INFO("%s: model ftype     = %s\n", __func__, ftype_name().c_str());
-    if (ml.n_elements >= 1e12) {
-        LLAMA_LOG_INFO("%s: model params    = %.2f T\n", __func__, ml.n_elements*1e-12);
-    } else if (ml.n_elements >= 1e9) {
-        LLAMA_LOG_INFO("%s: model params    = %.2f B\n", __func__, ml.n_elements*1e-9);
-    } else if (ml.n_elements >= 1e6) {
-        LLAMA_LOG_INFO("%s: model params    = %.2f M\n", __func__, ml.n_elements*1e-6);
-    } else {
-        LLAMA_LOG_INFO("%s: model params    = %.2f K\n", __func__, ml.n_elements*1e-3);
-    }
-    if (ml.n_bytes < GiB) {
-        LLAMA_LOG_INFO("%s: model size      = %.2f MiB (%.2f BPW)\n", __func__, ml.n_bytes/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+    if (n_elements >= 1e12) {
+        LLAMA_LOG_INFO("%s: model params    = %.2f T\n", __func__, n_elements*1e-12);
+    } else if (n_elements >= 1e9) {
+        LLAMA_LOG_INFO("%s: model params    = %.2f B\n", __func__, n_elements*1e-9);
+    } else if (n_elements >= 1e6) {
+        LLAMA_LOG_INFO("%s: model params    = %.2f M\n", __func__, n_elements*1e-6);
    } else {
-        LLAMA_LOG_INFO("%s: model size      = %.2f GiB (%.2f BPW)\n", __func__, ml.n_bytes/1024.0/1024.0/1024.0, ml.n_bytes*8.0/ml.n_elements);
+        LLAMA_LOG_INFO("%s: model params    = %.2f K\n", __func__, n_elements*1e-3);
    }

    // general kv
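The rewritten parameter-count log keeps the same T/B/M/K thresholds but reads `n_elements` from the model itself instead of from the loader. A standalone sketch of the cascade, with a hypothetical free function and `printf` standing in for `LLAMA_LOG_INFO`:

```cpp
#include <cstdint>
#include <cstdio>

// Sketch of the threshold cascade above; print_params is hypothetical.
static void print_params(uint64_t n_elements) {
    if (n_elements >= 1e12) {
        printf("model params = %.2f T\n", n_elements*1e-12);
    } else if (n_elements >= 1e9) {
        printf("model params = %.2f B\n", n_elements*1e-9);
    } else if (n_elements >= 1e6) {
        printf("model params = %.2f M\n", n_elements*1e-6);
    } else {
        printf("model params = %.2f K\n", n_elements*1e-3);
    }
}

int main() {
    print_params(6738415616ull);  // a ~6.74B model prints "model params = 6.74 B"
}
```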
@@ -2154,14 +2106,11 @@ int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int3
}

int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
-    return snprintf(buf, buf_size, "%s %s %s",
-            model->arch_name().c_str(),
-            model->type_name().c_str(),
-            model->ftype_name().c_str());
+    return snprintf(buf, buf_size, "%s", model->desc().c_str());
}

uint64_t llama_model_size(const struct llama_model * model) {
-    return model->n_bytes;
+    return model->size();
}

uint64_t llama_model_n_params(const struct llama_model * model) {
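From the public C API nothing changes for callers: `llama_model_desc` now just copies the cached string, and `llama_model_size` forwards to the new accessor. A minimal usage sketch, assuming a valid `llama_model *` obtained through the usual loading path (model acquisition and error handling elided):

```cpp
#include <cstdio>
#include "llama.h"

// Hedged usage sketch: `model` is assumed to come from llama.cpp's normal
// model-loading API.
void dump_model_info(const struct llama_model * model) {
    char desc[256];
    llama_model_desc(model, desc, sizeof(desc));  // e.g. "llama 7B Q4_K - Medium"
    printf("desc   = %s\n", desc);
    printf("size   = %llu bytes\n", (unsigned long long) llama_model_size(model));
    printf("params = %llu\n",       (unsigned long long) llama_model_n_params(model));
}
```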