 #define LLAMA_MAX_LAYERS  512
 #define LLAMA_MAX_EXPERTS 160  // DeepSeekV2
 
-// available llama models
-enum e_model {
+// available models
+// TODO: this enum does not follow the enum naming convention
+enum llm_type {
     MODEL_UNKNOWN,
     MODEL_14M,
     MODEL_17M,
@@ -81,73 +82,6 @@ enum e_model {
     MODEL_27B,
 };
 
-static const char * llama_model_type_name(e_model type) {
-    switch (type) {
-        case MODEL_14M:           return "14M";
-        case MODEL_17M:           return "17M";
-        case MODEL_22M:           return "22M";
-        case MODEL_33M:           return "33M";
-        case MODEL_60M:           return "60M";
-        case MODEL_70M:           return "70M";
-        case MODEL_80M:           return "80M";
-        case MODEL_109M:          return "109M";
-        case MODEL_137M:          return "137M";
-        case MODEL_160M:          return "160M";
-        case MODEL_220M:          return "220M";
-        case MODEL_250M:          return "250M";
-        case MODEL_270M:          return "270M";
-        case MODEL_335M:          return "335M";
-        case MODEL_410M:          return "410M";
-        case MODEL_450M:          return "450M";
-        case MODEL_770M:          return "770M";
-        case MODEL_780M:          return "780M";
-        case MODEL_0_5B:          return "0.5B";
-        case MODEL_1B:            return "1B";
-        case MODEL_1_3B:          return "1.3B";
-        case MODEL_1_4B:          return "1.4B";
-        case MODEL_1_5B:          return "1.5B";
-        case MODEL_1_6B:          return "1.6B";
-        case MODEL_2B:            return "2B";
-        case MODEL_2_8B:          return "2.8B";
-        case MODEL_3B:            return "3B";
-        case MODEL_4B:            return "4B";
-        case MODEL_6B:            return "6B";
-        case MODEL_6_9B:          return "6.9B";
-        case MODEL_7B:            return "7B";
-        case MODEL_8B:            return "8B";
-        case MODEL_9B:            return "9B";
-        case MODEL_11B:           return "11B";
-        case MODEL_12B:           return "12B";
-        case MODEL_13B:           return "13B";
-        case MODEL_14B:           return "14B";
-        case MODEL_15B:           return "15B";
-        case MODEL_16B:           return "16B";
-        case MODEL_20B:           return "20B";
-        case MODEL_30B:           return "30B";
-        case MODEL_32B:           return "32B";
-        case MODEL_34B:           return "34B";
-        case MODEL_35B:           return "35B";
-        case MODEL_40B:           return "40B";
-        case MODEL_65B:           return "65B";
-        case MODEL_70B:           return "70B";
-        case MODEL_236B:          return "236B";
-        case MODEL_314B:          return "314B";
-        case MODEL_SMALL:         return "0.1B";
-        case MODEL_MEDIUM:        return "0.4B";
-        case MODEL_LARGE:         return "0.8B";
-        case MODEL_XL:            return "1.5B";
-        case MODEL_A1_7B:         return "A1.7B";
-        case MODEL_A2_7B:         return "A2.7B";
-        case MODEL_8x7B:          return "8x7B";
-        case MODEL_8x22B:         return "8x22B";
-        case MODEL_16x12B:        return "16x12B";
-        case MODEL_10B_128x3_66B: return "10B+128x3.66B";
-        case MODEL_57B_A14B:      return "57B.A14B";
-        case MODEL_27B:           return "27B";
-        default:                  return "?B";
-    }
-}
-
 struct llama_hparams_posnet {
     uint32_t n_embd;
     uint32_t n_layer;
@@ -187,27 +121,27 @@ struct llama_hparams {
     std::array<uint32_t, LLAMA_MAX_LAYERS> n_ff_arr;
 
     uint32_t n_layer_dense_lead = 0;
-    uint32_t n_lora_q = 0;
-    uint32_t n_lora_kv = 0;
-    uint32_t n_ff_exp = 0;
-    uint32_t n_ff_shexp = 0;
-    uint32_t n_expert_shared = 0;
-    float expert_weights_scale = 0.0;
+    uint32_t n_lora_q           = 0;
+    uint32_t n_lora_kv          = 0;
+    uint32_t n_ff_exp           = 0;
+    uint32_t n_ff_shexp         = 0;
+    uint32_t n_expert_shared    = 0;
+    uint32_t n_norm_groups      = 0;
+
+    float    expert_weights_scale = 0.0;
 
     float f_norm_eps;
     float f_norm_rms_eps;
     float f_norm_group_eps;
 
-    uint32_t n_norm_groups;
-
-    float f_attn_logit_softcapping = 50.0f;
+    float f_attn_logit_softcapping  = 50.0f;
     float f_final_logit_softcapping = 30.0f;
 
     // for RWKV
     uint32_t rescale_every_n_layers = 0;
-    uint32_t time_mix_extra_dim = 0;
-    uint32_t time_decay_extra_dim = 0;
-    uint32_t wkv_head_size = 0;
+    uint32_t time_mix_extra_dim     = 0;
+    uint32_t time_decay_extra_dim   = 0;
+    uint32_t wkv_head_size          = 0;
 
     float rope_attn_factor = 1.0f;
     float rope_freq_base_train;
@@ -221,6 +155,7 @@ struct llama_hparams {
     uint32_t ssm_d_inner = 0;
     uint32_t ssm_d_state = 0;
     uint32_t ssm_dt_rank = 0;
+
     bool ssm_dt_b_c_rms = false;
 
     float f_clamp_kqv = 0.0f;
@@ -518,34 +453,35 @@ struct llama_layer {
 };
 
 struct llama_model {
-    e_model type = MODEL_UNKNOWN;
-    llm_arch arch = LLM_ARCH_UNKNOWN;
+    llm_type type = MODEL_UNKNOWN;
+    llm_arch arch = LLM_ARCH_UNKNOWN;
+
     llama_ftype ftype = LLAMA_FTYPE_ALL_F32;
 
     std::string name = "n/a";
 
     llama_hparams hparams = {};
     llama_vocab vocab;
 
-    struct ggml_tensor * tok_embd = nullptr;
-    struct ggml_tensor * type_embd = nullptr;
-    struct ggml_tensor * pos_embd = nullptr;
-    struct ggml_tensor * tok_norm = nullptr;
+    struct ggml_tensor * tok_embd   = nullptr;
+    struct ggml_tensor * type_embd  = nullptr;
+    struct ggml_tensor * pos_embd   = nullptr;
+    struct ggml_tensor * tok_norm   = nullptr;
     struct ggml_tensor * tok_norm_b = nullptr;
 
-    struct ggml_tensor * output_norm = nullptr;
-    struct ggml_tensor * output_norm_b = nullptr;
-    struct ggml_tensor * output = nullptr;
-    struct ggml_tensor * output_b = nullptr;
+    struct ggml_tensor * output_norm     = nullptr;
+    struct ggml_tensor * output_norm_b   = nullptr;
+    struct ggml_tensor * output          = nullptr;
+    struct ggml_tensor * output_b        = nullptr;
     struct ggml_tensor * output_norm_enc = nullptr;
 
     // classifier
-    struct ggml_tensor * cls = nullptr;
-    struct ggml_tensor * cls_b = nullptr;
+    struct ggml_tensor * cls       = nullptr;
+    struct ggml_tensor * cls_b     = nullptr;
     struct ggml_tensor * cls_out   = nullptr;
     struct ggml_tensor * cls_out_b = nullptr;
 
-    struct ggml_tensor * conv1d = nullptr;
+    struct ggml_tensor * conv1d   = nullptr;
     struct ggml_tensor * conv1d_b = nullptr;
 
     std::vector<llama_layer> layers;
@@ -611,6 +547,11 @@ struct llama_model {
     }
 };
 
-ggml_backend_buffer_type_t llama_model_select_buft(const llama_model & model, int il);
+const char * llm_type_name(llm_type type);
 
-std::string llama_model_ftype_name(llama_ftype ftype);
+std::string llama_model_arch_name (const llama_model & model);
+std::string llama_model_type_name (const llama_model & model);
+std::string llama_model_ftype_name(const llama_model & model);
+
+// TODO: this probably belongs to llama-adapter
+ggml_backend_buffer_type_t llama_model_select_buft(const llama_model & model, int il);
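
For reference, a minimal sketch of how the declarations added at the end of this diff might be called, assuming the refactored model header from this change is included and a `llama_model` instance has already been populated by the usual loading path; `print_model_summary` is a hypothetical helper for illustration, not part of the commit:

#include <cstdio>

// Hypothetical usage sketch (not part of this commit): log a short model summary
// using the helpers declared above. Assumes `model` was filled in by the loader.
static void print_model_summary(const llama_model & model) {
    // llm_type_name() maps the MODEL_* enum value to its size string ("7B", "8x22B", ...)
    printf("type : %s\n", llm_type_name(model.type));
    // the std::string helpers now take the whole model instead of individual fields
    printf("arch : %s\n", llama_model_arch_name (model).c_str());
    printf("size : %s\n", llama_model_type_name (model).c_str());
    printf("ftype: %s\n", llama_model_ftype_name(model).c_str());
}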