@@ -189,3 +189,225 @@ struct ggml_tensor * llama_model_get_tensor(const struct llama_model & model, co

     return it->second;
 }
+
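+// upper bound on the number of nodes in a ggml compute graph built for this model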
+size_t llama_model_max_nodes(const llama_model & model) {
+    return std::max<size_t>(8192, model.tensors_by_name.size()*5);
+}
+
+//
+// interface implementation
+//
+
+struct llama_model_params llama_model_default_params() {
+    struct llama_model_params result = {
+        /*.devices                     =*/ nullptr,
+        /*.n_gpu_layers                =*/ 0,
+        /*.split_mode                  =*/ LLAMA_SPLIT_MODE_LAYER,
+        /*.main_gpu                    =*/ 0,
+        /*.tensor_split                =*/ nullptr,
+        /*.rpc_servers                 =*/ nullptr,
+        /*.progress_callback           =*/ nullptr,
+        /*.progress_callback_user_data =*/ nullptr,
+        /*.kv_overrides                =*/ nullptr,
+        /*.vocab_only                  =*/ false,
+        /*.use_mmap                    =*/ true,
+        /*.use_mlock                   =*/ false,
+        /*.check_tensors               =*/ false,
+    };
+
+#ifdef GGML_USE_METAL
+    // note: we usually have plenty of VRAM, so by default offload all layers to the GPU
+    result.n_gpu_layers = 999;
+#endif
+
+    return result;
+}
+
+void llama_free_model(struct llama_model * model) {
+    delete model;
+}
+
+enum llama_vocab_type llama_vocab_type(const struct llama_model * model) {
+    return model->vocab.type;
+}
+
+int32_t llama_n_vocab(const struct llama_model * model) {
+    return model->hparams.n_vocab;
+}
+
+int32_t llama_n_ctx_train(const struct llama_model * model) {
+    return model->hparams.n_ctx_train;
+}
+
+int32_t llama_n_embd(const struct llama_model * model) {
+    return model->hparams.n_embd;
+}
+
+int32_t llama_n_layer(const struct llama_model * model) {
+    return model->hparams.n_layer;
+}
+
+int32_t llama_n_head(const struct llama_model * model) {
+    return model->hparams.n_head();
+}
+
+enum llama_rope_type llama_rope_type(const struct llama_model * model) {
+    switch (model->arch) {
+        // these models do not use RoPE
+        case LLM_ARCH_GPT2:
+        case LLM_ARCH_GPTJ:
+        case LLM_ARCH_MPT:
+        case LLM_ARCH_REFACT:
+        case LLM_ARCH_BLOOM:
+        case LLM_ARCH_MAMBA:
+        case LLM_ARCH_JINA_BERT_V2:
+        case LLM_ARCH_T5:
+        case LLM_ARCH_T5ENCODER:
+        case LLM_ARCH_JAIS:
+        case LLM_ARCH_RWKV6:
+        case LLM_ARCH_WAVTOKENIZER_DEC:
+            return LLAMA_ROPE_TYPE_NONE;
+
+        // use what we call a normal RoPE, operating on pairs of consecutive head values
+        case LLM_ARCH_LLAMA:
+        case LLM_ARCH_DECI:
+        case LLM_ARCH_BAICHUAN:
+        case LLM_ARCH_STARCODER:
+        case LLM_ARCH_PLAMO:
+        case LLM_ARCH_ORION:
+        case LLM_ARCH_INTERNLM2:
+        case LLM_ARCH_MINICPM:
+        case LLM_ARCH_XVERSE:
+        case LLM_ARCH_COMMAND_R:
+        case LLM_ARCH_OLMO:
+        case LLM_ARCH_ARCTIC:
+        case LLM_ARCH_DEEPSEEK:
+        case LLM_ARCH_DEEPSEEK2:
+        case LLM_ARCH_CHATGLM:
+        case LLM_ARCH_GRANITE:
+        case LLM_ARCH_GRANITE_MOE:
+        case LLM_ARCH_CHAMELEON:
+            return LLAMA_ROPE_TYPE_NORM;
+
+        // the pairs of head values are offset by n_rot/2
+        case LLM_ARCH_FALCON:
+        case LLM_ARCH_GROK:
+        case LLM_ARCH_DBRX:
+        case LLM_ARCH_BERT:
+        case LLM_ARCH_NOMIC_BERT:
+        case LLM_ARCH_STABLELM:
+        case LLM_ARCH_BITNET:
+        case LLM_ARCH_QWEN:
+        case LLM_ARCH_QWEN2:
+        case LLM_ARCH_QWEN2MOE:
+        case LLM_ARCH_OLMO2:
+        case LLM_ARCH_OLMOE:
+        case LLM_ARCH_PHI2:
+        case LLM_ARCH_PHI3:
+        case LLM_ARCH_GEMMA:
+        case LLM_ARCH_GEMMA2:
+        case LLM_ARCH_STARCODER2:
+        case LLM_ARCH_OPENELM:
+        case LLM_ARCH_GPTNEOX:
+        case LLM_ARCH_CODESHELL:
+        case LLM_ARCH_NEMOTRON:
+        case LLM_ARCH_EXAONE:
+        case LLM_ARCH_MINICPM3:
+            return LLAMA_ROPE_TYPE_NEOX;
+
+        case LLM_ARCH_QWEN2VL:
+            return LLAMA_ROPE_TYPE_MROPE;
+
+        // all model arches should be listed explicitly here
+        case LLM_ARCH_UNKNOWN:
+            GGML_ABORT("unknown architecture");
+    }
+
+    return LLAMA_ROPE_TYPE_NONE;
+}
+
+float llama_rope_freq_scale_train(const struct llama_model * model) {
+    return model->hparams.rope_freq_scale_train;
+}
+
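+// copy the string value for a metadata key into buf (truncated to buf_size);
+// returns the full value length, or -1 if the key is not present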
+int32_t llama_model_meta_val_str(const struct llama_model * model, const char * key, char * buf, size_t buf_size) {
+    const auto & it = model->gguf_kv.find(key);
+    if (it == model->gguf_kv.end()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    return snprintf(buf, buf_size, "%s", it->second.c_str());
+}
+
+int32_t llama_model_meta_count(const struct llama_model * model) {
+    return (int)model->gguf_kv.size();
+}
+
+int32_t llama_model_meta_key_by_index(const struct llama_model * model, int i, char * buf, size_t buf_size) {
+    if (i < 0 || i >= (int)model->gguf_kv.size()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    auto it = model->gguf_kv.begin();
+    std::advance(it, i);
+    return snprintf(buf, buf_size, "%s", it->first.c_str());
+}
+
+int32_t llama_model_meta_val_str_by_index(const struct llama_model * model, int32_t i, char * buf, size_t buf_size) {
+    if (i < 0 || i >= (int)model->gguf_kv.size()) {
+        if (buf_size > 0) {
+            buf[0] = '\0';
+        }
+        return -1;
+    }
+    auto it = model->gguf_kv.begin();
+    std::advance(it, i);
+    return snprintf(buf, buf_size, "%s", it->second.c_str());
+}
+
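+// short human-readable description of the model: architecture, model type and file type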
+int32_t llama_model_desc(const struct llama_model * model, char * buf, size_t buf_size) {
+    return snprintf(buf, buf_size, "%s %s %s",
+            llama_model_arch_name(*model).c_str(),
+            llama_model_type_name(*model).c_str(),
+            llama_model_ftype_name(*model).c_str());
+}
+
+uint64_t llama_model_size(const struct llama_model * model) {
+    return model->n_bytes;
+}
+
+uint64_t llama_model_n_params(const struct llama_model * model) {
+    return model->n_elements;
+}
+
+bool llama_model_has_encoder(const struct llama_model * model) {
+    switch (model->arch) {
+        case LLM_ARCH_T5:        return true;
+        case LLM_ARCH_T5ENCODER: return true;
+        default:                 return false;
+    }
+}
+
+bool llama_model_has_decoder(const struct llama_model * model) {
+    switch (model->arch) {
+        case LLM_ARCH_T5ENCODER: return false;
+        default:                 return true;
+    }
+}
+
+llama_token llama_model_decoder_start_token(const struct llama_model * model) {
+    return model->hparams.dec_start_token_id;
+}
+
+bool llama_model_is_recurrent(const struct llama_model * model) {
+    switch (model->arch) {
+        case LLM_ARCH_MAMBA: return true;
+        case LLM_ARCH_RWKV6: return true;
+        default:             return false;
+    }
+}
+
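For reference, a minimal sketch of how the query functions added here could be driven from client code. It assumes the existing public entry points from llama.h (llama_backend_init, llama_load_model_from_file, llama_backend_free), which are not part of this diff, and a GGUF model path passed on the command line:

#include "llama.h"

#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model.gguf>\n", argv[0]);
        return 1;
    }

    llama_backend_init();

    // vocab_only skips loading the weight data; this sketch assumes (not verified here)
    // that the GGUF metadata and model statistics are still populated in that mode
    llama_model_params mparams = llama_model_default_params();
    mparams.vocab_only = true;

    llama_model * model = llama_load_model_from_file(argv[1], mparams);
    if (model == nullptr) {
        fprintf(stderr, "failed to load model: %s\n", argv[1]);
        return 1;
    }

    // one-line description: architecture, model type, file type
    char desc[128];
    llama_model_desc(model, desc, sizeof(desc));
    printf("%s (%llu params, %llu bytes)\n", desc,
           (unsigned long long) llama_model_n_params(model),
           (unsigned long long) llama_model_size(model));

    // dump all GGUF key/value metadata pairs
    const int32_t n_meta = llama_model_meta_count(model);
    for (int32_t i = 0; i < n_meta; ++i) {
        char key[256];
        char val[256];
        llama_model_meta_key_by_index    (model, i, key, sizeof(key));
        llama_model_meta_val_str_by_index(model, i, val, sizeof(val));
        printf("%s = %s\n", key, val);
    }

    llama_free_model(model);
    llama_backend_free();

    return 0;
}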