@@ -296,7 +296,9 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # LLAMA_VOCAB_PRE_TYPE_GPT2 = 7,
 # LLAMA_VOCAB_PRE_TYPE_REFACT = 8,
 # LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9,
-# LLAMA_VOCAB_PRE_TYPE_OLMO = 10,
+# LLAMA_VOCAB_PRE_TYPE_QWEN2 = 10,
+# LLAMA_VOCAB_PRE_TYPE_OLMO = 11,
+# LLAMA_VOCAB_PRE_TYPE_DBRX = 12,
 # };
 LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0
 LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1
@@ -308,7 +310,9 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 LLAMA_VOCAB_PRE_TYPE_GPT2 = 7
 LLAMA_VOCAB_PRE_TYPE_REFACT = 8
 LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9
-LLAMA_VOCAB_PRE_TYPE_OLMO = 10
+LLAMA_VOCAB_PRE_TYPE_QWEN2 = 10
+LLAMA_VOCAB_PRE_TYPE_OLMO = 11
+LLAMA_VOCAB_PRE_TYPE_DBRX = 12


 # // note: these values should be synchronized with ggml_rope
@@ -377,6 +381,7 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 # LLAMA_FTYPE_MOSTLY_IQ2_M = 29, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ4_XS = 30, // except 1d tensors
 # LLAMA_FTYPE_MOSTLY_IQ1_M = 31, // except 1d tensors
+# LLAMA_FTYPE_MOSTLY_BF16 = 32, // except 1d tensors

 # LLAMA_FTYPE_GUESSED = 1024, // not specified in the model file
 # };
@@ -409,6 +414,8 @@ def byref(obj: CtypesCData, offset: Optional[int] = None) -> CtypesRef[CtypesCDa
 LLAMA_FTYPE_MOSTLY_IQ2_S = 28
 LLAMA_FTYPE_MOSTLY_IQ2_M = 29
 LLAMA_FTYPE_MOSTLY_IQ4_XS = 30
+LLAMA_FTYPE_MOSTLY_IQ1_M = 31
+LLAMA_FTYPE_MOSTLY_BF16 = 32
 LLAMA_FTYPE_GUESSED = 1024

 # enum llama_rope_scaling_type {
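
For context on the file-type addition: `LLAMA_FTYPE_MOSTLY_BF16` is the kind of value that can be set as the target `ftype` when quantizing a model through the low-level bindings in this module. A minimal sketch, assuming the `llama_model_quantize_default_params` / `llama_model_quantize` bindings exposed here; the file paths and thread count are placeholders:

```python
import ctypes
import llama_cpp

# Start from the library's default quantize parameters and request BF16 output.
params = llama_cpp.llama_model_quantize_default_params()
params.ftype = llama_cpp.LLAMA_FTYPE_MOSTLY_BF16  # constant added in this change
params.nthread = 4  # assumption: 4 worker threads (0 lets the library pick)

# Input/output paths are placeholders; both are GGUF files.
ret = llama_cpp.llama_model_quantize(
    b"model-f16.gguf",
    b"model-bf16.gguf",
    ctypes.byref(params),
)
if ret != 0:
    raise RuntimeError(f"llama_model_quantize failed with code {ret}")
```

The `LLAMA_VOCAB_PRE_TYPE_*` values, by contrast, are internal pre-tokenizer identifiers read from the model metadata and are not normally set by callers; they are mirrored here only to keep the enum in sync with llama.h.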