| 
8 | 8 | 
 
  | 
9 | 9 | // pre-tokenization types  | 
10 | 10 | enum llama_vocab_pre_type {  | 
11 |  | -    LLAMA_VOCAB_PRE_TYPE_DEFAULT        = 0,  | 
12 |  | -    LLAMA_VOCAB_PRE_TYPE_LLAMA3         = 1,  | 
13 |  | -    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM   = 2,  | 
14 |  | -    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3,  | 
15 |  | -    LLAMA_VOCAB_PRE_TYPE_FALCON         = 4,  | 
16 |  | -    LLAMA_VOCAB_PRE_TYPE_MPT            = 5,  | 
17 |  | -    LLAMA_VOCAB_PRE_TYPE_STARCODER      = 6,  | 
18 |  | -    LLAMA_VOCAB_PRE_TYPE_GPT2           = 7,  | 
19 |  | -    LLAMA_VOCAB_PRE_TYPE_REFACT         = 8,  | 
20 |  | -    LLAMA_VOCAB_PRE_TYPE_COMMAND_R      = 9,  | 
21 |  | -    LLAMA_VOCAB_PRE_TYPE_STABLELM2      = 10,  | 
22 |  | -    LLAMA_VOCAB_PRE_TYPE_QWEN2          = 11,  | 
23 |  | -    LLAMA_VOCAB_PRE_TYPE_OLMO           = 12,  | 
24 |  | -    LLAMA_VOCAB_PRE_TYPE_DBRX           = 13,  | 
25 |  | -    LLAMA_VOCAB_PRE_TYPE_SMAUG          = 14,  | 
26 |  | -    LLAMA_VOCAB_PRE_TYPE_PORO           = 15,  | 
27 |  | -    LLAMA_VOCAB_PRE_TYPE_CHATGLM3       = 16,  | 
28 |  | -    LLAMA_VOCAB_PRE_TYPE_CHATGLM4       = 17,  | 
29 |  | -    LLAMA_VOCAB_PRE_TYPE_VIKING         = 18,  | 
30 |  | -    LLAMA_VOCAB_PRE_TYPE_JAIS           = 19,  | 
31 |  | -    LLAMA_VOCAB_PRE_TYPE_TEKKEN         = 20,  | 
32 |  | -    LLAMA_VOCAB_PRE_TYPE_SMOLLM         = 21,  | 
33 |  | -    LLAMA_VOCAB_PRE_TYPE_CODESHELL      = 22,  | 
34 |  | -    LLAMA_VOCAB_PRE_TYPE_BLOOM          = 23,  | 
35 |  | -    LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH   = 24,  | 
36 |  | -    LLAMA_VOCAB_PRE_TYPE_EXAONE         = 25,  | 
37 |  | -    LLAMA_VOCAB_PRE_TYPE_CHAMELEON      = 26,  | 
38 |  | -    LLAMA_VOCAB_PRE_TYPE_MINERVA        = 27,  | 
39 |  | -    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM  = 28,  | 
40 |  | -    LLAMA_VOCAB_PRE_TYPE_GPT4O          = 29,  | 
41 |  | -    LLAMA_VOCAB_PRE_TYPE_SUPERBPE       = 30,  | 
42 |  | -    LLAMA_VOCAB_PRE_TYPE_TRILLION       = 31,  | 
43 |  | -    LLAMA_VOCAB_PRE_TYPE_BAILINGMOE     = 32,  | 
44 |  | -    LLAMA_VOCAB_PRE_TYPE_LLAMA4         = 33,  | 
45 |  | -    LLAMA_VOCAB_PRE_TYPE_PIXTRAL        = 34,  | 
46 |  | -    LLAMA_VOCAB_PRE_TYPE_SEED_CODER     = 35,  | 
47 |  | -    LLAMA_VOCAB_PRE_TYPE_HUNYUAN        = 36,  | 
48 |  | -    LLAMA_VOCAB_PRE_TYPE_KIMI_K2        = 37,  | 
49 |  | -    LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE  = 38,  | 
50 |  | -    LLAMA_VOCAB_PRE_TYPE_GROK_2         = 39,  | 
 | 11 | +    LLAMA_VOCAB_PRE_TYPE_DEFAULT         = 0,  | 
 | 12 | +    LLAMA_VOCAB_PRE_TYPE_LLAMA3          = 1,  | 
 | 13 | +    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM    = 2,  | 
 | 14 | +    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER  = 3,  | 
 | 15 | +    LLAMA_VOCAB_PRE_TYPE_FALCON          = 4,  | 
 | 16 | +    LLAMA_VOCAB_PRE_TYPE_MPT             = 5,  | 
 | 17 | +    LLAMA_VOCAB_PRE_TYPE_STARCODER       = 6,  | 
 | 18 | +    LLAMA_VOCAB_PRE_TYPE_GPT2            = 7,  | 
 | 19 | +    LLAMA_VOCAB_PRE_TYPE_REFACT          = 8,  | 
 | 20 | +    LLAMA_VOCAB_PRE_TYPE_COMMAND_R       = 9,  | 
 | 21 | +    LLAMA_VOCAB_PRE_TYPE_STABLELM2       = 10,  | 
 | 22 | +    LLAMA_VOCAB_PRE_TYPE_QWEN2           = 11,  | 
 | 23 | +    LLAMA_VOCAB_PRE_TYPE_OLMO            = 12,  | 
 | 24 | +    LLAMA_VOCAB_PRE_TYPE_DBRX            = 13,  | 
 | 25 | +    LLAMA_VOCAB_PRE_TYPE_SMAUG           = 14,  | 
 | 26 | +    LLAMA_VOCAB_PRE_TYPE_PORO            = 15,  | 
 | 27 | +    LLAMA_VOCAB_PRE_TYPE_CHATGLM3        = 16,  | 
 | 28 | +    LLAMA_VOCAB_PRE_TYPE_CHATGLM4        = 17,  | 
 | 29 | +    LLAMA_VOCAB_PRE_TYPE_VIKING          = 18,  | 
 | 30 | +    LLAMA_VOCAB_PRE_TYPE_JAIS            = 19,  | 
 | 31 | +    LLAMA_VOCAB_PRE_TYPE_TEKKEN          = 20,  | 
 | 32 | +    LLAMA_VOCAB_PRE_TYPE_SMOLLM          = 21,  | 
 | 33 | +    LLAMA_VOCAB_PRE_TYPE_CODESHELL       = 22,  | 
 | 34 | +    LLAMA_VOCAB_PRE_TYPE_BLOOM           = 23,  | 
 | 35 | +    LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH    = 24,  | 
 | 36 | +    LLAMA_VOCAB_PRE_TYPE_EXAONE          = 25,  | 
 | 37 | +    LLAMA_VOCAB_PRE_TYPE_CHAMELEON       = 26,  | 
 | 38 | +    LLAMA_VOCAB_PRE_TYPE_MINERVA         = 27,  | 
 | 39 | +    LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM   = 28,  | 
 | 40 | +    LLAMA_VOCAB_PRE_TYPE_GPT4O           = 29,  | 
 | 41 | +    LLAMA_VOCAB_PRE_TYPE_SUPERBPE        = 30,  | 
 | 42 | +    LLAMA_VOCAB_PRE_TYPE_TRILLION        = 31,  | 
 | 43 | +    LLAMA_VOCAB_PRE_TYPE_BAILINGMOE      = 32,  | 
 | 44 | +    LLAMA_VOCAB_PRE_TYPE_LLAMA4          = 33,  | 
 | 45 | +    LLAMA_VOCAB_PRE_TYPE_PIXTRAL         = 34,  | 
 | 46 | +    LLAMA_VOCAB_PRE_TYPE_SEED_CODER      = 35,  | 
 | 47 | +    LLAMA_VOCAB_PRE_TYPE_HUNYUAN         = 36,  | 
 | 48 | +    LLAMA_VOCAB_PRE_TYPE_KIMI_K2         = 37,  | 
 | 49 | +    LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE   = 38,  | 
 | 50 | +    LLAMA_VOCAB_PRE_TYPE_GROK_2          = 39,  | 
 | 51 | +    LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING = 40,  | 
51 | 52 | };  | 
52 | 53 | 
 
  | 
53 | 54 | struct LLM_KV;  | 
 | 
0 commit comments