|
8 | 8 |
|
9 | 9 | // pre-tokenization types
|
10 | 10 | enum llama_vocab_pre_type {
|
11 |
| - LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0, |
12 |
| - LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1, |
13 |
| - LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2, |
14 |
| - LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3, |
15 |
| - LLAMA_VOCAB_PRE_TYPE_FALCON = 4, |
16 |
| - LLAMA_VOCAB_PRE_TYPE_MPT = 5, |
17 |
| - LLAMA_VOCAB_PRE_TYPE_STARCODER = 6, |
18 |
| - LLAMA_VOCAB_PRE_TYPE_GPT2 = 7, |
19 |
| - LLAMA_VOCAB_PRE_TYPE_REFACT = 8, |
20 |
| - LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9, |
21 |
| - LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10, |
22 |
| - LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11, |
23 |
| - LLAMA_VOCAB_PRE_TYPE_OLMO = 12, |
24 |
| - LLAMA_VOCAB_PRE_TYPE_DBRX = 13, |
25 |
| - LLAMA_VOCAB_PRE_TYPE_SMAUG = 14, |
26 |
| - LLAMA_VOCAB_PRE_TYPE_PORO = 15, |
27 |
| - LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16, |
28 |
| - LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17, |
29 |
| - LLAMA_VOCAB_PRE_TYPE_VIKING = 18, |
30 |
| - LLAMA_VOCAB_PRE_TYPE_JAIS = 19, |
31 |
| - LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20, |
32 |
| - LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21, |
33 |
| - LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22, |
34 |
| - LLAMA_VOCAB_PRE_TYPE_BLOOM = 23, |
35 |
| - LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24, |
36 |
| - LLAMA_VOCAB_PRE_TYPE_EXAONE = 25, |
37 |
| - LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26, |
38 |
| - LLAMA_VOCAB_PRE_TYPE_MINERVA = 27, |
39 |
| - LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28, |
40 |
| - LLAMA_VOCAB_PRE_TYPE_GPT4O = 29, |
41 |
| - LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30, |
42 |
| - LLAMA_VOCAB_PRE_TYPE_TRILLION = 31, |
43 |
| - LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32, |
44 |
| - LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33, |
45 |
| - LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34, |
46 |
| - LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35, |
47 |
| - LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 36, |
48 |
| - LLAMA_VOCAB_PRE_TYPE_KIMI_K2 = 37, |
49 |
| - LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE = 38, |
50 |
| - LLAMA_VOCAB_PRE_TYPE_GROK_2 = 39, |
| 11 | + LLAMA_VOCAB_PRE_TYPE_DEFAULT = 0, |
| 12 | + LLAMA_VOCAB_PRE_TYPE_LLAMA3 = 1, |
| 13 | + LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_LLM = 2, |
| 14 | + LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER = 3, |
| 15 | + LLAMA_VOCAB_PRE_TYPE_FALCON = 4, |
| 16 | + LLAMA_VOCAB_PRE_TYPE_MPT = 5, |
| 17 | + LLAMA_VOCAB_PRE_TYPE_STARCODER = 6, |
| 18 | + LLAMA_VOCAB_PRE_TYPE_GPT2 = 7, |
| 19 | + LLAMA_VOCAB_PRE_TYPE_REFACT = 8, |
| 20 | + LLAMA_VOCAB_PRE_TYPE_COMMAND_R = 9, |
| 21 | + LLAMA_VOCAB_PRE_TYPE_STABLELM2 = 10, |
| 22 | + LLAMA_VOCAB_PRE_TYPE_QWEN2 = 11, |
| 23 | + LLAMA_VOCAB_PRE_TYPE_OLMO = 12, |
| 24 | + LLAMA_VOCAB_PRE_TYPE_DBRX = 13, |
| 25 | + LLAMA_VOCAB_PRE_TYPE_SMAUG = 14, |
| 26 | + LLAMA_VOCAB_PRE_TYPE_PORO = 15, |
| 27 | + LLAMA_VOCAB_PRE_TYPE_CHATGLM3 = 16, |
| 28 | + LLAMA_VOCAB_PRE_TYPE_CHATGLM4 = 17, |
| 29 | + LLAMA_VOCAB_PRE_TYPE_VIKING = 18, |
| 30 | + LLAMA_VOCAB_PRE_TYPE_JAIS = 19, |
| 31 | + LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20, |
| 32 | + LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21, |
| 33 | + LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22, |
| 34 | + LLAMA_VOCAB_PRE_TYPE_BLOOM = 23, |
| 35 | + LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24, |
| 36 | + LLAMA_VOCAB_PRE_TYPE_EXAONE = 25, |
| 37 | + LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26, |
| 38 | + LLAMA_VOCAB_PRE_TYPE_MINERVA = 27, |
| 39 | + LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 28, |
| 40 | + LLAMA_VOCAB_PRE_TYPE_GPT4O = 29, |
| 41 | + LLAMA_VOCAB_PRE_TYPE_SUPERBPE = 30, |
| 42 | + LLAMA_VOCAB_PRE_TYPE_TRILLION = 31, |
| 43 | + LLAMA_VOCAB_PRE_TYPE_BAILINGMOE = 32, |
| 44 | + LLAMA_VOCAB_PRE_TYPE_LLAMA4 = 33, |
| 45 | + LLAMA_VOCAB_PRE_TYPE_PIXTRAL = 34, |
| 46 | + LLAMA_VOCAB_PRE_TYPE_SEED_CODER = 35, |
| 47 | + LLAMA_VOCAB_PRE_TYPE_HUNYUAN = 36, |
| 48 | + LLAMA_VOCAB_PRE_TYPE_KIMI_K2 = 37, |
| 49 | + LLAMA_VOCAB_PRE_TYPE_HUNYUAN_DENSE = 38, |
| 50 | + LLAMA_VOCAB_PRE_TYPE_GROK_2 = 39, |
| 51 | + LLAMA_VOCAB_PRE_TYPE_GRANITE_DOCLING = 40, |
51 | 52 | };
|
52 | 53 |
|
53 | 54 | struct LLM_KV;
|
|
0 commit comments