@@ -54,6 +54,7 @@ export enum TransformerLLMPoolingType {
 
 export const LLM_ARCHITECTURES = [
 	"llama",
+	"deci",
 	"falcon",
 	"grok",
 	"gpt2",
@@ -71,34 +72,52 @@ export const LLM_ARCHITECTURES = [
 	"qwen",
 	"qwen2",
 	"qwen2moe",
+	"qwen2vl",
 	"phi2",
 	"phi3",
+	"phimoe",
 	"plamo",
 	"codeshell",
 	"orion",
 	"internlm2",
 	"minicpm",
+	"minicpm3",
 	"gemma",
 	"gemma2",
 	"starcoder2",
 	"mamba",
 	"xverse",
 	"command-r",
+	"cohere2",
 	"dbrx",
 	"olmo",
+	"olmo2",
+	"olmoe",
 	"openelm",
 	"arctic",
+	"deepseek",
 	"deepseek2",
 	"chatglm",
 	"bitnet",
 	"t5",
 	"t5encoder",
 	"jais",
+	"nemotron",
+	"exaone",
+	"rwkv6",
+	"rwkv6qwen2",
+	"granite",
+	"granitemoe",
+	"chameleon",
+	"wavtokenizer-dec",
 ] as const;
 type LLMArchitecture = (typeof LLM_ARCHITECTURES)[number];
 export type ArchLlama = TransformerLLMBase<"llama"> & {
 	"llama.attention.layer_norm_rms_epsilon": number;
 };
+export type ArchDeci = TransformerLLMBase<"deci"> & {
+	"deci.attention.layer_norm_rms_epsilon": number;
+};
 export type ArchFalcon = TransformerLLMBase<"falcon"> & {
 	"falcon.attention.layer_norm_epsilon": number;
 };
@@ -130,19 +149,16 @@ export type ArchRefact = TransformerLLMBase<"refact"> & {
 export type ArchBert = TransformerLLMBase<"bert"> & {
 	"bert.attention.layer_norm_epsilon": number;
 	"bert.attention.causal": boolean;
-	"tokenizer.ggml.token_type_count": number;
 	"bert.pooling_type": TransformerLLMPoolingType;
 };
 export type ArchNomicBert = TransformerLLMBase<"nomic-bert"> & {
 	"nomic-bert.attention.layer_norm_epsilon": number;
 	"nomic-bert.attention.causal": boolean;
-	"tokenizer.ggml.token_type_count": number;
 	"nomic-bert.pooling_type": TransformerLLMPoolingType;
 };
 export type ArchJinaBertV2 = TransformerLLMBase<"jina-bert-v2"> & {
 	"jina-bert-v2.attention.layer_norm_epsilon": number;
 	"jina-bert-v2.attention.causal": boolean;
-	"tokenizer.ggml.token_type_count": number;
 	"jina-bert-v2.pooling_type": TransformerLLMPoolingType;
 };
 export type ArchBloom = TransformerLLMBase<"bloom"> & {
@@ -162,13 +178,19 @@ export type ArchQwen2moe = TransformerLLMBase<"qwen2moe"> & {
 	"qwen2moe.expert_shared_feed_forward_length": number;
 	"qwen2moe.attention.layer_norm_rms_epsilon": number;
 };
+export type ArchQwen2vl = TransformerLLMBase<"qwen2vl"> & {
+	"qwen2vl.rope.dimension_sections": number[];
+};
 export type ArchPhi2 = TransformerLLMBase<"phi2"> & {
 	"phi2.attention.layer_norm_epsilon": number;
 };
 export type ArchPhi3 = TransformerLLMBase<"phi3"> & {
 	"phi3.attention.layer_norm_rms_epsilon": number;
 	"phi3.attention.sliding_window": number;
 };
+export type ArchPhimoe = TransformerLLMBase<"phimoe"> & {
+	"phimoe.attention.layer_norm_rms_epsilon": number;
+};
 export type ArchPlamo = TransformerLLMBase<"plamo"> & {
 	"plamo.attention.layer_norm_rms_epsilon": number;
 };
@@ -183,6 +205,14 @@ export type ArchInternlm2 = TransformerLLMBase<"internlm2"> & {
 };
 export type ArchMinicpm = TransformerLLMBase<"minicpm"> & {
 	"minicpm.attention.layer_norm_rms_epsilon": number;
+	"minicpm.embedding_scale": number;
+	"minicpm.residual_scale": number;
+	"minicpm.logit_scale": number;
+};
+export type ArchMinicpm3 = TransformerLLMBase<"minicpm3"> & {
+	"minicpm3.attention.layer_norm_rms_epsilon": number;
+	"minicpm3.attention.q_lora_rank": number;
+	"minicpm3.attention.kv_lora_rank": number;
 };
 export type ArchGemma = TransformerLLMBase<"gemma"> & {
 	"gemma.attention.layer_norm_rms_epsilon": number;
@@ -201,6 +231,7 @@ export type ArchMamba = TransformerLLMBase<"mamba"> & {
 	"mamba.ssm.inner_size": number;
 	"mamba.ssm.state_size": number;
 	"mamba.ssm.time_step_rank": number;
+	"mamba.ssm.dt_b_c_rms": boolean;
 	"mamba.attention.layer_norm_rms_epsilon": number;
 };
 export type ArchXverse = TransformerLLMBase<"xverse"> & {
@@ -210,6 +241,11 @@ export type ArchCommandR = TransformerLLMBase<"command-r"> & {
 	"command-r.logit_scale": number;
 	"command-r.attention.layer_norm_epsilon": number;
 };
+export type ArchCohere2 = TransformerLLMBase<"cohere2"> & {
+	"cohere2.attention.sliding_window": number;
+	"cohere2.logit_scale": number;
+	"cohere2.attention.layer_norm_epsilon": number;
+};
 export type ArchDbrx = TransformerLLMBase<"dbrx"> & {
 	"dbrx.attention.layer_norm_epsilon": number;
 	"dbrx.attention.clamp_kqv": number;
@@ -218,12 +254,25 @@ export type ArchOlmo = TransformerLLMBase<"olmo"> & {
 	"olmo.attention.layer_norm_epsilon": number;
 	"olmo.attention.clamp_kqv": number;
 };
+export type ArchOlmo2 = TransformerLLMBase<"olmo2"> & {
+	"olmo2.attention.layer_norm_rms_epsilon": number;
+};
+export type ArchOlmoe = TransformerLLMBase<"olmoe"> & {
+	"olmoe.attention.layer_norm_rms_epsilon": number;
+};
 export type ArchOpenelm = TransformerLLMBase<"openelm"> & {
 	"openelm.attention.layer_norm_rms_epsilon": number;
 };
 export type ArchArctic = TransformerLLMBase<"arctic"> & {
 	"arctic.attention.layer_norm_rms_epsilon": number;
 };
+export type ArchDeepseek = TransformerLLMBase<"deepseek"> & {
+	"deepseek.attention.layer_norm_rms_epsilon": number;
+	"deepseek.leading_dense_block_count": number;
+	"deepseek.expert_feed_forward_length": number;
+	"deepseek.expert_shared_count": number;
+	"deepseek.expert_weights_scale": number;
+};
 export type ArchDeepseek2 = TransformerLLMBase<"deepseek2"> & {
 	"deepseek2.attention.layer_norm_rms_epsilon": number;
 	"deepseek2.leading_dense_block_count": number;
@@ -232,6 +281,8 @@ export type ArchDeepseek2 = TransformerLLMBase<"deepseek2"> & {
 	"deepseek2.expert_feed_forward_length": number;
 	"deepseek2.expert_shared_count": number;
 	"deepseek2.expert_weights_scale": number;
+	"deepseek2.expert_weights_norm": boolean;
+	"deepseek2.expert_gating_func": boolean;
 	"deepseek2.rope.scaling.yarn_log_multiplier": number;
 };
 export type ArchChatglm = TransformerLLMBase<"chatglm"> & {
@@ -253,9 +304,44 @@ export type ArchJais = TransformerLLMBase<"jais"> & {
 	"jais.attention.layer_norm_epsilon": number;
 	"jais.attention.max_alibi_bias": number;
 };
+export type ArchNemotron = TransformerLLMBase<"nemotron"> & {
+	"nemotron.attention.layer_norm_epsilon": number;
+};
+export type ArchExaone = TransformerLLMBase<"exaone"> & {
+	"exaone.attention.layer_norm_rms_epsilon": number;
+};
+export type ArchRwkv6 = TransformerLLMBase<"rwkv6">;
+export type ArchRwkv6qwen2 = TransformerLLMBase<"rwkv6qwen2"> & {
+	"rwkv6qwen2.attention.layer_norm_epsilon": number;
+	"rwkv6qwen2.attention.layer_norm_rms_epsilon": number;
+	"rwkv6qwen2.wkv.head_size": number;
+	"rwkv6qwen2.time_mix_extra_dim": number;
+	"rwkv6qwen2.time_decay_extra_dim": number;
+	"rwkv6qwen2.rescale_every_n_layers": boolean;
+	"rwkv6qwen2.token_shift_count": boolean;
+};
+export type ArchGranite = TransformerLLMBase<"granite">;
+export type ArchGraniteMoe = TransformerLLMBase<"granitemoe"> & {
+	"granitemoe.attention.layer_norm_rms_epsilon": number;
+	"granitemoe.logit_scale": number;
+	"granitemoe.residual_scale": number;
+	"granitemoe.embedding_scale": number;
+	"granitemoe.attention.scale": number;
+};
+export type ArchChameleon = TransformerLLMBase<"chameleon"> & {
+	"chameleon.attention.layer_norm_rms_epsilon": number;
+	"chameleon.swin_norm": boolean;
+};
+export type ArchWavtokenizerDec = TransformerLLMBase<"wavtokenizer-dec"> & {
+	"wavtokenizer-dec.attention.layer_norm_epsilon": number;
+	"wavtokenizer-dec.attention.group_norm_epsilon": number;
+	"wavtokenizer-dec.attention.group_norm_groups": number;
+	"wavtokenizer-dec.attention.causal": boolean;
+};
 
 export type TransformerLLM =
 	| ArchLlama
+	| ArchDeci
 	| ArchFalcon
 	| ArchGrok
 	| ArchGpt2
@@ -273,26 +359,41 @@ export type TransformerLLM =
 	| ArchQwen
 	| ArchQwen2
 	| ArchQwen2moe
+	| ArchQwen2vl
 	| ArchPhi2
 	| ArchPhi3
+	| ArchPhimoe
 	| ArchPlamo
 	| ArchCodeshell
 	| ArchOrion
 	| ArchInternlm2
 	| ArchMinicpm
+	| ArchMinicpm3
 	| ArchGemma
 	| ArchGemma2
 	| ArchStarcoder2
 	| ArchMamba
 	| ArchXverse
 	| ArchCommandR
+	| ArchCohere2
 	| ArchDbrx
 	| ArchOlmo
+	| ArchOlmo2
+	| ArchOlmoe
 	| ArchOpenelm
 	| ArchArctic
+	| ArchDeepseek
 	| ArchDeepseek2
 	| ArchChatglm
 	| ArchBitnet
 	| ArchT5
 	| ArchT5encoder
-	| ArchJais;
+	| ArchJais
+	| ArchNemotron
+	| ArchExaone
+	| ArchRwkv6
+	| ArchRwkv6qwen2
+	| ArchGranite
+	| ArchGraniteMoe
+	| ArchChameleon
+	| ArchWavtokenizerDec;
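For context, here is a minimal sketch of how the `as const` list and its derived union are typically consumed, written as if it lived in the same module (since `LLMArchitecture` itself is not exported). The `isLLMArchitecture` type guard is hypothetical and not part of this change:

// Hypothetical helper (not in this diff): narrow a raw "general.architecture"
// string from GGUF metadata to the LLMArchitecture union derived above via
// (typeof LLM_ARCHITECTURES)[number].
function isLLMArchitecture(arch: string): arch is LLMArchitecture {
	// Widen the readonly tuple to readonly string[] so .includes() accepts any string.
	return (LLM_ARCHITECTURES as readonly string[]).includes(arch);
}

isLLMArchitecture("deci"); // true: added by this change
isLLMArchitecture("qwen2vl"); // true: added by this change
isLLMArchitecture("gpt-j"); // false: not in LLM_ARCHITECTURES

Because `LLMArchitecture` is derived from the tuple, every string appended to LLM_ARCHITECTURES (such as "deci" or "qwen2vl" here) flows into the union automatically, so a guard like this needs no updating when new architectures land.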