@@ -62,10 +62,10 @@ export const LLM_ARCHITECTURES = [
6262 "mpt" ,
6363 "baichuan" ,
6464 "starcoder" ,
65- "persimmon" ,
6665 "refact" ,
6766 "bert" ,
6867 "nomic-bert" ,
68+ "jina-bert-v2" ,
6969 "bloom" ,
7070 "stablelm" ,
7171 "qwen" ,
@@ -79,12 +79,21 @@ export const LLM_ARCHITECTURES = [
7979 "internlm2" ,
8080 "minicpm" ,
8181 "gemma" ,
82+ "gemma2" ,
8283 "starcoder2" ,
8384 "mamba" ,
8485 "xverse" ,
8586 "command-r" ,
8687 "dbrx" ,
8788 "olmo" ,
89+ "openelm" ,
90+ "arctic" ,
91+ "deepseek2" ,
92+ "chatglm" ,
93+ "bitnet" ,
94+ "t5" ,
95+ "t5encoder" ,
96+ "jais" ,
8897] as const ;
8998type LLMArchitecture = ( typeof LLM_ARCHITECTURES ) [ number ] ;
9099export type ArchLlama = TransformerLLMBase < "llama" > & {
@@ -100,7 +109,10 @@ export type ArchGpt2 = TransformerLLMBase<"gpt2"> & {
100109 "gpt2.attention.layer_norm_epsilon" : number ;
101110} ;
102111export type ArchGptj = TransformerLLMBase < "gptj" > ;
103- export type ArchGptneox = TransformerLLMBase < "gptneox" > ;
112+ export type ArchGptneox = TransformerLLMBase < "gptneox" > & {
113+ "gptneox.attention.layer_norm_epsilon" : number ;
114+ "gptneox.use_parallel_residual" : boolean ;
115+ } ;
104116export type ArchMpt = TransformerLLMBase < "mpt" > & {
105117 "mpt.attention.layer_norm_epsilon" : number ;
106118 "mpt.attention.clamp_kqv" : number ;
@@ -112,9 +124,6 @@ export type ArchBaichuan = TransformerLLMBase<"baichuan"> & {
 export type ArchStarcoder = TransformerLLMBase<"starcoder"> & {
 	"starcoder.attention.layer_norm_epsilon": number;
 };
-export type ArchPersimmon = TransformerLLMBase<"persimmon"> & {
-	"persimmon.attention.layer_norm_epsilon": number;
-};
 export type ArchRefact = TransformerLLMBase<"refact"> & {
 	"refact.attention.layer_norm_rms_epsilon": number;
 };
@@ -130,6 +139,12 @@ export type ArchNomicBert = TransformerLLMBase<"nomic-bert"> & {
130139 "tokenizer.ggml.token_type_count" : number ;
131140 "nomic-bert.pooling_type" : TransformerLLMPoolingType ;
132141} ;
142+ export type ArchJinaBertV2 = TransformerLLMBase < "jina-bert-v2" > & {
143+ "jina-bert-v2.attention.layer_norm_epsilon" : number ;
144+ "jina-bert-v2.attention.causal" : boolean ;
145+ "tokenizer.ggml.token_type_count" : number ;
146+ "jina-bert-v2.pooling_type" : TransformerLLMPoolingType ;
147+ } ;
133148export type ArchBloom = TransformerLLMBase < "bloom" > & {
134149 "bloom.attention.layer_norm_epsilon" : number ;
135150} ;
@@ -143,13 +158,16 @@ export type ArchQwen2 = TransformerLLMBase<"qwen2"> & {
143158 "qwen2.attention.layer_norm_rms_epsilon" : number ;
144159} ;
145160export type ArchQwen2moe = TransformerLLMBase < "qwen2moe" > & {
161+ "qwen2moe.expert_feed_forward_length" : number ;
162+ "qwen2moe.expert_shared_feed_forward_length" : number ;
146163 "qwen2moe.attention.layer_norm_rms_epsilon" : number ;
147164} ;
148165export type ArchPhi2 = TransformerLLMBase < "phi2" > & {
149166 "phi2.attention.layer_norm_epsilon" : number ;
150167} ;
151168export type ArchPhi3 = TransformerLLMBase < "phi3" > & {
152169 "phi3.attention.layer_norm_rms_epsilon" : number ;
170+ "phi3.attention.sliding_window" : number ;
153171} ;
154172export type ArchPlamo = TransformerLLMBase < "plamo" > & {
155173 "plamo.attention.layer_norm_rms_epsilon" : number ;
@@ -169,6 +187,12 @@ export type ArchMinicpm = TransformerLLMBase<"minicpm"> & {
 export type ArchGemma = TransformerLLMBase<"gemma"> & {
 	"gemma.attention.layer_norm_rms_epsilon": number;
 };
+export type ArchGemma2 = TransformerLLMBase<"gemma2"> & {
+	"gemma2.attention.sliding_window": number;
+	"gemma2.attention.layer_norm_rms_epsilon": number;
+	"gemma2.attn_logit_softcapping": number;
+	"gemma2.final_logit_softcapping": number;
+};
 export type ArchStarcoder2 = TransformerLLMBase<"starcoder2"> & {
 	"starcoder2.attention.layer_norm_epsilon": number;
 };
@@ -194,6 +218,41 @@ export type ArchOlmo = TransformerLLMBase<"olmo"> & {
194218 "olmo.attention.layer_norm_epsilon" : number ;
195219 "olmo.attention.clamp_kqv" : number ;
196220} ;
221+ export type ArchOpenelm = TransformerLLMBase < "openelm" > & {
222+ "openelm.attention.layer_norm_rms_epsilon" : number ;
223+ } ;
224+ export type ArchArctic = TransformerLLMBase < "arctic" > & {
225+ "arctic.attention.layer_norm_rms_epsilon" : number ;
226+ } ;
227+ export type ArchDeepseek2 = TransformerLLMBase < "deepseek2" > & {
228+ "deepseek2.attention.layer_norm_rms_epsilon" : number ;
229+ "deepseek2.leading_dense_block_count" : number ;
230+ "deepseek2.attention.q_lora_rank" : number ;
231+ "deepseek2.attention.kv_lora_rank" : number ;
232+ "deepseek2.expert_feed_forward_length" : number ;
233+ "deepseek2.expert_shared_count" : number ;
234+ "deepseek2.expert_weights_scale" : number ;
235+ "deepseek2.rope.scaling.yarn_log_multiplier" : number ;
236+ } ;
237+ export type ArchChatglm = TransformerLLMBase < "chatglm" > & {
238+ "chatglm.attention.layer_norm_rms_epsilon" : number ;
239+ } ;
240+ export type ArchBitnet = TransformerLLMBase < "bitnet" > & {
241+ "bitnet.attention.layer_norm_rms_epsilon" : number ;
242+ } ;
243+ export type ArchT5 = TransformerLLMBase < "t5" > & {
244+ "t5.attention.layer_norm_rms_epsilon" : number ;
245+ "t5.attention.relative_buckets_count" : number ;
246+ "t5.decoder_start_token_id" : number ;
247+ } ;
248+ export type ArchT5encoder = TransformerLLMBase < "t5encoder" > & {
249+ "t5encoder.attention.layer_norm_rms_epsilon" : number ;
250+ "t5encoder.attention.relative_buckets_count" : number ;
251+ } ;
252+ export type ArchJais = TransformerLLMBase < "jais" > & {
253+ "jais.attention.layer_norm_epsilon" : number ;
254+ "jais.attention.max_alibi_bias" : number ;
255+ } ;
197256
198257export type TransformerLLM =
199258 | ArchLlama
@@ -205,10 +264,10 @@ export type TransformerLLM =
 	| ArchMpt
 	| ArchBaichuan
 	| ArchStarcoder
-	| ArchPersimmon
 	| ArchRefact
 	| ArchBert
 	| ArchNomicBert
+	| ArchJinaBertV2
 	| ArchBloom
 	| ArchStablelm
 	| ArchQwen
@@ -222,9 +281,18 @@ export type TransformerLLM =
 	| ArchInternlm2
 	| ArchMinicpm
 	| ArchGemma
+	| ArchGemma2
 	| ArchStarcoder2
 	| ArchMamba
 	| ArchXverse
 	| ArchCommandR
 	| ArchDbrx
-	| ArchOlmo;
+	| ArchOlmo
+	| ArchOpenelm
+	| ArchArctic
+	| ArchDeepseek2
+	| ArchChatglm
+	| ArchBitnet
+	| ArchT5
+	| ArchT5encoder
+	| ArchJais;
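
A minimal sketch (not part of this commit) of how a consumer might use the extended `TransformerLLM` union: checking the architecture tag surfaces the per-architecture keys added above, such as `gemma2.attention.sliding_window`. The import path and the assumption that `TransformerLLMBase` carries a `"general.architecture"` literal discriminant are illustrative, not confirmed by this diff.

```ts
import type { ArchGemma2, TransformerLLM } from "./transformer-llm";

// Returns the sliding-window size for gemma2 models, undefined otherwise.
// Assumes every union member exposes a "general.architecture" literal tag.
function gemma2SlidingWindow(model: TransformerLLM): number | undefined {
	if (model["general.architecture"] === "gemma2") {
		// If the tag is a literal discriminant, TypeScript narrows the union
		// here on its own; the cast is a fallback for this sketch.
		const gemma2 = model as ArchGemma2;
		return gemma2["gemma2.attention.sliding_window"];
	}
	return undefined;
}
```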