@@ -20,6 +20,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
20
20
{ LLM_ARCH_BERT, " bert" },
21
21
{ LLM_ARCH_NOMIC_BERT, " nomic-bert" },
22
22
{ LLM_ARCH_NOMIC_BERT_MOE, " nomic-bert-moe" },
23
+ { LLM_ARCH_NEO_BERT, " neo-bert" },
23
24
{ LLM_ARCH_JINA_BERT_V2, " jina-bert-v2" },
24
25
{ LLM_ARCH_BLOOM, " bloom" },
25
26
{ LLM_ARCH_STABLELM, " stablelm" },
@@ -72,6 +73,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
72
73
{ LLM_ARCH_WAVTOKENIZER_DEC, " wavtokenizer-dec" },
73
74
{ LLM_ARCH_PLM, " plm" },
74
75
{ LLM_ARCH_BAILINGMOE, " bailingmoe" },
76
+ { LLM_ARCH_DOTS1, " dots1" },
77
+ { LLM_ARCH_ARCEE, " arcee" },
75
78
{ LLM_ARCH_UNKNOWN, " (unknown)" },
76
79
};
77
80
@@ -243,6 +246,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
243
246
{ LLM_TENSOR_FFN_UP_EXPS, " blk.%d.ffn_up_exps" },
244
247
},
245
248
},
249
+ {
250
+ LLM_ARCH_ARCEE,
251
+ {
252
+ { LLM_TENSOR_TOKEN_EMBD, " token_embd" },
253
+ { LLM_TENSOR_OUTPUT_NORM, " output_norm" },
254
+ { LLM_TENSOR_OUTPUT, " output" },
255
+ { LLM_TENSOR_ROPE_FREQS, " rope_freqs" },
256
+ { LLM_TENSOR_ATTN_NORM, " blk.%d.attn_norm" },
257
+ { LLM_TENSOR_ATTN_Q, " blk.%d.attn_q" },
258
+ { LLM_TENSOR_ATTN_K, " blk.%d.attn_k" },
259
+ { LLM_TENSOR_ATTN_V, " blk.%d.attn_v" },
260
+ { LLM_TENSOR_ATTN_OUT, " blk.%d.attn_output" },
261
+ { LLM_TENSOR_ATTN_ROT_EMBD, " blk.%d.attn_rot_embd" },
262
+ { LLM_TENSOR_FFN_NORM, " blk.%d.ffn_norm" },
263
+ { LLM_TENSOR_FFN_DOWN, " blk.%d.ffn_down" },
264
+ { LLM_TENSOR_FFN_UP, " blk.%d.ffn_up" },
265
+ },
266
+ },
246
267
{
247
268
LLM_ARCH_LLAMA4,
248
269
{
@@ -494,6 +515,21 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
494
515
{ LLM_TENSOR_FFN_UP_EXPS, " blk.%d.ffn_up_exps" },
495
516
},
496
517
},
518
+ {
519
+ LLM_ARCH_NEO_BERT,
520
+ {
521
+ { LLM_TENSOR_TOKEN_EMBD, " token_embd" },
522
+ { LLM_TENSOR_ATTN_NORM, " blk.%d.attn_norm" },
523
+ { LLM_TENSOR_ATTN_QKV, " blk.%d.attn_qkv" },
524
+ { LLM_TENSOR_ATTN_OUT, " blk.%d.attn_output" },
525
+ { LLM_TENSOR_FFN_NORM, " blk.%d.ffn_norm" },
526
+ { LLM_TENSOR_FFN_DOWN, " blk.%d.ffn_down" },
527
+ { LLM_TENSOR_FFN_UP, " blk.%d.ffn_up" },
528
+ { LLM_TENSOR_ENC_OUTPUT_NORM, " enc.output_norm" },
529
+ { LLM_TENSOR_CLS, " cls" },
530
+ { LLM_TENSOR_CLS_OUT, " cls.output" },
531
+ },
532
+ },
497
533
{
498
534
LLM_ARCH_JINA_BERT_V2,
499
535
{
@@ -1555,6 +1591,34 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
1555
1591
{ LLM_TENSOR_FFN_UP_SHEXP, " blk.%d.ffn_up_shexp" },
1556
1592
},
1557
1593
},
1594
+ {
1595
+ LLM_ARCH_DOTS1,
1596
+ {
1597
+ { LLM_TENSOR_TOKEN_EMBD, " token_embd" },
1598
+ { LLM_TENSOR_OUTPUT_NORM, " output_norm" },
1599
+ { LLM_TENSOR_OUTPUT, " output" },
1600
+ { LLM_TENSOR_ATTN_NORM, " blk.%d.attn_norm" },
1601
+ { LLM_TENSOR_ATTN_Q, " blk.%d.attn_q" },
1602
+ { LLM_TENSOR_ATTN_Q_NORM, " blk.%d.attn_q_norm" },
1603
+ { LLM_TENSOR_ATTN_K, " blk.%d.attn_k" },
1604
+ { LLM_TENSOR_ATTN_K_NORM, " blk.%d.attn_k_norm" },
1605
+ { LLM_TENSOR_ATTN_V, " blk.%d.attn_v" },
1606
+ { LLM_TENSOR_ATTN_OUT, " blk.%d.attn_output" },
1607
+ { LLM_TENSOR_FFN_NORM, " blk.%d.ffn_norm" },
1608
+ { LLM_TENSOR_FFN_GATE, " blk.%d.ffn_gate" },
1609
+ { LLM_TENSOR_FFN_UP, " blk.%d.ffn_up" },
1610
+ { LLM_TENSOR_FFN_DOWN, " blk.%d.ffn_down" },
1611
+ { LLM_TENSOR_FFN_GATE_INP, " blk.%d.ffn_gate_inp" },
1612
+ { LLM_TENSOR_FFN_GATE_EXPS, " blk.%d.ffn_gate_exps" },
1613
+ { LLM_TENSOR_FFN_DOWN_EXPS, " blk.%d.ffn_down_exps" },
1614
+ { LLM_TENSOR_FFN_UP_EXPS, " blk.%d.ffn_up_exps" },
1615
+ { LLM_TENSOR_FFN_GATE_INP_SHEXP, " blk.%d.ffn_gate_inp_shexp" },
1616
+ { LLM_TENSOR_FFN_GATE_SHEXP, " blk.%d.ffn_gate_shexp" },
1617
+ { LLM_TENSOR_FFN_DOWN_SHEXP, " blk.%d.ffn_down_shexp" },
1618
+ { LLM_TENSOR_FFN_UP_SHEXP, " blk.%d.ffn_up_shexp" },
1619
+ { LLM_TENSOR_FFN_EXP_PROBS_B, " blk.%d.exp_probs_b" },
1620
+ }
1621
+ },
1558
1622
{
1559
1623
LLM_ARCH_UNKNOWN,
1560
1624
{
0 commit comments