Skip to content

Commit 2f60ebc

Browse files
committed
talk-llama : sync llama.cpp
ggml-ci
1 parent 69061e3 commit 2f60ebc

26 files changed

+1446
-496
lines changed

examples/talk-llama/llama-arch.cpp

Lines changed: 64 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -20,6 +20,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
2020
{ LLM_ARCH_BERT, "bert" },
2121
{ LLM_ARCH_NOMIC_BERT, "nomic-bert" },
2222
{ LLM_ARCH_NOMIC_BERT_MOE, "nomic-bert-moe" },
23+
{ LLM_ARCH_NEO_BERT, "neo-bert" },
2324
{ LLM_ARCH_JINA_BERT_V2, "jina-bert-v2" },
2425
{ LLM_ARCH_BLOOM, "bloom" },
2526
{ LLM_ARCH_STABLELM, "stablelm" },
@@ -72,6 +73,8 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
7273
{ LLM_ARCH_WAVTOKENIZER_DEC, "wavtokenizer-dec" },
7374
{ LLM_ARCH_PLM, "plm" },
7475
{ LLM_ARCH_BAILINGMOE, "bailingmoe" },
76+
{ LLM_ARCH_DOTS1, "dots1" },
77+
{ LLM_ARCH_ARCEE, "arcee" },
7578
{ LLM_ARCH_UNKNOWN, "(unknown)" },
7679
};
7780

@@ -243,6 +246,24 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
243246
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
244247
},
245248
},
249+
{
250+
LLM_ARCH_ARCEE,
251+
{
252+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
253+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
254+
{ LLM_TENSOR_OUTPUT, "output" },
255+
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
256+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
257+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
258+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
259+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
260+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
261+
{ LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
262+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
263+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
264+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
265+
},
266+
},
246267
{
247268
LLM_ARCH_LLAMA4,
248269
{
@@ -494,6 +515,21 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
494515
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
495516
},
496517
},
518+
{
519+
LLM_ARCH_NEO_BERT,
520+
{
521+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
522+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
523+
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
524+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
525+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
526+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
527+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
528+
{ LLM_TENSOR_ENC_OUTPUT_NORM, "enc.output_norm" },
529+
{ LLM_TENSOR_CLS, "cls" },
530+
{ LLM_TENSOR_CLS_OUT, "cls.output" },
531+
},
532+
},
497533
{
498534
LLM_ARCH_JINA_BERT_V2,
499535
{
@@ -1555,6 +1591,34 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
15551591
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
15561592
},
15571593
},
1594+
{
1595+
LLM_ARCH_DOTS1,
1596+
{
1597+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1598+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1599+
{ LLM_TENSOR_OUTPUT, "output" },
1600+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1601+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1602+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1603+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1604+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1605+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1606+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1607+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1608+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1609+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1610+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1611+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1612+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1613+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1614+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1615+
{ LLM_TENSOR_FFN_GATE_INP_SHEXP, "blk.%d.ffn_gate_inp_shexp" },
1616+
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1617+
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1618+
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1619+
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
1620+
}
1621+
},
15581622
{
15591623
LLM_ARCH_UNKNOWN,
15601624
{

examples/talk-llama/llama-arch.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@ enum llm_arch {
2424
LLM_ARCH_BERT,
2525
LLM_ARCH_NOMIC_BERT,
2626
LLM_ARCH_NOMIC_BERT_MOE,
27+
LLM_ARCH_NEO_BERT,
2728
LLM_ARCH_JINA_BERT_V2,
2829
LLM_ARCH_BLOOM,
2930
LLM_ARCH_STABLELM,
@@ -76,6 +77,8 @@ enum llm_arch {
7677
LLM_ARCH_WAVTOKENIZER_DEC,
7778
LLM_ARCH_PLM,
7879
LLM_ARCH_BAILINGMOE,
80+
LLM_ARCH_DOTS1,
81+
LLM_ARCH_ARCEE,
7982
LLM_ARCH_UNKNOWN,
8083
};
8184

0 commit comments

Comments
 (0)