Skip to content

Commit d0a9d8c

Browse files
committed
talk-llama : sync llama.cpp
1 parent 5b4646d commit d0a9d8c

27 files changed

+3571
-1000
lines changed

examples/talk-llama/llama-arch.cpp

Lines changed: 106 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -34,6 +34,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
3434
{ LLM_ARCH_PHI3, "phi3" },
3535
{ LLM_ARCH_PHIMOE, "phimoe" },
3636
{ LLM_ARCH_PLAMO, "plamo" },
37+
{ LLM_ARCH_PLAMO2, "plamo2" },
3738
{ LLM_ARCH_CODESHELL, "codeshell" },
3839
{ LLM_ARCH_ORION, "orion" },
3940
{ LLM_ARCH_INTERNLM2, "internlm2" },
@@ -67,6 +68,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
6768
{ LLM_ARCH_JAIS, "jais" },
6869
{ LLM_ARCH_NEMOTRON, "nemotron" },
6970
{ LLM_ARCH_EXAONE, "exaone" },
71+
{ LLM_ARCH_EXAONE4, "exaone4" },
7072
{ LLM_ARCH_RWKV6, "rwkv6" },
7173
{ LLM_ARCH_RWKV6QWEN2, "rwkv6qwen2" },
7274
{ LLM_ARCH_RWKV7, "rwkv7" },
@@ -81,9 +83,11 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
8183
{ LLM_ARCH_DOTS1, "dots1" },
8284
{ LLM_ARCH_ARCEE, "arcee" },
8385
{ LLM_ARCH_ERNIE4_5, "ernie4_5" },
86+
{ LLM_ARCH_ERNIE4_5_MOE, "ernie4_5-moe" },
8487
{ LLM_ARCH_HUNYUAN_MOE, "hunyuan-moe" },
8588
{ LLM_ARCH_SMOLLM3, "smollm3" },
8689
{ LLM_ARCH_LFM2, "lfm2" },
90+
{ LLM_ARCH_DREAM, "dream" },
8791
{ LLM_ARCH_UNKNOWN, "(unknown)" },
8892
};
8993

@@ -784,6 +788,36 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
784788
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
785789
},
786790
},
791+
{
792+
LLM_ARCH_PLAMO2,
793+
{
794+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
795+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
796+
{ LLM_TENSOR_OUTPUT, "output" },
797+
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
798+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
799+
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
800+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
801+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
802+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
803+
{ LLM_TENSOR_ATTN_ROT_EMBD, "blk.%d.attn_rot_embd" },
804+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
805+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
806+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
807+
{ LLM_TENSOR_SSM_IN, "blk.%d.ssm_in" },
808+
{ LLM_TENSOR_SSM_CONV1D, "blk.%d.ssm_conv1d" },
809+
{ LLM_TENSOR_SSM_X, "blk.%d.ssm_x" },
810+
{ LLM_TENSOR_SSM_DT, "blk.%d.ssm_dt" },
811+
{ LLM_TENSOR_SSM_A, "blk.%d.ssm_a" },
812+
{ LLM_TENSOR_SSM_D, "blk.%d.ssm_d" },
813+
{ LLM_TENSOR_SSM_OUT, "blk.%d.ssm_out" },
814+
{ LLM_TENSOR_SSM_DT_NORM, "blk.%d.ssm_dt_norm" },
815+
{ LLM_TENSOR_SSM_B_NORM, "blk.%d.ssm_b_norm" },
816+
{ LLM_TENSOR_SSM_C_NORM, "blk.%d.ssm_c_norm" },
817+
{ LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
818+
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
819+
},
820+
},
787821
{
788822
LLM_ARCH_CODESHELL,
789823
{
@@ -1477,6 +1511,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
14771511
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
14781512
},
14791513
},
1514+
{
1515+
LLM_ARCH_EXAONE4,
1516+
{
1517+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1518+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1519+
{ LLM_TENSOR_OUTPUT, "output" },
1520+
{ LLM_TENSOR_ROPE_FREQS, "rope_freqs" },
1521+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1522+
{ LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
1523+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1524+
{ LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
1525+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1526+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1527+
{ LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
1528+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1529+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1530+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1531+
{ LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
1532+
}
1533+
},
14801534
{
14811535
LLM_ARCH_RWKV6,
14821536
{
@@ -1793,6 +1847,31 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
17931847
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
17941848
},
17951849
},
1850+
{
1851+
LLM_ARCH_ERNIE4_5_MOE,
1852+
{
1853+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1854+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1855+
{ LLM_TENSOR_OUTPUT, "output" },
1856+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1857+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1858+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1859+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1860+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1861+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1862+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1863+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1864+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1865+
{ LLM_TENSOR_FFN_GATE_INP, "blk.%d.ffn_gate_inp" },
1866+
{ LLM_TENSOR_FFN_GATE_SHEXP, "blk.%d.ffn_gate_shexp" },
1867+
{ LLM_TENSOR_FFN_DOWN_SHEXP, "blk.%d.ffn_down_shexp" },
1868+
{ LLM_TENSOR_FFN_UP_SHEXP, "blk.%d.ffn_up_shexp" },
1869+
{ LLM_TENSOR_FFN_GATE_EXPS, "blk.%d.ffn_gate_exps" },
1870+
{ LLM_TENSOR_FFN_DOWN_EXPS, "blk.%d.ffn_down_exps" },
1871+
{ LLM_TENSOR_FFN_UP_EXPS, "blk.%d.ffn_up_exps" },
1872+
{ LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
1873+
},
1874+
},
17961875
{
17971876
LLM_ARCH_HUNYUAN_MOE,
17981877
{
@@ -1854,6 +1933,23 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
18541933
{ LLM_TENSOR_TOKEN_EMBD_NORM, "token_embd_norm" },
18551934
}
18561935
},
1936+
{
1937+
LLM_ARCH_DREAM,
1938+
{
1939+
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
1940+
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
1941+
{ LLM_TENSOR_OUTPUT, "output" },
1942+
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
1943+
{ LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
1944+
{ LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
1945+
{ LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
1946+
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
1947+
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
1948+
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
1949+
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
1950+
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
1951+
},
1952+
},
18571953
{
18581954
LLM_ARCH_UNKNOWN,
18591955
{
@@ -2094,10 +2190,20 @@ bool llm_arch_is_hybrid(const llm_arch & arch) {
20942190
switch (arch) {
20952191
case LLM_ARCH_JAMBA:
20962192
case LLM_ARCH_FALCON_H1:
2193+
case LLM_ARCH_PLAMO2:
20972194
case LLM_ARCH_GRANITE_HYBRID:
20982195
case LLM_ARCH_LFM2:
20992196
return true;
21002197
default:
21012198
return false;
21022199
}
21032200
}
2201+
2202+
bool llm_arch_is_diffusion(const llm_arch & arch) {
2203+
switch (arch) {
2204+
case LLM_ARCH_DREAM:
2205+
return true;
2206+
default:
2207+
return false;
2208+
}
2209+
}

examples/talk-llama/llama-arch.h

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,7 @@ enum llm_arch {
3838
LLM_ARCH_PHI3,
3939
LLM_ARCH_PHIMOE,
4040
LLM_ARCH_PLAMO,
41+
LLM_ARCH_PLAMO2,
4142
LLM_ARCH_CODESHELL,
4243
LLM_ARCH_ORION,
4344
LLM_ARCH_INTERNLM2,
@@ -71,6 +72,7 @@ enum llm_arch {
7172
LLM_ARCH_JAIS,
7273
LLM_ARCH_NEMOTRON,
7374
LLM_ARCH_EXAONE,
75+
LLM_ARCH_EXAONE4,
7476
LLM_ARCH_RWKV6,
7577
LLM_ARCH_RWKV6QWEN2,
7678
LLM_ARCH_RWKV7,
@@ -85,9 +87,11 @@ enum llm_arch {
8587
LLM_ARCH_DOTS1,
8688
LLM_ARCH_ARCEE,
8789
LLM_ARCH_ERNIE4_5,
90+
LLM_ARCH_ERNIE4_5_MOE,
8891
LLM_ARCH_HUNYUAN_MOE,
8992
LLM_ARCH_SMOLLM3,
9093
LLM_ARCH_LFM2,
94+
LLM_ARCH_DREAM,
9195
LLM_ARCH_UNKNOWN,
9296
};
9397

@@ -478,3 +482,4 @@ const llm_tensor_info & llm_tensor_info_for(llm_tensor tensor);
478482

479483
bool llm_arch_is_recurrent(const llm_arch & arch);
480484
bool llm_arch_is_hybrid (const llm_arch & arch);
485+
bool llm_arch_is_diffusion(const llm_arch & arch);

0 commit comments

Comments
 (0)