@@ -42,6 +42,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_GEMMA, "gemma" },
     { LLM_ARCH_GEMMA2, "gemma2" },
     { LLM_ARCH_GEMMA3, "gemma3" },
+    { LLM_ARCH_GEMMA3N, "gemma3n" },
     { LLM_ARCH_STARCODER2, "starcoder2" },
     { LLM_ARCH_MAMBA, "mamba" },
     { LLM_ARCH_XVERSE, "xverse" },
@@ -75,6 +76,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
     { LLM_ARCH_BAILINGMOE, "bailingmoe" },
     { LLM_ARCH_DOTS1, "dots1" },
     { LLM_ARCH_ARCEE, "arcee" },
+    { LLM_ARCH_ERNIE4_5, "ernie4_5" },
     { LLM_ARCH_UNKNOWN, "(unknown)" },
 };
 
@@ -932,6 +934,42 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
         },
     },
+    {
+        LLM_ARCH_GEMMA3N,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_Q_NORM, "blk.%d.attn_q_norm" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_K_NORM, "blk.%d.attn_k_norm" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_ATTN_POST_NORM, "blk.%d.post_attention_norm" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+            { LLM_TENSOR_FFN_POST_NORM, "blk.%d.post_ffw_norm" },
+            { LLM_TENSOR_PER_LAYER_TOKEN_EMBD, "per_layer_token_embd" },
+            { LLM_TENSOR_PER_LAYER_MODEL_PROJ, "per_layer_model_proj" },
+            { LLM_TENSOR_PER_LAYER_PROJ_NORM, "per_layer_proj_norm" },
+            { LLM_TENSOR_ALTUP_UNEMBD_PROJ, "altup_unembd_proj" },
+            { LLM_TENSOR_ALTUP_PROJ, "altup_proj" },
+            { LLM_TENSOR_PER_LAYER_INP_GATE, "blk.%d.inp_gate" },
+            { LLM_TENSOR_PER_LAYER_PROJ, "blk.%d.proj" },
+            { LLM_TENSOR_PER_LAYER_POST_NORM, "blk.%d.post_norm" },
+            { LLM_TENSOR_ALTUP_CORRECT_COEF, "blk.%d.altup_correct_coef" },
+            { LLM_TENSOR_ALTUP_CORRECT_SCALE, "blk.%d.altup_correct_scale" },
+            { LLM_TENSOR_ALTUP_PREDICT_COEF, "blk.%d.altup_predict_coef" },
+            { LLM_TENSOR_ALTUP_ROUTER, "blk.%d.altup_router" },
+            { LLM_TENSOR_ALTUP_ROUTER_NORM, "blk.%d.altup_router_norm" },
+            { LLM_TENSOR_LAUREL_L, "blk.%d.laurel_l" },
+            { LLM_TENSOR_LAUREL_R, "blk.%d.laurel_r" },
+            { LLM_TENSOR_LAUREL_POST_NORM, "blk.%d.laurel_post_norm" },
+        },
+    },
     {
         LLM_ARCH_STARCODER2,
         {
@@ -1621,6 +1659,23 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
             { LLM_TENSOR_FFN_EXP_PROBS_B, "blk.%d.exp_probs_b" },
         }
     },
+    {
+        LLM_ARCH_ERNIE4_5,
+        {
+            { LLM_TENSOR_TOKEN_EMBD, "token_embd" },
+            { LLM_TENSOR_OUTPUT_NORM, "output_norm" },
+            { LLM_TENSOR_OUTPUT, "output" },
+            { LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
+            { LLM_TENSOR_ATTN_Q, "blk.%d.attn_q" },
+            { LLM_TENSOR_ATTN_K, "blk.%d.attn_k" },
+            { LLM_TENSOR_ATTN_V, "blk.%d.attn_v" },
+            { LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
+            { LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
+            { LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
+            { LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
+            { LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
+        },
+    },
     {
         LLM_ARCH_UNKNOWN,
         {
@@ -1749,6 +1804,23 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_FFN_GATE_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_UP_EXPS, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
     {LLM_TENSOR_FFN_EXP_PROBS_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+    // altup / laurel (gemma 3n)
+    {LLM_TENSOR_PER_LAYER_TOKEN_EMBD, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_GET_ROWS}},
+    {LLM_TENSOR_PER_LAYER_MODEL_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_PER_LAYER_PROJ_NORM, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL}},
+    {LLM_TENSOR_ALTUP_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ALTUP_UNEMBD_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_PER_LAYER_INP_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_PER_LAYER_PROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_PER_LAYER_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_ALTUP_CORRECT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ALTUP_CORRECT_SCALE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_ALTUP_PREDICT_COEF, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ALTUP_ROUTER, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_ALTUP_ROUTER_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+    {LLM_TENSOR_LAUREL_L, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_LAUREL_R, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_LAUREL_POST_NORM, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
     // this tensor is loaded for T5, but never used
     {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
     {LLM_TENSOR_CONV1D, {LLM_TENSOR_LAYER_INPUT, GGML_OP_IM2COL}},
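
Reader's note (not part of the diff): the per-block entries added above, such as "blk.%d.attn_q", are printf-style templates where %d stands for the block (layer) index. Below is a minimal, self-contained C++ sketch of how such a name table can be expanded into concrete tensor names; the enum, map, and helper here are illustrative stand-ins, not the actual llama.cpp API.

```cpp
// Sketch: expanding printf-style tensor name templates per layer.
// All identifiers below (example_tensor, EXAMPLE_NAMES, example_tensor_name)
// are hypothetical and exist only for illustration.
#include <cstdio>
#include <map>
#include <string>

enum example_tensor { EX_TENSOR_ATTN_Q, EX_TENSOR_FFN_UP };

static const std::map<example_tensor, const char *> EXAMPLE_NAMES = {
    { EX_TENSOR_ATTN_Q, "blk.%d.attn_q" },
    { EX_TENSOR_FFN_UP, "blk.%d.ffn_up" },
};

// Expand a template such as "blk.%d.attn_q" into "blk.7.attn_q".
static std::string example_tensor_name(example_tensor t, int layer) {
    char buf[128];
    std::snprintf(buf, sizeof(buf), EXAMPLE_NAMES.at(t), layer);
    return std::string(buf);
}

int main() {
    // Prints "blk.7.attn_q"
    std::printf("%s\n", example_tensor_name(EX_TENSOR_ATTN_Q, 7).c_str());
    return 0;
}
```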