Commit dddee85

loadable, missing cgraph now
1 parent: 571a45d

5 files changed (+24 −3 lines)

gguf-py/gguf/tensor_mapping.py

Lines changed: 3 additions & 3 deletions

@@ -462,15 +462,15 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.MTP_INP_PROJ: (
-            "model.layers.{bid}.input_proj.weight", # xiaomi mimo
+            "model.layers.{bid}.input_proj", # xiaomi mimo
         ),
 
         MODEL_TENSOR.MTP_TOKEN_NORM: (
-            "model.layers.{bid}.token_layernorm.weight", # xiaomi mimo
+            "model.layers.{bid}.token_layernorm", # xiaomi mimo
         ),
 
         MODEL_TENSOR.MTP_HIDDEN_NORM: (
-            "model.layers.{bid}.hidden_layernorm.weight", # xiaomi mimo
+            "model.layers.{bid}.hidden_layernorm", # xiaomi mimo
         ),
 
         MODEL_TENSOR.SSM_IN: (
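The change drops the trailing `.weight` from the HF-side names: `TensorNameMap` keys are module paths, and the `.weight`/`.bias` suffix is matched separately by the converter, so a key that already contains the suffix never matches. The C++ loader composes the final GGUF name the same way. A minimal sketch of that composition, assuming a simplified shape of the `tn()` helper (illustrative, not the literal implementation):

// sketch: how a base name from LLM_TENSOR_NAMES plus a suffix becomes the
// final GGUF tensor name (simplified stand-in for llama.cpp's tn() helper)
#include <cstdio>
#include <string>

static std::string gguf_tensor_name(const char * base_fmt, const char * suffix, int bid) {
    char base[128];
    std::snprintf(base, sizeof(base), base_fmt, bid); // "blk.%d.mtp_inp_proj" -> "blk.0.mtp_inp_proj"
    return std::string(base) + "." + suffix;          // -> "blk.0.mtp_inp_proj.weight"
}

int main() {
    // mirrors tn(LLM_TENSOR_MTP_INP_PROJ, "weight", 0)
    std::printf("%s\n", gguf_tensor_name("blk.%d.mtp_inp_proj", "weight", 0).c_str());
    return 0;
}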

src/llama-arch.cpp

Lines changed: 7 additions & 0 deletions

@@ -579,6 +579,10 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_NAMES = {
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
+           { LLM_TENSOR_MTP_INP_PROJ,    "blk.%d.mtp_inp_proj" },
+           { LLM_TENSOR_MTP_TOKEN_NORM,  "blk.%d.mtp_token_norm" },
+           { LLM_TENSOR_MTP_HIDDEN_NORM, "blk.%d.mtp_hidden_norm" },
+           { LLM_TENSOR_LAYER_OUT_NORM,  "blk.%d.layer_output_norm" },
        },
    },
    {
@@ -1678,6 +1682,9 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
    {LLM_TENSOR_FFN_GATE_EXPS,        {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_UP_EXPS,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT_ID}},
    {LLM_TENSOR_FFN_EXP_PROBS_B,      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_ADD}},
+   {LLM_TENSOR_MTP_INP_PROJ,         {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+   {LLM_TENSOR_MTP_TOKEN_NORM,       {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
+   {LLM_TENSOR_MTP_HIDDEN_NORM,      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL}},
    // this tensor is loaded for T5, but never used
    {LLM_TENSOR_DEC_CROSS_ATTN_REL_B, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_NONE}},
    {LLM_TENSOR_CONV1D,               {LLM_TENSOR_LAYER_INPUT,     GGML_OP_IM2COL}},
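`LLM_TENSOR_INFOS` pairs each tensor with its layer class and the ggml op that will consume it: `GGML_OP_MUL_MAT` for the projection, `GGML_OP_MUL` for the two norm scales. As I understand the loader, this metadata drives a probe of whether a candidate backend buffer type supports that op on the weight before placing it there; a hedged sketch of such a probe (simplified and hypothetical, not the literal llama.cpp code):

// hedged sketch of the op-support probe implied by LLM_TENSOR_INFOS
// (simplified; the real check lives in llama-model.cpp's buffer selection)
static bool weight_op_supported(struct ggml_context * ctx,
                                struct ggml_tensor  * w,
                                enum   ggml_op        op,
                                ggml_backend_dev_t    dev) {
    struct ggml_tensor * op_tensor = NULL;
    switch (op) {
        case GGML_OP_MUL_MAT: { // e.g. the new LLM_TENSOR_MTP_INP_PROJ
            struct ggml_tensor * b = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, w->ne[0], 512);
            op_tensor = ggml_mul_mat(ctx, w, b);
        } break;
        case GGML_OP_MUL: {     // e.g. the new MTP norm scales
            struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, w->ne[0]);
            op_tensor = ggml_mul(ctx, b, w);
        } break;
        default:
            return false;
    }
    return op_tensor != NULL && ggml_backend_dev_supports_op(dev, op_tensor);
}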

src/llama-arch.h

Lines changed: 3 additions & 0 deletions

@@ -362,6 +362,9 @@ enum llm_tensor {
     LLM_TENSOR_POS_NET_ATTN_K,
     LLM_TENSOR_POS_NET_ATTN_V,
     LLM_TENSOR_POS_NET_ATTN_OUT,
+    LLM_TENSOR_MTP_INP_PROJ,
+    LLM_TENSOR_MTP_TOKEN_NORM,
+    LLM_TENSOR_MTP_HIDDEN_NORM,
 };
 
 enum llm_tensor_layer {

src/llama-model.cpp

Lines changed: 6 additions & 0 deletions

@@ -2364,6 +2364,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                    layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd,   n_ff}, 0);
                    layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {  n_ff, n_embd}, 0);
                    layer.ffn_up   = create_tensor(tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd,   n_ff}, 0);
+
+                   // optional MTP (multi token predict), used by Xiaomi Mimo
+                   layer.mtp_inp_proj    = create_tensor(tn(LLM_TENSOR_MTP_INP_PROJ,    "weight", i), {n_embd*2, n_embd}, TENSOR_NOT_REQUIRED);
+                   layer.mtp_token_norm  = create_tensor(tn(LLM_TENSOR_MTP_TOKEN_NORM,  "weight", i), {n_embd}, TENSOR_NOT_REQUIRED);
+                   layer.mtp_hidden_norm = create_tensor(tn(LLM_TENSOR_MTP_HIDDEN_NORM, "weight", i), {n_embd}, TENSOR_NOT_REQUIRED);
+                   layer.layer_out_norm  = create_tensor(tn(LLM_TENSOR_LAYER_OUT_NORM,  "weight", i), {n_embd}, TENSOR_NOT_REQUIRED);
                }
            } break;
        case LLM_ARCH_QWEN2MOE:
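The `TENSOR_NOT_REQUIRED` flag makes `create_tensor()` return `nullptr` instead of failing when the checkpoint lacks the tensor, so checkpoints without the MTP head keep loading. Any future graph code therefore needs a presence guard; a small self-contained sketch of that idea (the `mtp_weights` struct and `has_mtp` helper are hypothetical, mirroring the new `llama_layer` members):

struct ggml_tensor; // opaque, as in ggml.h

// mirrors the new llama_layer members (subset, for illustration only)
struct mtp_weights {
    ggml_tensor * mtp_inp_proj    = nullptr;
    ggml_tensor * mtp_token_norm  = nullptr;
    ggml_tensor * mtp_hidden_norm = nullptr;
};

// optional tensors load as nullptr under TENSOR_NOT_REQUIRED, so the MTP
// branch must be gated on their presence before any ggml call touches them
static bool has_mtp(const mtp_weights & w) {
    return w.mtp_inp_proj && w.mtp_token_norm && w.mtp_hidden_norm;
}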

src/llama-model.h

Lines changed: 5 additions & 0 deletions

@@ -312,6 +312,11 @@ struct llama_layer {
     struct ggml_tensor * ffn_up_scale   = nullptr;
     struct ggml_tensor * ffn_down_scale = nullptr;
 
+    // MTP (multi token predict)
+    struct ggml_tensor * mtp_inp_proj    = nullptr;
+    struct ggml_tensor * mtp_token_norm  = nullptr;
+    struct ggml_tensor * mtp_hidden_norm = nullptr;
+
     struct llama_layer_posnet posnet;
 
     struct llama_layer_convnext convnext;
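The commit message notes the compute graph is still missing. From the tensor shapes alone, the `{n_embd*2, n_embd}` projection suggests a MiMo-style MTP input path: RMS-norm the token embedding and the main model's hidden state with their own scales, concatenate, and project back to model width. A hedged sketch under those assumptions, NOT the actual cgraph; the function name and the concat order of the two branches are guesses:

#include "ggml.h"

// hypothetical MTP input path, inferred from the shapes registered above
static struct ggml_tensor * build_mtp_input(
        struct ggml_context * ctx,
        struct ggml_tensor  * tok_embd,        // token embeddings  [n_embd, n_tokens]
        struct ggml_tensor  * hidden,          // main-model hidden [n_embd, n_tokens]
        struct ggml_tensor  * mtp_token_norm,  // {n_embd}
        struct ggml_tensor  * mtp_hidden_norm, // {n_embd}
        struct ggml_tensor  * mtp_inp_proj,    // {n_embd*2, n_embd}
        float eps) {
    // RMS-norm each branch with its own learned scale
    struct ggml_tensor * tok = ggml_rms_norm(ctx, tok_embd, eps);
    tok = ggml_mul(ctx, tok, mtp_token_norm);

    struct ggml_tensor * hid = ggml_rms_norm(ctx, hidden, eps);
    hid = ggml_mul(ctx, hid, mtp_hidden_norm);

    // concatenate along the embedding dim -> [n_embd*2, n_tokens]
    struct ggml_tensor * cat = ggml_concat(ctx, tok, hid, 0);

    // project back to model width -> [n_embd, n_tokens]
    return ggml_mul_mat(ctx, mtp_inp_proj, cat);
}

Whether `layer_out_norm` wraps the result is left open here, as it is in the commit.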
