
Commit 6783b11

models : fix LFM2 tensors (#17548)

LFM2's final normalization tensor was registered as LLM_TENSOR_TOKEN_EMBD_NORM even though it is applied as the output norm. This commit registers it as LLM_TENSOR_OUTPUT_NORM instead (the on-disk name stays "token_embd_norm", flagged in-source as a wrong tensor name), corrects LLM_TENSOR_TOKEN_EMBD_NORM's tensor info from GGML_OP_GET_ROWS to GGML_OP_MUL, renames the graph debug callbacks to the conventional result_norm / result_output, and expands the input embedding into the forward graph.

1 parent 909072a · commit 6783b11

File tree: 3 files changed (+12 −9 lines)

src/llama-arch.cpp

Lines changed: 3 additions & 3 deletions
@@ -2237,7 +2237,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
         { LLM_TENSOR_SHORTCONV_INPROJ,  "blk.%d.shortconv.in_proj" },
         { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
         { LLM_TENSOR_TOKEN_EMBD,        "token_embd" },
-        { LLM_TENSOR_TOKEN_EMBD_NORM,   "token_embd_norm" },
+        { LLM_TENSOR_OUTPUT_NORM,       "token_embd_norm" }, // note: wrong tensor name
         { LLM_TENSOR_OUTPUT,            "output" },
     }
 },
@@ -2259,7 +2259,7 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
         { LLM_TENSOR_SHORTCONV_INPROJ,  "blk.%d.shortconv.in_proj" },
         { LLM_TENSOR_SHORTCONV_OUTPROJ, "blk.%d.shortconv.out_proj" },
         { LLM_TENSOR_TOKEN_EMBD,        "token_embd" },
-        { LLM_TENSOR_TOKEN_EMBD_NORM,   "token_embd_norm" },
+        { LLM_TENSOR_OUTPUT_NORM,       "token_embd_norm" }, // note: wrong tensor name
         { LLM_TENSOR_FFN_GATE_INP,      "blk.%d.ffn_gate_inp" },
         { LLM_TENSOR_FFN_GATE_EXPS,     "blk.%d.ffn_gate_exps" },
         { LLM_TENSOR_FFN_DOWN_EXPS,     "blk.%d.ffn_down_exps" },
@@ -2490,8 +2490,8 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
 static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_TOKEN_EMBD,      {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
     {LLM_TENSOR_POS_EMBD,        {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
-    {LLM_TENSOR_TOKEN_EMBD_NORM, {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
     {LLM_TENSOR_TOKEN_TYPES,     {LLM_TENSOR_LAYER_INPUT, GGML_OP_GET_ROWS}},
+    {LLM_TENSOR_TOKEN_EMBD_NORM, {LLM_TENSOR_LAYER_INPUT, GGML_OP_MUL}},
     {LLM_TENSOR_OUTPUT,          {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CLS,             {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_CLS_OUT,         {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
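
For reference, LLM_TENSOR_NAMES maps each architecture's tensor enum to its gguf name template; per-layer entries carry a %d placeholder that is filled with the block index. A minimal standalone sketch of that lookup pattern, using hypothetical TensorId / kNames / tensor_name stand-ins rather than llama.cpp's actual tn() helper:

// Sketch of the enum -> gguf-name-template lookup behind LLM_TENSOR_NAMES.
// TensorId, kNames, and tensor_name are illustrative stand-ins.
#include <cstdio>
#include <map>
#include <string>

enum class TensorId { TOKEN_EMBD, OUTPUT_NORM, SHORTCONV_INPROJ };

static const std::map<TensorId, const char *> kNames = {
    { TensorId::TOKEN_EMBD,       "token_embd" },
    { TensorId::OUTPUT_NORM,      "token_embd_norm" },          // legacy on-disk name
    { TensorId::SHORTCONV_INPROJ, "blk.%d.shortconv.in_proj" },
};

// Expand the optional "%d" placeholder with the block (layer) index.
static std::string tensor_name(TensorId id, int blk) {
    char buf[128];
    std::snprintf(buf, sizeof(buf), kNames.at(id), blk);
    return buf;
}

int main() {
    std::printf("%s\n", tensor_name(TensorId::SHORTCONV_INPROJ, 3).c_str());
    // prints: blk.3.shortconv.in_proj
    return 0;
}

Note the fix leaves the on-disk string "token_embd_norm" untouched (hence the "wrong tensor name" comment), so previously converted GGUF files keep loading; only the enum the name is bound to changes.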

src/llama-model.cpp

Lines changed: 4 additions & 3 deletions
@@ -6133,9 +6133,10 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             case LLM_ARCH_LFM2:
             case LLM_ARCH_LFM2MOE:
                 {
-                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
-                    tok_norm = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD_NORM, "weight"), {n_embd}, 0);
-                    output = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+                    tok_embd = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, 0);
+
+                    output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
+                    output      = create_tensor(tn(LLM_TENSOR_OUTPUT, "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
 
                     if (output == NULL) {
                         output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
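
The TENSOR_NOT_REQUIRED / TENSOR_DUPLICATED pair above is the usual tied-embeddings pattern: the output head is optional, and when the file has no output.weight the loader falls back to reusing token_embd.weight. A minimal sketch of that fallback, with hypothetical Tensor / model_file / find_tensor stand-ins rather than llama.cpp's create_tensor API:

// Sketch of the optional-tensor fallback: if "output.weight" is absent,
// reuse the token embedding as a tied output head.
#include <cstdio>
#include <map>
#include <string>

struct Tensor { std::string name; };

// Pretend model file contents: no separate "output.weight".
static std::map<std::string, Tensor> model_file = {
    { "token_embd.weight",      { "token_embd.weight" } },
    { "token_embd_norm.weight", { "token_embd_norm.weight" } },
};

// Returns nullptr when the named tensor is absent from the file.
static Tensor * find_tensor(const std::string & name) {
    auto it = model_file.find(name);
    return it == model_file.end() ? nullptr : &it->second;
}

int main() {
    Tensor * tok_embd = find_tensor("token_embd.weight"); // required
    Tensor * output   = find_tensor("output.weight");     // optional

    if (output == nullptr) {
        output = tok_embd; // tied embeddings: duplicate token_embd
    }
    std::printf("output head uses: %s\n", output->name.c_str());
    return 0;
}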

src/models/lfm2.cpp

Lines changed: 5 additions & 3 deletions
@@ -9,6 +9,8 @@ llm_build_lfm2::llm_build_lfm2(const llama_model & model, const llm_graph_params
     ggml_tensor * cur = build_inp_embd(model.tok_embd);
     cb(cur, "model.embed_tokens", -1);
 
+    ggml_build_forward_expand(gf, cur);
+
     ggml_tensor * inp_pos = build_inp_pos();
     auto * inp_hybrid = build_inp_mem_hybrid();
     ggml_tensor * inp_out_ids = build_inp_out_ids();
@@ -40,12 +42,12 @@ llm_build_lfm2::llm_build_lfm2(const llama_model & model, const llm_graph_params
         cur = ggml_add(ctx0, cur, ffn_out);
     }
 
-    cur = build_norm(cur, model.tok_norm, NULL, LLM_NORM_RMS, -1);
-    cb(cur, "model.embedding_norm", -1);
+    cur = build_norm(cur, model.output_norm, NULL, LLM_NORM_RMS, -1);
+    cb(cur, "result_norm", -1);
     res->t_embd = cur;
 
     cur = build_lora_mm(model.output, cur);
-    cb(cur, "lm_head", -1);
+    cb(cur, "result_output", -1);
 
     res->t_logits = cur;
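
Here build_norm(..., LLM_NORM_RMS, ...) applies an RMS norm whose learned weight enters as an elementwise multiply; that is also why LLM_TENSOR_TOKEN_EMBD_NORM's tensor info in llama-arch.cpp changed from GGML_OP_GET_ROWS to GGML_OP_MUL. A plain-C++ sketch of the math, not ggml's implementation (the eps value here is an assumption):

// RMS norm: y[i] = x[i] / sqrt(mean(x^2) + eps) * w[i],
// with w playing the role of the output_norm weights.
#include <cmath>
#include <cstdio>
#include <vector>

static std::vector<float> rms_norm(const std::vector<float> & x,
                                   const std::vector<float> & w,
                                   float eps = 1e-6f) {
    float ss = 0.0f;
    for (float v : x) ss += v * v;
    const float scale = 1.0f / std::sqrt(ss / x.size() + eps);

    std::vector<float> y(x.size());
    for (size_t i = 0; i < x.size(); ++i) {
        y[i] = x[i] * scale * w[i]; // weight applied as elementwise MUL
    }
    return y;
}

int main() {
    const std::vector<float> x = { 1.0f, -2.0f, 3.0f };
    const std::vector<float> w = { 1.0f,  1.0f, 1.0f };
    for (float v : rms_norm(x, w)) std::printf("%.4f ", v);
    std::printf("\n");
    return 0;
}

The cb() renames bring LFM2 in line with the result_norm / result_output callback naming used by the other model graphs in the tree.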
