diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 2a83d66279b79..a6a6fa7d7198a 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -19339,6 +19339,7 @@ struct llm_build_smallthinker : public llm_graph_context{ cur = build_norm(cur, model.output_norm, NULL, LLM_NORM_RMS, -1); cb(cur, "result_norm", -1); + res->t_embd = cur; // lm_head cur = build_lora_mm(model.output, cur);