
Commit 248f866

Use embed_tokens as output.weight
1 parent 6c7b2cc commit 248f866

3 files changed (+19, -3 lines)


convert_hf_to_gguf.py

Lines changed: 14 additions & 1 deletion
@@ -3538,7 +3538,18 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
 
-        if name.endswith(".dt_bias"):
+        if name.endswith(".embed_tokens.weight"):
+            # If there is no lm_head, we need to map the token embedding to the output layer
+            assert self.tensor_names is not None
+            if all(['lm_head' not in name for name in self.tensor_names]):
+                name_base = name.replace(".embed_tokens.weight", "")
+                output_name = "lm_head"
+
+                embed_tokens_mapped = self.map_tensor_name(name)
+                output_mapped = self.map_tensor_name(output_name) + ".weight"
+
+                return [(embed_tokens_mapped, data_torch), (output_mapped, data_torch)]
+        elif name.endswith(".dt_bias"):
             name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
         elif name.endswith(".dt_norm_weight"):
             name = name.rpartition(".dt_norm_weight")[0] + ".dt_norm.weight"
@@ -3561,6 +3572,8 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
             data_torch += 1.0
         elif name.endswith(".post_mlp_norm.weight"):
             data_torch += 1.0 / (5**1.5)
+        elif name.endswith(".norm.weight"):
+            data_torch += 1.0
         elif name.endswith(".gate_up_proj.weight"):
             # Split the combined gate_up tensor
             split_size = data_torch.shape[0] // 2
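
For context, a minimal standalone sketch of the tying logic added above, assuming the GGUF-side names "token_embd.weight" and "output.weight" and a plain all_names list standing in for self.tensor_names (illustrative only, not the converter's actual API):

from typing import Any, Iterable

def map_embed_to_output(name: str, tensor: Any, all_names: list[str]) -> Iterable[tuple[str, Any]]:
    """Emit the token embedding twice when the checkpoint ships no separate lm_head."""
    if name.endswith(".embed_tokens.weight"):
        if all("lm_head" not in n for n in all_names):
            # Tied embeddings: reuse the same tensor as the output projection.
            return [("token_embd.weight", tensor), ("output.weight", tensor)]
        return [("token_embd.weight", tensor)]
    return [(name, tensor)]

# Example: a checkpoint with only the embedding is mapped to both targets.
print(map_embed_to_output("model.embed_tokens.weight", "W", ["model.embed_tokens.weight"]))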

gguf-py/gguf/tensor_mapping.py

Lines changed: 1 addition & 2 deletions
@@ -62,11 +62,10 @@ class TensorNameMap:
         # Output
         MODEL_TENSOR.OUTPUT: (
             "embed_out",                 # gptneox
-            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe
+            "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo2 phimoe plamo2
             "output",                    # llama-pth bloom internlm2
             "word_embeddings_for_head",  # persimmon
             "lm_head.linear",            # phi2
-            "lm_head",                   # plamo2
             "output_layer",              # chatglm
             "head",                      # rwkv
             "head.out",                  # wavtokenizer

src/llama-model.cpp

Lines changed: 4 additions & 0 deletions
@@ -949,6 +949,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 }
 
                 switch (hparams.n_layer) {
+                    case 16: type = LLM_TYPE_1B; break;
                     case 32: type = LLM_TYPE_8B; break;
                     default: type = LLM_TYPE_UNKNOWN;
                 }
@@ -8026,6 +8027,9 @@ struct llm_build_plamo2 : public llm_graph_context {
             // ggml_graph_add_node(gf, model.layers[il].attn_norm);
             // cb(model.layers[il].attn_norm, "attn_norm", il);
 
+            ggml_graph_add_node(gf, model.layers[il].attn_norm);
+            cb(model.layers[il].attn_norm, "attn_norm_weight", il);
+
             // pre_mixer_norm
             cb(inpL, "attn_pre_norm_input", il);
             cur = build_norm(inpL, model.layers[il].attn_norm, NULL, LLM_NORM_RMS, il);
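
The hparams change recognizes a 16-layer variant as a ~1B model. A small sketch of the same classification expressed in Python (the strings stand in for the LLM_TYPE_* enum values; not part of the actual codebase):

def plamo2_type_from_n_layer(n_layer: int) -> str:
    # Mirrors the switch in load_hparams: 16 layers -> ~1B model, 32 layers -> ~8B.
    return {
        16: "LLM_TYPE_1B",
        32: "LLM_TYPE_8B",
    }.get(n_layer, "LLM_TYPE_UNKNOWN")

assert plamo2_type_from_n_layer(16) == "LLM_TYPE_1B"
assert plamo2_type_from_n_layer(48) == "LLM_TYPE_UNKNOWN"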
