
Commit ea95a1d

Do not duplicate embedding weights for output.weight
1 parent 0424a76 commit ea95a1d

File tree

2 files changed: +6 −13 lines

convert_hf_to_gguf.py

Lines changed: 1 addition & 12 deletions
@@ -3594,18 +3594,7 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         del bid  # unused
 
-        if name.endswith(".embed_tokens.weight"):
-            # If there is no lm_head, we need to map the token embedding to the output layer
-            assert self.tensor_names is not None
-            if all(['lm_head' not in name for name in self.tensor_names]):
-                name_base = name.replace(".embed_tokens.weight", "")
-                output_name = "lm_head"
-
-                embed_tokens_mapped = self.map_tensor_name(name)
-                output_mapped = self.map_tensor_name(output_name) + ".weight"
-
-                return [(embed_tokens_mapped, data_torch), (output_mapped, data_torch)]
-        elif name.endswith(".A_log"):
+        if name.endswith(".A_log"):
            data_torch = -torch.exp(data_torch)
         elif name.endswith(".dt_bias"):
             name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
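
This removes the converter-side workaround for checkpoints that have no separate lm_head: the script no longer writes the token-embedding matrix a second time under the output tensor name. A quick way to confirm the effect on a converted file is to list its tensors. Below is a minimal sketch, assuming the gguf-py package bundled with llama.cpp is installed and that model.gguf is a hypothetical output of convert_hf_to_gguf.py; it checks for the standard tensor names token_embd.weight and output.weight.

    # Minimal check that a converted model does not carry a duplicated output tensor.
    # Assumes the gguf-py package from the llama.cpp repository is installed.
    from gguf import GGUFReader

    def has_tensor(path: str, tensor_name: str) -> bool:
        """Return True if the GGUF file at `path` contains a tensor named `tensor_name`."""
        reader = GGUFReader(path)
        return any(t.name == tensor_name for t in reader.tensors)

    if __name__ == "__main__":
        model_path = "model.gguf"  # hypothetical output of convert_hf_to_gguf.py
        print("token_embd.weight present:", has_tensor(model_path, "token_embd.weight"))
        print("output.weight present:    ", has_tensor(model_path, "output.weight"))

For a checkpoint without an lm_head, the second check should now print False; the output projection is instead materialized at load time, as the src/llama-model.cpp hunk below shows.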

src/llama-model.cpp

Lines changed: 5 additions & 1 deletion
@@ -2865,7 +2865,11 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
 
                 // output
                 output_norm = create_tensor(tn(LLM_TENSOR_OUTPUT_NORM, "weight"), {n_embd}, 0);
-                output      = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, 0);
+                output      = create_tensor(tn(LLM_TENSOR_OUTPUT,      "weight"), {n_embd, n_vocab}, TENSOR_NOT_REQUIRED);
+                // if output is NULL, init from the input tok embed
+                if (output == NULL) {
+                    output = create_tensor(tn(LLM_TENSOR_TOKEN_EMBD, "weight"), {n_embd, n_vocab}, TENSOR_DUPLICATED);
+                }
 
                 for (int i = 0; i < n_layer; ++i) {
                     auto & layer = layers[i];
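
On the loader side, output.weight is now requested with TENSOR_NOT_REQUIRED, and when it is missing the token embedding is registered again with TENSOR_DUPLICATED, so the output projection shares the embedding weights instead of loading a second copy. Conceptually this is ordinary weight tying between the input embedding and the LM head. A minimal sketch of that idea in PyTorch (not llama.cpp's C++ API; the sizes are arbitrary placeholders, not taken from the commit):

    import torch
    import torch.nn as nn

    n_vocab, n_embd = 1000, 64  # arbitrary example sizes

    tok_embd = nn.Embedding(n_vocab, n_embd)           # input token embedding
    lm_head  = nn.Linear(n_embd, n_vocab, bias=False)  # output projection

    # Tie the weights: the output projection points at the embedding matrix
    # instead of storing a duplicate copy of it.
    lm_head.weight = tok_embd.weight

    hidden = torch.randn(1, n_embd)
    logits = lm_head(hidden)   # maps the hidden state back to vocabulary logits
    print(logits.shape)        # torch.Size([1, 1000])

The practical effect is unchanged logits, but the tie now happens at load time rather than by storing the same matrix twice in the GGUF file.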
