
Commit d219580

skip embed, fix bos
1 parent 5e78e88 commit d219580

File tree

1 file changed (+17, -11 lines changed)

convert_hf_to_gguf.py

Lines changed: 17 additions & 11 deletions
@@ -6399,25 +6399,25 @@ class HunYuanMoEModel(TextModel):
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
+        # FIX for tied embeddings: Capture the token embeddings.
+        self._tok_embd = None
 
     def set_vocab(self):
         self._set_vocab_gpt2(load_merges=False)
+        # FIX for BOS token: Manually set the correct BOS token ID.
+        # The SpecialVocab helper picks up an incorrect `bos_token_id: 1` from config.json.
+        self.gguf_writer.add_bos_token_id(127959) # <|bos|>
 
     def get_vocab_base(self) -> tuple[list[str], list[int], str]:
-        tokens: list[str] = []
-        toktypes: list[int] = []
-
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
 
+        # Fake merges
         merges = []
         mergeable_ranks = tokenizer.mergeable_ranks
         for token, rank in mergeable_ranks.items():
             if len(token) == 1:
                 continue
-            # bpe() will decompose the token into its smallest parts and then
-            # re-merge them. If the token is a valid merge, bpe() will return
-            # the two pieces that were merged to create it.
             merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
             if len(merged) == 2:
                 merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
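
The "Fake merges" loop works around HunYuan's tiktoken-style vocabulary, which ships merge ranks rather than an explicit merges list. Below is a minimal, self-contained sketch of the recovery trick that QwenModel.bpe() performs (the function body and the toy rank table are illustrative assumptions, not the repo's exact code, and the byte-to-unicode step done by token_bytes_to_string is omitted): re-running BPE on a token's bytes while only permitting merges of strictly lower rank leaves exactly the two pieces whose merge created the token.

def bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int | None = None) -> list[bytes]:
    # Start from single bytes and repeatedly apply the lowest-ranked merge.
    parts = [bytes([b]) for b in token]
    while True:
        best_idx, best_rank = None, None
        for i in range(len(parts) - 1):
            rank = mergeable_ranks.get(parts[i] + parts[i + 1])
            if rank is not None and (best_rank is None or rank < best_rank):
                best_idx, best_rank = i, rank
        # Stop once no adjacent pair merges below the token's own rank.
        if best_rank is None or (max_rank is not None and best_rank >= max_rank):
            break
        parts = parts[:best_idx] + [parts[best_idx] + parts[best_idx + 1]] + parts[best_idx + 2:]
    return parts

# Toy table: 'abb' was created by merging 'ab' + 'b', and the sketch recovers that pair.
ranks = {b"a": 0, b"b": 1, b"ab": 2, b"abb": 3}
assert bpe(ranks, b"abb", max_rank=3) == [b"ab", b"b"]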
@@ -6472,16 +6472,22 @@ def set_gguf_parameters(self):
         rope_scaling = self.hparams.get("rope_scaling", {})
         if rope_scaling.get("type") == "dynamic":
             logger.warning("Model uses 'dynamic' rope scaling, which is not yet supported in GGUF. "
-                           "The resulting model may not work correctly with contexts longer than the training length.")
+                           "Long-context extrapolation will not work correctly. Setting rope scaling type to NONE.")
             self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
-        else:
-            # Fallback for other potential scaling types
-            # This part is inherited from TextModel and will handle standard rope_theta
-            pass
 
     _experts: list[dict[str, Tensor]] | None = None
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        # FIX for tied embeddings: Capture the token embeddings.
+        if name == "model.embed_tokens.weight":
+            self._tok_embd = data_torch.clone()
+
+        # FIX for tied embeddings: Skip the lm_head if it's tied.
+        if name == "lm_head.weight":
+            if self.hparams.get("tie_word_embeddings", False):
+                logger.info("Skipping tied output layer 'lm_head.weight'")
+                return []
+
         # process the experts separately
         if name.find("mlp.experts") != -1:
             n_experts = self.hparams["num_experts"]
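
The new lm_head handling applies the usual tied-embeddings rule: when config.json sets tie_word_embeddings, the output projection reuses the input embedding matrix, so serializing lm_head.weight would only duplicate data the loader can take from the token embeddings. A standalone sketch of that rule, assuming hparams mirrors config.json (keep_tensor is a hypothetical helper, not part of convert_hf_to_gguf.py):

def keep_tensor(name: str, hparams: dict) -> bool:
    # Drop the output layer when it is tied to the token embeddings.
    if name == "lm_head.weight" and hparams.get("tie_word_embeddings", False):
        return False
    return True

assert keep_tensor("lm_head.weight", {"tie_word_embeddings": True}) is False
assert keep_tensor("model.embed_tokens.weight", {"tie_word_embeddings": True}) is True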

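The hard-coded BOS id from set_vocab can be sanity-checked against the checkpoint itself. The snippet below assumes the HunYuan model directory is available locally (the path is a placeholder, not from the diff) and that its remote-code tokenizer exposes the standard convert_tokens_to_ids API:

from transformers import AutoTokenizer

dir_model = "path/to/hunyuan-checkpoint"  # placeholder path
tokenizer = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
assert tokenizer.convert_tokens_to_ids("<|bos|>") == 127959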