Commit 1221d94

cleanup and pr changes

1 parent 99d9e94 commit 1221d94

3 files changed: +15 -14 lines changed

convert_hf_to_gguf.py

Lines changed: 15 additions & 10 deletions
@@ -844,14 +844,14 @@ def get_vocab_base_pre(self, tokenizer) -> str:
     def _set_vocab_none(self) -> None:
         self.gguf_writer.add_tokenizer_model("none")
 
-    def _set_vocab_gpt2(self, load_merges=True) -> None:
+    def _set_vocab_gpt2(self) -> None:
         tokens, toktypes, tokpre = self.get_vocab_base()
         self.gguf_writer.add_tokenizer_model("gpt2")
         self.gguf_writer.add_tokenizer_pre(tokpre)
         self.gguf_writer.add_token_list(tokens)
         self.gguf_writer.add_token_types(toktypes)
 
-        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges)
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=True)
         special_vocab.add_to_gguf(self.gguf_writer)
 
     def _set_vocab_qwen(self):
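
The hunk above drops the load_merges parameter, so GPT-2 style vocabs now always write their merges. A minimal sketch of the resulting pattern, assuming placeholder paths (only gguf.GGUFWriter, gguf.SpecialVocab and the load_merges=True behaviour come from this commit):

import gguf

# Placeholder paths, illustration only; merges are always loaded now.
writer = gguf.GGUFWriter("/tmp/model.gguf", arch="llama")
special_vocab = gguf.SpecialVocab("path/to/hf_model", load_merges=True)
special_vocab.add_to_gguf(writer)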
@@ -6405,11 +6405,6 @@ def __init__(self, *args, **kwargs):
         self._tok_embd = None
 
     def set_vocab(self):
-        """
-        A self-contained vocab implementation for the HunYuan tiktoken-based tokenizer.
-        This method correctly generates tokens, types, and the required "fake" merges
-        to satisfy the llama.cpp GGUF loader.
-        """
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
 
@@ -6456,7 +6451,7 @@ def set_vocab(self):
         # 5. Add special tokens and chat templates
         special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
         special_vocab.add_to_gguf(self.gguf_writer)
-        # FIX for BOS token: Manually set the correct BOS token ID.
+        # FIX for BOS token: Overwrite incorrect id read from config.json
         self.gguf_writer.add_bos_token_id(127959) # <|bos|>
 
     def set_gguf_parameters(self):
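
The override above hardcodes id 127959 because the BOS id recorded in the model's config.json does not correspond to the <|bos|> token. A quick, hedged way to confirm the id (the model directory is a placeholder, not part of this commit):

from transformers import AutoTokenizer

dir_model = "path/to/hunyuan_model"  # placeholder for the local HF checkout
tok = AutoTokenizer.from_pretrained(dir_model, trust_remote_code=True)
print(tok.convert_tokens_to_ids("<|bos|>"))  # expected: 127959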
@@ -6478,11 +6473,11 @@ def set_gguf_parameters(self):
         assert all(n == moe_shared_expert[0] for n in moe_shared_expert)
         self.gguf_writer.add_expert_shared_count(moe_shared_expert[0])
 
-        self.gguf_writer.add_qk_norm(hparams.get("use_qk_norm", True))
-
         # Rope
         rope_scaling = hparams.get("rope_scaling", {})
         if rope_scaling.get("type") == "dynamic":
+            # Not sure if YARN is correct here, and the factor in the config is only 1 anyway
+            # but the release claims to scale to 256k, which would be a factor of 8
            self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
            self.gguf_writer.add_rope_scaling_factor(rope_scaling["factor"])
            self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["max_position_embeddings"])
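
The arithmetic behind the new comment, as a sketch: the config ships a factor of 1, while the claimed 256k context implies a factor of 8, assuming a 32k base context (the base value is an assumption, not stated in this diff):

base_ctx = 32 * 1024             # assumed max_position_embeddings
claimed_ctx = 256 * 1024         # context length claimed for the release
print(claimed_ctx // base_ctx)   # 8, versus the factor of 1 found in rope_scaling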
@@ -6492,31 +6487,41 @@ def set_gguf_parameters(self):
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
         if name == "model.embed_tokens.weight":
             self._tok_embd = data_torch.clone()
+
         if name == "lm_head.weight":
             if self.hparams.get("tie_word_embeddings", False):
                 logger.info("Skipping tied output layer 'lm_head.weight'")
                 return []
+
         if name.find("mlp.experts") != -1:
             n_experts = self.hparams["num_experts"]
             assert bid is not None
+
             if self._experts is None:
                 self._experts = [{} for _ in range(self.block_count)]
+
             self._experts[bid][name] = data_torch
+
             if len(self._experts[bid]) >= n_experts * 3:
+                # merge the experts into a single 3d tensor
                 tensors: list[tuple[str, Tensor]] = []
                 for w_name in ["down_proj", "gate_proj", "up_proj"]:
                     datas: list[Tensor] = []
+
                     for xid in range(n_experts):
                         ename = f"model.layers.{bid}.mlp.experts.{xid}.{w_name}.weight"
                         datas.append(self._experts[bid][ename])
                         del self._experts[bid][ename]
+
                     data_torch = torch.stack(datas, dim=0)
                     merged_name = f"model.layers.{bid}.mlp.experts.{w_name}.weight"
                     new_name = self.map_tensor_name(merged_name)
                     tensors.append((new_name, data_torch))
+
                 return tensors
             else:
                 return []
+
         return [(self.map_tensor_name(name), data_torch)]
 
     def prepare_tensors(self):
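
A toy illustration of the expert merge in the hunk above: the per-expert 2D weights collected in self._experts are stacked along a new leading dimension into a single [n_experts, out_features, in_features] tensor (the shapes below are made up):

import torch

n_experts, out_features, in_features = 4, 8, 16
datas = [torch.randn(out_features, in_features) for _ in range(n_experts)]
merged = torch.stack(datas, dim=0)  # same call as in modify_tensors above
print(merged.shape)                 # torch.Size([4, 8, 16])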

gguf-py/gguf/constants.py

Lines changed: 0 additions & 1 deletion
@@ -148,7 +148,6 @@ class Attention:
         VALUE_LENGTH_MLA = "{arch}.attention.value_length_mla"
         SHARED_KV_LAYERS = "{arch}.attention.shared_kv_layers"
         SLIDING_WINDOW_PATTERN = "{arch}.attention.sliding_window_pattern"
-        QK_NORM = "{arch}.attention.qk_norm"
 
     class Rope:
         DIMENSION_COUNT = "{arch}.rope.dimension_count"

gguf-py/gguf/gguf_writer.py

Lines changed: 0 additions & 3 deletions
@@ -792,9 +792,6 @@ def add_group_norm_groups(self, value: int) -> None:
     def add_causal_attention(self, value: bool) -> None:
         self.add_bool(Keys.Attention.CAUSAL.format(arch=self.arch), value)
 
-    def add_qk_norm(self, value: bool) -> None:
-        self.add_bool(Keys.Attention.QK_NORM.format(arch=self.arch), value)
-
     def add_q_lora_rank(self, length: int) -> None:
         self.add_uint32(Keys.Attention.Q_LORA_RANK.format(arch=self.arch), length)
