
Commit 35591a9

fix tensor name
1 parent 38acf7f commit 35591a9

2 files changed (+6 lines, -5 lines)


convert_hf_to_gguf.py

Lines changed: 2 additions & 1 deletion
@@ -14,7 +14,7 @@
 from enum import IntEnum
 from pathlib import Path
 from hashlib import sha256
-from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, Dict, cast
+from typing import TYPE_CHECKING, Any, Callable, ContextManager, Iterable, Iterator, Literal, Sequence, TypeVar, cast
 from itertools import chain
 from transformers import AutoConfig

@@ -6411,6 +6411,7 @@ def get_vocab_base(self) -> tuple[list[str], list[int], str]:
         from transformers import AutoTokenizer
         tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)

+        # merge logic is copied from QwenModel, maybe incorrect
         merges = []
         vocab = {}
         mergeable_ranks = tokenizer.mergeable_ranks
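The new comment flags that the merge list here is recovered the same way QwenModel does it: each multi-byte token in a tiktoken-style mergeable_ranks table is re-split into the two pieces that were merged to create it. Below is a minimal, self-contained sketch of that recovery; the helper names (bpe_split, recover_merges) and the toy vocabulary are illustrative only, not the converter's actual code.

# Sketch: recover BPE merges from a tiktoken-style {token bytes -> rank} table.
def bpe_split(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int) -> list[bytes]:
    # Greedily merge the adjacent pair with the lowest rank; stop once the
    # cheapest remaining pair has rank >= max_rank. For a token of rank r,
    # this leaves exactly the two parts whose merge created it.
    parts = [bytes([b]) for b in token]
    while True:
        best_rank = None
        best_idx = None
        for i in range(len(parts) - 1):
            rank = mergeable_ranks.get(parts[i] + parts[i + 1])
            if rank is not None and (best_rank is None or rank < best_rank):
                best_rank, best_idx = rank, i
        if best_rank is None or best_rank >= max_rank:
            break
        parts = parts[:best_idx] + [parts[best_idx] + parts[best_idx + 1]] + parts[best_idx + 2:]
    return parts

def recover_merges(mergeable_ranks: dict[bytes, int]) -> list[tuple[bytes, bytes]]:
    merges = []
    for token, rank in mergeable_ranks.items():
        if len(token) == 1:
            continue  # single bytes are never the result of a merge
        left, right = bpe_split(mergeable_ranks, token, max_rank=rank)
        merges.append((left, right))
    return merges

# Toy vocabulary: "ab" (rank 3) decomposes into b"a" + b"b", "abc" into b"ab" + b"c".
toy = {b"a": 0, b"b": 1, b"c": 2, b"ab": 3, b"abc": 4}
print(recover_merges(toy))  # [(b'a', b'b'), (b'ab', b'c')]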

gguf-py/gguf/tensor_mapping.py

Lines changed: 4 additions & 4 deletions
@@ -303,7 +303,7 @@ class TensorNameMap:
             "model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
             "model.layers.{bid}.feed_forward.router", # llama4
             "encoder.layers.{bid}.mlp.router.layer", # nomic-bert-moe
-            "model.layers.{bid}.mlp.gate.wg.weight", # hunyuan
+            "model.layers.{bid}.mlp.gate.wg", # hunyuan
         ),

         MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
@@ -363,7 +363,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
             "model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
             "model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
-            "model.layers.{bid}.mlp.shared_mlp.up_proj.weight", # hunyuan
+            "model.layers.{bid}.mlp.shared_mlp.up_proj", # hunyuan
         ),

         # AWQ-activation gate
@@ -400,7 +400,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
             "model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
             "model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
-            "model.layers.{bid}.mlp.shared_mlp.gate_proj.weight", # hunyuan
+            "model.layers.{bid}.mlp.shared_mlp.gate_proj", # hunyuan
         ),

         # Feed-forward down
@@ -450,7 +450,7 @@ class TensorNameMap:
             "model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
             "model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4
             "model.layers.{bid}.shared_mlp.output_linear", # granitemoe
-            "model.layers.{bid}.mlp.shared_mlp.down_proj.weight", # hunyuan
+            "model.layers.{bid}.mlp.shared_mlp.down_proj", # hunyuan
         ),

         MODEL_TENSOR.ATTN_Q_NORM: (
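All four hunks drop the trailing ".weight" from the hunyuan entries. TensorNameMap keys are tensor base names: during conversion the lookup strips a ".weight"/".bias" suffix, maps the base name, and re-appends the suffix, so an entry that bakes the suffix into the key either never matches or produces a GGUF name with the suffix missing. A simplified sketch of that lookup follows; the mapping entry and the GGUF target name are assumptions for illustration, not the library's exact table.

# Sketch (simplified, assumed) of suffix-stripping tensor-name lookup.
TENSOR_MAP = {
    # base HF name (bid already substituted) -> base GGUF name (assumed target)
    "model.layers.0.mlp.gate.wg": "blk.0.ffn_gate_inp",
}

def map_tensor_name(name: str, try_suffixes: tuple[str, ...] = (".weight", ".bias")) -> str | None:
    # Direct hit: the mapping already contains the full name.
    if name in TENSOR_MAP:
        return TENSOR_MAP[name]
    # Otherwise strip a known suffix, look up the base name, and re-append it.
    for suffix in try_suffixes:
        if name.endswith(suffix) and name[:-len(suffix)] in TENSOR_MAP:
            return TENSOR_MAP[name[:-len(suffix)]] + suffix
    return None

# With the base-name key, the checkpoint tensor keeps its ".weight" suffix:
print(map_tensor_name("model.layers.0.mlp.gate.wg.weight"))
# -> "blk.0.ffn_gate_inp.weight"
# A key of "model.layers.0.mlp.gate.wg.weight" would instead match directly
# and return "blk.0.ffn_gate_inp" without the suffix, a malformed tensor name.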
