Skip to content

Commit f9f59f9

Browse files
committed
should be ok
1 parent 29df61c commit f9f59f9

File tree

3 files changed

+19
-1
lines changed

3 files changed

+19
-1
lines changed

convert_hf_to_gguf.py

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3071,6 +3071,21 @@ class Qwen3MoeModel(Qwen2MoeModel):
30713071
class Dots1Model(Qwen2MoeModel):
    """Model class for the DOTS1 architecture, extending ``Qwen2MoeModel``."""

    model_arch = gguf.MODEL_ARCH.DOTS1

    def __init__(self, *args, **kwargs):
        """Run the parent constructor, then alias the routed-expert count.

        The ``n_routed_experts`` hyperparameter is copied into
        ``num_experts`` once the parent has populated ``self.hparams``.
        """
        super().__init__(*args, **kwargs)
        # NOTE(review): presumably the inherited Qwen2-MoE logic reads
        # "num_experts" -- confirm against the parent class.
        routed_expert_count = self.hparams["n_routed_experts"]
        self.hparams["num_experts"] = routed_expert_count

    def set_gguf_parameters(self):
        """Write the inherited parameters plus the leading dense block count."""
        super().set_gguf_parameters()
        leading_dense_blocks = self.hparams["first_k_dense_replace"]
        self.gguf_writer.add_leading_dense_block_count(leading_dense_blocks)

    def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None):
        """Rename or map DOTS1-specific tensors, deferring the rest to the parent.

        Args:
            data_torch: Tensor data belonging to ``name``.
            name: Tensor name as found in the source checkpoint.
            bid: Block index passed through to the parent, or ``None``.

        Returns:
            A one-element list of ``(mapped_name, data_torch)`` when the name
            contains ``"shared_experts"``; otherwise whatever the parent's
            ``modify_tensors`` returns for the (possibly renamed) tensor.
        """
        correction_bias_suffix = "e_score_correction_bias"
        if name.endswith(correction_bias_suffix):
            # Rewrite the trailing "_bias" as ".bias" before any name mapping.
            name = name.replace(correction_bias_suffix, "e_score_correction.bias")

        if "shared_experts" not in name:
            return super().modify_tensors(data_torch, name, bid)

        # Shared-expert tensors are mapped directly, bypassing the parent.
        return [(self.map_tensor_name(name), data_torch)]
3088+
30743089

30753090
@ModelBase.register("GPT2LMHeadModel")
30763091
class GPT2Model(TextModel):

gguf-py/gguf/constants.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1282,6 +1282,9 @@ class MODEL_TENSOR(IntEnum):
12821282
MODEL_TENSOR.FFN_GATE_EXP,
12831283
MODEL_TENSOR.FFN_DOWN_EXP,
12841284
MODEL_TENSOR.FFN_UP_EXP,
1285+
MODEL_TENSOR.FFN_GATE_SHEXP,
1286+
MODEL_TENSOR.FFN_UP_SHEXP,
1287+
MODEL_TENSOR.FFN_DOWN_SHEXP,
12851288
],
12861289
MODEL_ARCH.PLAMO: [
12871290
MODEL_TENSOR.TOKEN_EMBD,

gguf-py/gguf/tensor_mapping.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ class TensorNameMap:
305305
),
306306

307307
MODEL_TENSOR.FFN_EXP_PROBS_B: (
308-
"model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
308+
"model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3 dots1
309309
),
310310

311311
# Feed-forward up

0 commit comments

Comments
 (0)