Skip to content

Commit 29df61c

Browse files
committed
fix
1 parent 58dbd1c commit 29df61c

File tree

2 files changed

+29
-5
lines changed

2 files changed

+29
-5
lines changed

convert_hf_to_gguf.py

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3067,6 +3067,11 @@ class Qwen3MoeModel(Qwen2MoeModel):
30673067
model_arch = gguf.MODEL_ARCH.QWEN3MOE
30683068

30693069

3070+
@ModelBase.register("Dots1ForCausalLM")
3071+
class Dots1Model(Qwen2MoeModel):
3072+
model_arch = gguf.MODEL_ARCH.DOTS1
3073+
3074+
30703075
@ModelBase.register("GPT2LMHeadModel")
30713076
class GPT2Model(TextModel):
30723077
model_arch = gguf.MODEL_ARCH.GPT2
@@ -5158,11 +5163,8 @@ def prepare_tensors(self):
51585163
raise ValueError(f"Unprocessed experts: {experts}")
51595164

51605165

5161-
@ModelBase.register(
5162-
"DeepseekV2ForCausalLM",
5163-
"DeepseekV3ForCausalLM",
5164-
"Dots1ForCausalLM",
5165-
)
5166+
@ModelBase.register("DeepseekV2ForCausalLM")
5167+
@ModelBase.register("DeepseekV3ForCausalLM")
51665168
class DeepseekV2Model(TextModel):
51675169
model_arch = gguf.MODEL_ARCH.DEEPSEEK2
51685170

gguf-py/gguf/constants.py

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -300,6 +300,7 @@ class MODEL_ARCH(IntEnum):
300300
QWEN2VL = auto()
301301
QWEN3 = auto()
302302
QWEN3MOE = auto()
303+
DOTS1 = auto()
303304
PHI2 = auto()
304305
PHI3 = auto()
305306
PHIMOE = auto()
@@ -580,6 +581,7 @@ class MODEL_TENSOR(IntEnum):
580581
MODEL_ARCH.QWEN2VL: "qwen2vl",
581582
MODEL_ARCH.QWEN3: "qwen3",
582583
MODEL_ARCH.QWEN3MOE: "qwen3moe",
584+
MODEL_ARCH.DOTS1: "dots1",
583585
MODEL_ARCH.PHI2: "phi2",
584586
MODEL_ARCH.PHI3: "phi3",
585587
MODEL_ARCH.PHIMOE: "phimoe",
@@ -1261,6 +1263,26 @@ class MODEL_TENSOR(IntEnum):
12611263
MODEL_TENSOR.FFN_DOWN_EXP,
12621264
MODEL_TENSOR.FFN_UP_EXP,
12631265
],
1266+
MODEL_ARCH.DOTS1: [
1267+
MODEL_TENSOR.TOKEN_EMBD,
1268+
MODEL_TENSOR.OUTPUT_NORM,
1269+
MODEL_TENSOR.OUTPUT,
1270+
MODEL_TENSOR.ATTN_NORM,
1271+
MODEL_TENSOR.ATTN_Q,
1272+
MODEL_TENSOR.ATTN_Q_NORM,
1273+
MODEL_TENSOR.ATTN_K,
1274+
MODEL_TENSOR.ATTN_K_NORM,
1275+
MODEL_TENSOR.ATTN_V,
1276+
MODEL_TENSOR.ATTN_OUT,
1277+
MODEL_TENSOR.FFN_NORM,
1278+
MODEL_TENSOR.FFN_GATE,
1279+
MODEL_TENSOR.FFN_UP,
1280+
MODEL_TENSOR.FFN_DOWN,
1281+
MODEL_TENSOR.FFN_GATE_INP,
1282+
MODEL_TENSOR.FFN_GATE_EXP,
1283+
MODEL_TENSOR.FFN_DOWN_EXP,
1284+
MODEL_TENSOR.FFN_UP_EXP,
1285+
],
12641286
MODEL_ARCH.PLAMO: [
12651287
MODEL_TENSOR.TOKEN_EMBD,
12661288
MODEL_TENSOR.OUTPUT_NORM,

0 commit comments

Comments (0)