Skip to content

Commit 43c6795

Browse files
committed
Fix the order of DEEPSEEK and DEEPSEEK2 in constants; mark the shared-expert tensor mappings as also needed by the deepseek arch
1 parent b32159c commit 43c6795

File tree

2 files changed

+5
-5
lines changed

2 files changed

+5
-5
lines changed

gguf-py/gguf/constants.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -247,8 +247,8 @@ class MODEL_ARCH(IntEnum):
247247
OLMOE = auto()
248248
OPENELM = auto()
249249
ARCTIC = auto()
250-
DEEPSEEK2 = auto()
251250
DEEPSEEK = auto()
251+
DEEPSEEK2 = auto()
252252
CHATGLM = auto()
253253
BITNET = auto()
254254
T5 = auto()
@@ -410,8 +410,8 @@ class MODEL_TENSOR(IntEnum):
410410
MODEL_ARCH.OLMOE: "olmoe",
411411
MODEL_ARCH.OPENELM: "openelm",
412412
MODEL_ARCH.ARCTIC: "arctic",
413-
MODEL_ARCH.DEEPSEEK2: "deepseek2",
414413
MODEL_ARCH.DEEPSEEK: "deepseek",
414+
MODEL_ARCH.DEEPSEEK2: "deepseek2",
415415
MODEL_ARCH.CHATGLM: "chatglm",
416416
MODEL_ARCH.BITNET: "bitnet",
417417
MODEL_ARCH.T5: "t5",

gguf-py/gguf/tensor_mapping.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -306,7 +306,7 @@ class TensorNameMap:
306306

307307
MODEL_TENSOR.FFN_UP_SHEXP: (
308308
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
309-
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek2
309+
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
310310
),
311311

312312
# AWQ-activation gate
@@ -338,7 +338,7 @@ class TensorNameMap:
338338

339339
MODEL_TENSOR.FFN_GATE_SHEXP: (
340340
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
341-
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek2
341+
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
342342
),
343343

344344
# Feed-forward down
@@ -379,7 +379,7 @@ class TensorNameMap:
379379

380380
MODEL_TENSOR.FFN_DOWN_SHEXP: (
381381
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
382-
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek2
382+
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
383383
),
384384

385385
MODEL_TENSOR.ATTN_Q_NORM: (

0 commit comments

Comments
 (0)