Skip to content

Commit 2195632

Browse files
saood06 and sszymczy authored
Deepseek V3 support added (#176)
Co-authored-by: Stanisław Szymczyk <[email protected]>
1 parent c2624b2 commit 2195632

File tree

9 files changed

+136
-5
lines changed

9 files changed

+136
-5
lines changed

convert_hf_to_gguf.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -590,6 +590,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
590590
if chkhsh == "855059429035d75a914d1eda9f10a876752e281a054a7a3d421ef0533e5b6249":
591591
# ref: https://huggingface.co/HuggingFaceTB/SmolLM-135M
592592
res = "smollm"
593+
if chkhsh == "877081d19cf6996e2c4ff0e1236341e9b7bde288f5311a56a937f0afbbb3aeb5":
594+
# ref: https://huggingface.co/deepseek-ai/DeepSeek-V3
595+
res = "deepseek-v3"
593596

594597
if res is None:
595598
logger.warning("\n")

convert_hf_to_gguf_update.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,7 @@ class TOKENIZER_TYPE(IntEnum):
9494
{"name": "codeshell", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/WisdomShell/CodeShell-7B", },
9595
{"name": "tekken", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/mistralai/Mistral-Nemo-Base-2407", },
9696
{"name": "smollm", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/HuggingFaceTB/SmolLM-135M", },
97+
{"name": "deepseek-v3", "tokt": TOKENIZER_TYPE.BPE, "repo": "https://huggingface.co/deepseek-ai/DeepSeek-V3"},
9798
]
9899

99100

gguf-py/gguf/constants.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,8 @@ class LLM:
8989
EXPERT_USED_COUNT = "{arch}.expert_used_count"
9090
EXPERT_SHARED_COUNT = "{arch}.expert_shared_count"
9191
EXPERT_WEIGHTS_SCALE = "{arch}.expert_weights_scale"
92+
EXPERT_WEIGHTS_NORM = "{arch}.expert_weights_norm"
93+
EXPERT_GATING_FUNC = "{arch}.expert_gating_func"
9294
POOLING_TYPE = "{arch}.pooling_type"
9395
LOGIT_SCALE = "{arch}.logit_scale"
9496
DECODER_START_TOKEN_ID = "{arch}.decoder_start_token_id"
@@ -257,6 +259,7 @@ class MODEL_TENSOR(IntEnum):
257259
FFN_GATE_SHEXP = auto()
258260
FFN_DOWN_SHEXP = auto()
259261
FFN_UP_SHEXP = auto()
262+
FFN_EXP_PROBS_B = auto()
260263
ATTN_Q_NORM = auto()
261264
ATTN_K_NORM = auto()
262265
LAYER_OUT_NORM = auto()
@@ -387,6 +390,7 @@ class MODEL_TENSOR(IntEnum):
387390
MODEL_TENSOR.FFN_GATE_EXP: "blk.{bid}.ffn_gate_exps",
388391
MODEL_TENSOR.FFN_DOWN_EXP: "blk.{bid}.ffn_down_exps",
389392
MODEL_TENSOR.FFN_UP_EXP: "blk.{bid}.ffn_up_exps",
393+
MODEL_TENSOR.FFN_EXP_PROBS_B: "blk.{bid}.exp_probs_b",
390394
MODEL_TENSOR.LAYER_OUT_NORM: "blk.{bid}.layer_output_norm",
391395
MODEL_TENSOR.SSM_IN: "blk.{bid}.ssm_in",
392396
MODEL_TENSOR.SSM_CONV1D: "blk.{bid}.ssm_conv1d",
@@ -978,6 +982,7 @@ class MODEL_TENSOR(IntEnum):
978982
MODEL_TENSOR.FFN_GATE_SHEXP,
979983
MODEL_TENSOR.FFN_DOWN_SHEXP,
980984
MODEL_TENSOR.FFN_UP_SHEXP,
985+
MODEL_TENSOR.FFN_EXP_PROBS_B
981986
],
982987
MODEL_ARCH.CHATGLM : [
983988
MODEL_TENSOR.TOKEN_EMBD,
@@ -1177,6 +1182,10 @@ class GGMLQuantizationType(IntEnum):
11771182
IQ2_TN = 42,
11781183

11791184

1185+
class ExpertGatingFuncType(IntEnum):
1186+
SOFTMAX = 1
1187+
SIGMOID = 2
1188+
11801189

11811190
# TODO: add GGMLFileType from ggml_ftype in ggml.h
11821191

gguf-py/gguf/gguf_writer.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@
2626
RopeScalingType,
2727
PoolingType,
2828
TokenType,
29+
ExpertGatingFuncType,
2930
)
3031

3132
from .quants import quant_shape_from_byte_shape
@@ -670,6 +671,12 @@ def add_expert_shared_count(self, count: int) -> None:
670671
def add_expert_weights_scale(self, value: float) -> None:
671672
self.add_float32(Keys.LLM.EXPERT_WEIGHTS_SCALE.format(arch=self.arch), value)
672673

674+
def add_expert_weights_norm(self, value: bool) -> None:
675+
self.add_bool(Keys.LLM.EXPERT_WEIGHTS_NORM.format(arch=self.arch), value)
676+
677+
def add_expert_gating_func(self, value: ExpertGatingFuncType) -> None:
678+
self.add_uint32(Keys.LLM.EXPERT_GATING_FUNC.format(arch=self.arch), value.value)
679+
673680
def add_layer_norm_eps(self, value: float) -> None:
674681
self.add_float32(Keys.Attention.LAYERNORM_EPS.format(arch=self.arch), value)
675682

gguf-py/gguf/tensor_mapping.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,10 @@ class TensorNameMap:
251251
"model.layers.{bid}.mlp.shared_expert_gate", # qwen2moe
252252
),
253253

254+
MODEL_TENSOR.FFN_EXP_PROBS_B: (
255+
"model.layers.{bid}.mlp.gate.e_score_correction", # deepseek-v3
256+
),
257+
254258
# Feed-forward up
255259
MODEL_TENSOR.FFN_UP: (
256260
"gpt_neox.layers.{bid}.mlp.dense_h_to_4h", # gptneox

include/llama.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ extern "C" {
9393
LLAMA_VOCAB_PRE_TYPE_TEKKEN = 20,
9494
LLAMA_VOCAB_PRE_TYPE_SMOLLM = 21,
9595
LLAMA_VOCAB_PRE_TYPE_CODESHELL = 22,
96+
LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM = 23, //llama.cpp lists this as 28
9697
};
9798

9899
// note: these values should be synchronized with ggml_rope

src/llama-vocab.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -367,6 +367,13 @@ struct llm_tokenizer_bpe {
367367
"\\p{N}+",
368368
};
369369
break;
370+
case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK3_LLM:
371+
regex_exprs = {
372+
"\\p{N}{1,3}",
373+
"[一-龥぀-ゟ゠-ヿ]+",
374+
"[!\"#$%&'()*+,\\-./:;<=>?@\\[\\\\\\]^_`{|}~][A-Za-z]+|[^\r\n\\p{L}\\p{P}\\p{S}]?[\\p{L}\\p{M}]+| ?[\\p{P}\\p{S}]+[\r\n]*|\\s*[\r\n]+|\\s+(?!\\S)|\\s+",
375+
};
376+
break;
370377
case LLAMA_VOCAB_PRE_TYPE_DEEPSEEK_CODER:
371378
regex_exprs = {
372379
"[\r\n]",

0 commit comments

Comments (0)