Commit 91fdfed (parent: c31e606)

kimi k2

Signed-off-by: Xiaodong Ye <[email protected]>

File tree: 2 files changed, 69 additions and 2 deletions


convert_hf_to_gguf.py (68 additions, 1 deletion)
```diff
@@ -780,6 +780,9 @@ def get_vocab_base_pre(self, tokenizer) -> str:
         if chkhsh == "877081d19cf6996e2c4ff0e1236341e9b7bde288f5311a56a937f0afbbb3aeb5":
             # ref: https://huggingface.co/deepseek-ai/DeepSeek-V3
             res = "deepseek-v3"
+        if chkhsh == "81212dc7cdb7e0c1074ca62c5aeab0d43c9f52b8a737be7b12a777c953027890":
+            # ref: https://huggingface.co/moonshotai/Kimi-K2-Instruct
+            res = "deepseek-v3"
         if chkhsh == "b3f499bb4255f8ca19fccd664443283318f2fd2414d5e0b040fbdd0cc195d6c5":
             # ref: https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B
             res = "deepseek-r1-qwen"
```
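For context, the chkhsh keys in this function are checksums of the tokenizer's output on a fixed probe text, which is why a new model can be mapped onto an existing pre-tokenizer such as "deepseek-v3". A minimal sketch of that scheme, following how get_vocab_base_pre derives the hash elsewhere in this script (the probe string is abbreviated here, so the printed value will not match the hash above verbatim):

```python
from hashlib import sha256
from transformers import AutoTokenizer

# Sketch: hash the stringified token-id list produced for a fixed probe text.
# The real script uses a long multilingual probe string, not "...".
chktxt = "..."  # abbreviated on purpose
tokenizer = AutoTokenizer.from_pretrained(
    "moonshotai/Kimi-K2-Instruct", trust_remote_code=True
)
chkhsh = sha256(str(tokenizer.encode(chktxt)).encode()).hexdigest()
print(chkhsh)  # with the script's real probe text this should print 81212dc7...
```

Two tokenizers that hash identically on the probe can safely share one pre-tokenizer identifier, which is exactly what the new Kimi-K2-Instruct branch relies on.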
```diff
@@ -5562,8 +5565,69 @@ def prepare_tensors(self):
 class DeepseekV2Model(TextModel):
     model_arch = gguf.MODEL_ARCH.DEEPSEEK2
 
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+
+        print("yeahdongcn: __init__")
+        # For handling tied embeddings
+        self._tok_embd = None
+
+
     def set_vocab(self):
-        self._set_vocab_gpt2()
+        print("yeahdongcn: set_vocab")
+        from transformers import AutoTokenizer
+        tokenizer = AutoTokenizer.from_pretrained(self.dir_model, trust_remote_code=True)
+
+        # 1. Get the pre-tokenizer identifier hash
+        tokpre = self.get_vocab_base_pre(tokenizer)
+
+        # 2. Reverse-engineer the merges list from mergeable_ranks
+        merges = []
+        vocab = {}
+        print(f"yeahdongcn: tokenizer={tokenizer}")
+        # mergeable_ranks = tokenizer.mergeable_ranks
+        # for token, rank in mergeable_ranks.items():
+        #     vocab[QwenModel.token_bytes_to_string(token)] = rank
+        #     if len(token) == 1:
+        #         continue
+        #     merged = QwenModel.bpe(mergeable_ranks, token, max_rank=rank)
+        #     if len(merged) == 2:  # todo this is an assert in Qwen, why?
+        #         merges.append(' '.join(map(QwenModel.token_bytes_to_string, merged)))
+        # Hardcoded so the merges list is not empty
+        merges.append("<|endoftext|> <|endoftext|>")
+
+        # 3. Generate the tokens and toktypes lists
+        vocab_size = self.hparams["vocab_size"]
+        print(f"yeahdongcn: vocab_size={vocab_size}")
+        print(f"yeahdongcn: tokenizer.vocab_size={tokenizer.vocab_size}")
+        # assert tokenizer.vocab_size == vocab_size
+        special_tokens = tokenizer.special_tokens
+        reverse_vocab = {id_: encoded_tok for encoded_tok, id_ in {**vocab, **special_tokens}.items()}
+        tokens: list[str] = []
+        toktypes: list[int] = []
+        for i in range(vocab_size):
+            if i not in reverse_vocab:
+                tokens.append(f"[PAD{i}]")
+                toktypes.append(gguf.TokenType.UNUSED)
+            else:
+                token = reverse_vocab[i]
+                tokens.append(token)
+                if i in special_tokens.values():
+                    toktypes.append(gguf.TokenType.CONTROL)
+                else:
+                    toktypes.append(gguf.TokenType.NORMAL)
+
+        # 4. Write all vocab-related fields to the GGUF writer
+        self.gguf_writer.add_tokenizer_model("gpt2")
+        self.gguf_writer.add_tokenizer_pre(tokpre)
+        self.gguf_writer.add_token_list(tokens)
+        self.gguf_writer.add_token_types(toktypes)
+        self.gguf_writer.add_token_merges(merges)
+
+        # 5. Add special tokens and chat templates
+        special_vocab = gguf.SpecialVocab(self.dir_model, load_merges=False)
+        special_vocab.add_to_gguf(self.gguf_writer)
+
 
     def set_gguf_parameters(self):
```

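The commented-out block in set_vocab is the merge-recovery idiom used for Qwen-style tiktoken vocabularies: re-split every multi-byte token with BPE while only allowing merges ranked below the token's own rank, and a split that survives as exactly two parts is the merge that produced the token. A self-contained sketch of that helper, modeled on what QwenModel.bpe presumably does (an illustration under that assumption, not the script's code):

```python
def bpe(mergeable_ranks: dict[bytes, int], token: bytes, max_rank: int) -> list[bytes]:
    # Re-run byte-level BPE on `token`, permitting only merges whose rank is
    # strictly below `max_rank`; the surviving split reveals the final merge.
    parts = [bytes([b]) for b in token]
    while True:
        # Find the lowest-ranked adjacent pair that is itself a known token.
        min_idx, min_rank = None, None
        for i, (left, right) in enumerate(zip(parts[:-1], parts[1:])):
            rank = mergeable_ranks.get(left + right)
            if rank is not None and (min_rank is None or rank < min_rank):
                min_idx, min_rank = i, rank
        if min_rank is None or min_rank >= max_rank:
            break  # no merge below the cap is possible; the split is final
        parts = parts[:min_idx] + [parts[min_idx] + parts[min_idx + 1]] + parts[min_idx + 2:]
    return parts

# Toy ranks: "ab" was merged first (rank 0), then "abc" (rank 1).
ranks = {b"ab": 0, b"abc": 1}
print(bpe(ranks, b"abc", max_rank=1))  # [b'ab', b'c'] -> records merge "ab c"
```

Since that path is disabled in this commit, the single hardcoded "<|endoftext|> <|endoftext|>" merge is only a stopgap to keep the GGUF merges list non-empty.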
```diff
@@ -5610,6 +5674,9 @@ def set_gguf_parameters(self):
     _experts: list[dict[str, Tensor]] | None = None
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        if name == "model.embed_tokens.weight":
+            self._tok_embd = data_torch.clone()
+
         # rename e_score_correction_bias tensors
         if name.endswith("e_score_correction_bias"):
             name = name.replace("e_score_correction_bias", "e_score_correction.bias")
```
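The new lines at the top of modify_tensors keep a private copy of model.embed_tokens.weight, matching the "For handling tied embeddings" note added in __init__. The commit only captures the tensor; a hypothetical sketch of the kind of follow-up it enables, reusing the embedding as the output head when the checkpoint ships no separate lm_head (the prepare_tensors override, the tie_word_embeddings key, and the output.weight name are assumptions, not part of this diff):

```python
# Hypothetical follow-up, not in this commit: when the model ties its output
# projection to the input embedding, emit the saved copy as output.weight.
def prepare_tensors(self):
    super().prepare_tensors()
    if self.hparams.get("tie_word_embeddings") and self._tok_embd is not None:
        self.gguf_writer.add_tensor("output.weight", self._tok_embd.float().numpy())
```

With these changes in place, conversion would presumably follow the script's usual invocation, e.g. `python convert_hf_to_gguf.py /path/to/Kimi-K2-Instruct --outfile kimi-k2.gguf` (paths illustrative).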

src/llama-hparams.h (1 addition, 1 deletion)
```diff
@@ -6,7 +6,7 @@
 
 // bump if necessary
 #define LLAMA_MAX_LAYERS  512
-#define LLAMA_MAX_EXPERTS 256 // DeepSeekV3
+#define LLAMA_MAX_EXPERTS 512 // DeepSeekV3
 
 enum llama_expert_gating_func_type {
     LLAMA_EXPERT_GATING_FUNC_TYPE_NONE = 0,
```
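This bump is what actually makes room for Kimi-K2: it reportedly routes across 384 experts, above the old cap of 256, and 512 keeps the limit a power of two (the 384 figure is from the model card and worth double-checking). A quick way to verify against a downloaded checkpoint, assuming Kimi-K2's config.json follows the DeepSeek-V3 layout with an n_routed_experts key:

```python
import json

# Assumption: DeepSeek-V3-style config with an n_routed_experts key.
with open("Kimi-K2-Instruct/config.json") as f:
    cfg = json.load(f)
print(cfg["n_routed_experts"])  # expected 384, which needs the new 512 cap
```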
