
Commit 78ef426

move deepseek above deepseek2
1 parent 5806435 commit 78ef426

File tree: convert_hf_to_gguf.py, src/llama.cpp

2 files changed (+77, -77)

convert_hf_to_gguf.py

Lines changed: 48 additions & 48 deletions
@@ -3430,40 +3430,52 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
-@Model.register("DeepseekV2ForCausalLM")
-class DeepseekV2Model(Model):
-    model_arch = gguf.MODEL_ARCH.DEEPSEEK2
+@Model.register("DeepseekForCausalLM")
+class DeepseekModel(Model):
+    model_arch = gguf.MODEL_ARCH.DEEPSEEK
 
     def set_vocab(self):
-        self._set_vocab_gpt2()
+        try:
+            self._set_vocab_sentencepiece()
+        except FileNotFoundError:
+            self._set_vocab_gpt2()
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
+        if "head_dim" in hparams:
+            rope_dim = hparams["head_dim"]
+        else:
+            rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
 
+        self.gguf_writer.add_rope_dimension_count(rope_dim)
+        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
         self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
         self.gguf_writer.add_vocab_size(hparams["vocab_size"])
-        if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
-            self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
-        self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"])
-        self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
-        self.gguf_writer.add_value_length(hparams["v_head_dim"])
         self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])
+        self.gguf_writer.add_expert_weights_scale(1.0)
         self.gguf_writer.add_expert_count(hparams["n_routed_experts"])
         self.gguf_writer.add_expert_shared_count(hparams["n_shared_experts"])
-        self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
-        self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
-
-        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
-            if self.hparams["rope_scaling"].get("type") == "yarn":
-                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
-                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
-                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
-                self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1 * hparams["rope_scaling"]["mscale_all_dim"])
 
     _experts: list[dict[str, Tensor]] | None = None
 
+    @staticmethod
+    def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
+        if n_head_kv is not None and n_head != n_head_kv:
+            n_head = n_head_kv
+        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
+                .swapaxes(1, 2)
+                .reshape(weights.shape))
+
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
+        n_head = self.hparams["num_attention_heads"]
+        n_kv_head = self.hparams.get("num_key_value_heads")
+
+        if name.endswith(("q_proj.weight", "q_proj.bias")):
+            data_torch = DeepseekModel.permute(data_torch, n_head, n_head)
+        if name.endswith(("k_proj.weight", "k_proj.bias")):
+            data_torch = DeepseekModel.permute(data_torch, n_head, n_kv_head)
+
         # process the experts separately
         if name.find("mlp.experts") != -1:
             n_experts = self.hparams["n_routed_experts"]
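
Aside (not part of the commit): a minimal sketch of what the `permute` helper added to `DeepseekModel` does. It regroups the per-head rows of the q/k projection weights from the Hugging Face ordering into the ordering the GGUF/llama.cpp RoPE code expects. The toy shapes and values below are invented for illustration only.

import torch

def permute(weights: torch.Tensor, n_head: int, n_head_kv: int | None):
    # same logic as DeepseekModel.permute in the diff above
    if n_head_kv is not None and n_head != n_head_kv:
        n_head = n_head_kv
    return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
            .swapaxes(1, 2)
            .reshape(weights.shape))

# toy projection: 2 heads, head_dim 4, hidden size 4 -- row i is filled with the value i
n_head, head_dim, n_embd = 2, 4, 4
w = torch.arange(n_head * head_dim).repeat_interleave(n_embd).reshape(-1, n_embd)

print(w[:, 0].tolist())                           # [0, 1, 2, 3, 4, 5, 6, 7]  (original row order)
print(permute(w, n_head, n_head)[:, 0].tolist())  # [0, 2, 1, 3, 4, 6, 5, 7]  (rows regrouped per head)

The commit's `modify_tensors` override applies this to `q_proj.*` with `n_head` and to `k_proj.*` with `num_key_value_heads`, as shown in the hunk above.
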
@@ -3509,52 +3521,40 @@ def prepare_tensors(self):
                 raise ValueError(f"Unprocessed experts: {experts}")
 
 
-@Model.register("DeepseekForCausalLM")
-class DeepseekModel(Model):
-    model_arch = gguf.MODEL_ARCH.DEEPSEEK
+@Model.register("DeepseekV2ForCausalLM")
+class DeepseekV2Model(Model):
+    model_arch = gguf.MODEL_ARCH.DEEPSEEK2
 
     def set_vocab(self):
-        try:
-            self._set_vocab_sentencepiece()
-        except FileNotFoundError:
-            self._set_vocab_gpt2()
+        self._set_vocab_gpt2()
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
         hparams = self.hparams
-        if "head_dim" in hparams:
-            rope_dim = hparams["head_dim"]
-        else:
-            rope_dim = hparams["hidden_size"] // hparams["num_attention_heads"]
 
-        self.gguf_writer.add_rope_dimension_count(rope_dim)
-        self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.NONE)
         self.gguf_writer.add_leading_dense_block_count(hparams["first_k_dense_replace"])
         self.gguf_writer.add_vocab_size(hparams["vocab_size"])
+        if "q_lora_rank" in hparams and hparams["q_lora_rank"] is not None:
+            self.gguf_writer.add_q_lora_rank(hparams["q_lora_rank"])
+        self.gguf_writer.add_kv_lora_rank(hparams["kv_lora_rank"])
+        self.gguf_writer.add_key_length(hparams["qk_nope_head_dim"] + hparams["qk_rope_head_dim"])
+        self.gguf_writer.add_value_length(hparams["v_head_dim"])
         self.gguf_writer.add_expert_feed_forward_length(hparams["moe_intermediate_size"])
-        self.gguf_writer.add_expert_weights_scale(1.0)
         self.gguf_writer.add_expert_count(hparams["n_routed_experts"])
         self.gguf_writer.add_expert_shared_count(hparams["n_shared_experts"])
+        self.gguf_writer.add_expert_weights_scale(hparams["routed_scaling_factor"])
+        self.gguf_writer.add_rope_dimension_count(hparams["qk_rope_head_dim"])
 
-    _experts: list[dict[str, Tensor]] | None = None
+        if self.hparams.get("rope_scaling") is not None and "factor" in self.hparams["rope_scaling"]:
+            if self.hparams["rope_scaling"].get("type") == "yarn":
+                self.gguf_writer.add_rope_scaling_type(gguf.RopeScalingType.YARN)
+                self.gguf_writer.add_rope_scaling_factor(self.hparams["rope_scaling"]["factor"])
+                self.gguf_writer.add_rope_scaling_orig_ctx_len(self.hparams["rope_scaling"]["original_max_position_embeddings"])
+                self.gguf_writer.add_rope_scaling_yarn_log_mul(0.1 * hparams["rope_scaling"]["mscale_all_dim"])
 
-    @staticmethod
-    def permute(weights: Tensor, n_head: int, n_head_kv: int | None):
-        if n_head_kv is not None and n_head != n_head_kv:
-            n_head = n_head_kv
-        return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
-                .swapaxes(1, 2)
-                .reshape(weights.shape))
+    _experts: list[dict[str, Tensor]] | None = None
 
     def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
-        n_head = self.hparams["num_attention_heads"]
-        n_kv_head = self.hparams.get("num_key_value_heads")
-
-        if name.endswith(("q_proj.weight", "q_proj.bias")):
-            data_torch = DeepseekModel.permute(data_torch, n_head, n_head)
-        if name.endswith(("k_proj.weight", "k_proj.bias")):
-            data_torch = DeepseekModel.permute(data_torch, n_head, n_kv_head)
-
         # process the experts separately
         if name.find("mlp.experts") != -1:
             n_experts = self.hparams["n_routed_experts"]
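
Aside (not part of the commit): a small sketch of how the `rope_scaling` block kept in `DeepseekV2Model.set_gguf_parameters` maps a config.json entry onto the rope-scaling metadata. The dictionary values here are made up for illustration; only the `0.1 * mscale_all_dim` multiplier comes from the code above, and the printed labels are rough stand-ins for the keys written by the `add_rope_scaling_*` calls.

# hypothetical config.json excerpt -- values are illustrative only
hparams = {
    "rope_scaling": {
        "type": "yarn",
        "factor": 40,
        "original_max_position_embeddings": 4096,
        "mscale_all_dim": 1.0,
    }
}

rope_scaling = hparams.get("rope_scaling")
if rope_scaling is not None and "factor" in rope_scaling:
    if rope_scaling.get("type") == "yarn":
        print("rope scaling type         = yarn")
        print("rope scaling factor       =", rope_scaling["factor"])
        print("rope scaling orig ctx len =", rope_scaling["original_max_position_embeddings"])
        print("yarn log multiplier       =", 0.1 * rope_scaling["mscale_all_dim"])  # -> 0.1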

src/llama.cpp

Lines changed: 29 additions & 29 deletions
@@ -8917,15 +8917,8 @@ static bool llm_load_tensors(
                         layer.ffn_up_exps = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS, "weight", i), {n_embd, n_ff, n_expert}, 0);
                     }
                 } break;
-            case LLM_ARCH_DEEPSEEK2:
+            case LLM_ARCH_DEEPSEEK:
                 {
-                    const bool is_lite = (hparams.n_layer == 27);
-
-                    const int64_t n_embd_head_qk_rope = hparams.n_rot;
-                    const int64_t n_embd_head_qk_nope = hparams.n_embd_head_k - hparams.n_rot;
-
-                    const int64_t q_lora_rank = hparams.n_lora_q;
-                    const int64_t kv_lora_rank = hparams.n_lora_kv;
 
                     const int64_t n_ff_exp = hparams.n_ff_exp;
                     const int64_t n_expert_shared = hparams.n_expert_shared;
@@ -8940,23 +8933,11 @@ static bool llm_load_tensors(
                         auto & layer = model.layers[i];
 
                         layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
-                        if (!is_lite) {
-                            layer.attn_q_a_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_A_NORM, "weight", i), {q_lora_rank}, 0);
-                        }
-
-                        layer.attn_kv_a_norm = create_tensor(tn(LLM_TENSOR_ATTN_KV_A_NORM, "weight", i), {kv_lora_rank}, 0);
-
-                        if (!is_lite) {
-                            layer.wq_a = create_tensor(tn(LLM_TENSOR_ATTN_Q_A, "weight", i), {n_embd, q_lora_rank}, 0);
-                            layer.wq_b = create_tensor(tn(LLM_TENSOR_ATTN_Q_B, "weight", i), {q_lora_rank, n_head * n_embd_head_k}, 0);
-                        } else {
-                            layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_k_gqa}, 0);
-                        }
-
-                        layer.wkv_a_mqa = create_tensor(tn(LLM_TENSOR_ATTN_KV_A_MQA, "weight", i), {n_embd, kv_lora_rank + (n_embd_head_qk_rope)}, 0);
-                        layer.wkv_b = create_tensor(tn(LLM_TENSOR_ATTN_KV_B, "weight", i), {kv_lora_rank, n_head * (n_embd_head_qk_nope + n_embd_head_v)}, 0);
-                        layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_head * ( n_embd_head_v), n_embd}, 0);
 
+                        layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd}, 0);
+                        layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa}, 0);
+                        layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}, 0);
+                        layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
                         layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
 
                         if (i < (int) hparams.n_layer_dense_lead) {
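
Aside (not part of the commit): the DEEPSEEK case restored above builds plain multi-head/GQA attention tensors, in contrast to the MLA tensors used by DEEPSEEK2. A quick Python sketch of the shape arithmetic behind the `create_tensor` calls; all hparam values are invented for illustration.

# invented example values -- real ones come from the converted model's hparams
n_embd      = 4096                     # hidden size
n_head      = 32
n_head_kv   = 32                       # equal to n_head here; smaller for GQA checkpoints
n_embd_head = n_embd // n_head         # 128
n_embd_gqa  = n_embd_head * n_head_kv  # equals n_embd when n_head_kv == n_head

print("attn_q.weight     ", (n_embd, n_embd))      # layer.wq
print("attn_k.weight     ", (n_embd, n_embd_gqa))  # layer.wk
print("attn_v.weight     ", (n_embd, n_embd_gqa))  # layer.wv
print("attn_output.weight", (n_embd, n_embd))      # layer.wo
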
@@ -8985,8 +8966,15 @@ static bool llm_load_tensors(
                         }
                     }
                 } break;
-            case LLM_ARCH_DEEPSEEK:
+            case LLM_ARCH_DEEPSEEK2:
                 {
+                    const bool is_lite = (hparams.n_layer == 27);
+
+                    const int64_t n_embd_head_qk_rope = hparams.n_rot;
+                    const int64_t n_embd_head_qk_nope = hparams.n_embd_head_k - hparams.n_rot;
+
+                    const int64_t q_lora_rank = hparams.n_lora_q;
+                    const int64_t kv_lora_rank = hparams.n_lora_kv;
 
                     const int64_t n_ff_exp = hparams.n_ff_exp;
                     const int64_t n_expert_shared = hparams.n_expert_shared;
@@ -9001,11 +8989,23 @@ static bool llm_load_tensors(
                         auto & layer = model.layers[i];
 
                         layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
+                        if (!is_lite) {
+                            layer.attn_q_a_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_A_NORM, "weight", i), {q_lora_rank}, 0);
+                        }
+
+                        layer.attn_kv_a_norm = create_tensor(tn(LLM_TENSOR_ATTN_KV_A_NORM, "weight", i), {kv_lora_rank}, 0);
+
+                        if (!is_lite) {
+                            layer.wq_a = create_tensor(tn(LLM_TENSOR_ATTN_Q_A, "weight", i), {n_embd, q_lora_rank}, 0);
+                            layer.wq_b = create_tensor(tn(LLM_TENSOR_ATTN_Q_B, "weight", i), {q_lora_rank, n_head * n_embd_head_k}, 0);
+                        } else {
+                            layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd_k_gqa}, 0);
+                        }
+
+                        layer.wkv_a_mqa = create_tensor(tn(LLM_TENSOR_ATTN_KV_A_MQA, "weight", i), {n_embd, kv_lora_rank + (n_embd_head_qk_rope)}, 0);
+                        layer.wkv_b = create_tensor(tn(LLM_TENSOR_ATTN_KV_B, "weight", i), {kv_lora_rank, n_head * (n_embd_head_qk_nope + n_embd_head_v)}, 0);
+                        layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), { n_head * ( n_embd_head_v), n_embd}, 0);
 
-                        layer.wq = create_tensor(tn(LLM_TENSOR_ATTN_Q, "weight", i), {n_embd, n_embd}, 0);
-                        layer.wk = create_tensor(tn(LLM_TENSOR_ATTN_K, "weight", i), {n_embd, n_embd_gqa}, 0);
-                        layer.wv = create_tensor(tn(LLM_TENSOR_ATTN_V, "weight", i), {n_embd, n_embd_gqa}, 0);
-                        layer.wo = create_tensor(tn(LLM_TENSOR_ATTN_OUT, "weight", i), {n_embd, n_embd}, 0);
                         layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);
 
                         if (i < (int) hparams.n_layer_dense_lead) {
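
Aside (not part of the commit): the corresponding sketch for the DEEPSEEK2 MLA tensors restored above. The values are invented, loosely in the spirit of a "lite" configuration (the `n_layer == 27` branch, where `wq` stays a single projection and the `wq_a`/`wq_b` low-rank split is skipped); treat them purely as an illustration of the shape formulas in the C++ code.

# invented example values -- real ones come from the GGUF metadata / hparams
n_embd              = 2048
n_head              = 16
n_embd_head_qk_rope = 64    # hparams.n_rot
n_embd_head_qk_nope = 128   # hparams.n_embd_head_k - hparams.n_rot
n_embd_head_v       = 128
kv_lora_rank        = 512   # hparams.n_lora_kv

print("attn_kv_a_mqa.weight", (n_embd, kv_lora_rank + n_embd_head_qk_rope))                    # layer.wkv_a_mqa
print("attn_kv_b.weight    ", (kv_lora_rank, n_head * (n_embd_head_qk_nope + n_embd_head_v)))  # layer.wkv_b
print("attn_output.weight  ", (n_head * n_embd_head_v, n_embd))                                # layer.wo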
