@@ -1562,12 +1562,16 @@ void llama_model::load_hparams(llama_model_loader & ml) {
         case LLM_ARCH_DEEPSEEK2:
             {
                 bool is_lite = (hparams.n_layer == 27);
+                bool is_ocr  = (name.find("ocr") != std::string::npos || name.find("OCR") != std::string::npos);
+
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
                 ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT,   hparams.n_layer_dense_lead);
-                if (!is_lite) {
+                if (!is_lite && !is_ocr) {
                     ml.get_key(LLM_KV_ATTENTION_Q_LORA_RANK, hparams.n_lora_q);
                 }
-                ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
+                if (!is_ocr) {
+                    ml.get_key(LLM_KV_ATTENTION_KV_LORA_RANK, hparams.n_lora_kv);
+                }
                 ml.get_key(LLM_KV_ATTENTION_KEY_LENGTH_MLA,   hparams.n_embd_head_k_mla, false);
                 ml.get_key(LLM_KV_ATTENTION_VALUE_LENGTH_MLA, hparams.n_embd_head_v_mla, false);
                 ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH, hparams.n_ff_exp);
@@ -1583,6 +1587,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                 ml.get_key(LLM_KV_ROPE_SCALING_YARN_LOG_MUL, hparams.rope_yarn_log_mul, false);
 
                 switch (hparams.n_layer) {
+                    case 12: type = LLM_TYPE_3B; break;
                     case 27: type = LLM_TYPE_16B; break;
                     case 60: type = LLM_TYPE_236B; break;
                     case 61: type = LLM_TYPE_671B; break;
@@ -4550,6 +4555,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
         case LLM_ARCH_DEEPSEEK2:
             {
                 const bool is_lite = (hparams.n_layer == 27);
+                const bool is_ocr  = (name.find("ocr") != std::string::npos || name.find("OCR") != std::string::npos);
 
                 const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
 
@@ -4575,6 +4581,35 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
                 for (int i = 0; i < n_layer; ++i) {
                     auto & layer = layers[i];
 
+                    if (is_ocr) {
+                        layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
+                        layer.wq        = create_tensor(tn(LLM_TENSOR_ATTN_Q,    "weight", i), {n_embd, n_embd}, 0);
+                        layer.wk        = create_tensor(tn(LLM_TENSOR_ATTN_K,    "weight", i), {n_embd, n_embd}, 0);
+                        layer.wv        = create_tensor(tn(LLM_TENSOR_ATTN_V,    "weight", i), {n_embd, n_embd}, 0);
+                        layer.wo        = create_tensor(tn(LLM_TENSOR_ATTN_OUT,  "weight", i), {n_embd, n_embd}, 0);
+                        layer.ffn_norm  = create_tensor(tn(LLM_TENSOR_FFN_NORM,  "weight", i), {n_embd}, 0);
+
+                        if (i < (int) hparams.n_layer_dense_lead) {
+                            layer.ffn_gate = create_tensor(tn(LLM_TENSOR_FFN_GATE, "weight", i), {n_embd, n_ff}, 0);
+                            layer.ffn_down = create_tensor(tn(LLM_TENSOR_FFN_DOWN, "weight", i), {  n_ff, n_embd}, 0);
+                            layer.ffn_up   = create_tensor(tn(LLM_TENSOR_FFN_UP,   "weight", i), {n_embd, n_ff}, 0);
+                        }
+                        else {
+                            layer.ffn_gate_inp    = create_tensor(tn(LLM_TENSOR_FFN_GATE_INP,    "weight", i), {n_embd, n_expert}, 0);
+                            layer.ffn_exp_probs_b = create_tensor(tn(LLM_TENSOR_FFN_EXP_PROBS_B, "bias",   i), {n_expert}, TENSOR_NOT_REQUIRED);
+                            // MoE branch
+                            layer.ffn_gate_exps = create_tensor(tn(LLM_TENSOR_FFN_GATE_EXPS, "weight", i), {  n_embd, n_ff_exp, n_expert}, 0);
+                            layer.ffn_down_exps = create_tensor(tn(LLM_TENSOR_FFN_DOWN_EXPS, "weight", i), {n_ff_exp,   n_embd, n_expert}, 0);
+                            layer.ffn_up_exps   = create_tensor(tn(LLM_TENSOR_FFN_UP_EXPS,   "weight", i), {  n_embd, n_ff_exp, n_expert}, 0);
+                            // Shared expert branch
+                            layer.ffn_gate_shexp = create_tensor(tn(LLM_TENSOR_FFN_GATE_SHEXP, "weight", i), {n_embd, n_ff_exp * n_expert_shared}, 0);
+                            layer.ffn_down_shexp = create_tensor(tn(LLM_TENSOR_FFN_DOWN_SHEXP, "weight", i), { n_ff_exp * n_expert_shared, n_embd}, 0);
+                            layer.ffn_up_shexp   = create_tensor(tn(LLM_TENSOR_FFN_UP_SHEXP,   "weight", i), {n_embd, n_ff_exp * n_expert_shared}, 0);
+                        }
+
+                        continue;
+                    }
+
                     layer.attn_norm = create_tensor(tn(LLM_TENSOR_ATTN_NORM, "weight", i), {n_embd}, 0);
                     if (!is_lite) {
                         layer.attn_q_a_norm = create_tensor(tn(LLM_TENSOR_ATTN_Q_A_NORM, "weight", i), {q_lora_rank}, 0);