Skip to content

Commit 23bc779

Browse files
authored
model : detect GigaChat3-10B-A1.8B as deepseek lite (ggml-org#17420)
* Detect GigaChat3-10B-A1.8B as deepseek lite: hardcodes a check on the number of layers to detect whether the model is a lite version of deepseek.
* Add comment identifying deepseek lite variants: deepseek lite variants include DeepSeek-V2-Lite and GigaChat3-10B-A1.8B.
1 parent 28175f8 commit 23bc779

File tree

2 files changed

+6
-3
lines changed

2 files changed

+6
-3
lines changed

src/llama-model.cpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1593,7 +1593,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
15931593
} break;
15941594
case LLM_ARCH_DEEPSEEK2:
15951595
{
1596-
bool is_lite = (hparams.n_layer == 27);
1596+
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
1597+
bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
15971598
ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
15981599
ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT, hparams.n_layer_dense_lead);
15991600
if (!is_lite) {
@@ -4581,7 +4582,8 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
45814582
} break;
45824583
case LLM_ARCH_DEEPSEEK2:
45834584
{
4584-
const bool is_lite = (hparams.n_layer == 27);
4585+
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
4586+
const bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
45854587

45864588
const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
45874589

src/models/deepseek2.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44

55
llm_build_deepseek2::llm_build_deepseek2(const llama_model & model, const llm_graph_params & params) :
66
llm_graph_context(params) {
7-
bool is_lite = (hparams.n_layer == 27);
7+
// lite variants include DeepSeek-V2-Lite, GigaChat3-10B-A1.8B
8+
bool is_lite = (hparams.n_layer == 27 || hparams.n_layer == 26);
89

910
const bool is_mla = (hparams.n_embd_head_k_mla != 0 && hparams.n_embd_head_v_mla != 0);
1011

0 commit comments

Comments
 (0)