
Commit d85099e: support loading GLM4 hparams

1 parent 64fbb24

1 file changed: +30 -0 lines changed

src/llama-model.cpp

Lines changed: 30 additions & 0 deletions
@@ -1436,6 +1436,36 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
+        case LLM_ARCH_GLM4_MOE:
+            {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+                ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT,   hparams.n_layer_dense_lead);
+                ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH,  hparams.n_ff_exp);
+                ml.get_key(LLM_KV_EXPERT_SHARED_COUNT,         hparams.n_expert_shared);
+                ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE,        hparams.expert_weights_scale);
+
+                GGML_ASSERT(hparams.n_expert_shared == 1);
+                GGML_ASSERT(hparams.expert_weights_scale > 0.0);
+
+                // NOTE: currently only two models use this arch - we need to update the switch
+                //       statement below if more are released
+
+                switch (hparams.n_expert) {
+                    // ref: https://github.com/ggml-org/llama.cpp/pull/15026#issue-3285604563
+                    case 128: {
+                        type = LLM_TYPE_106B_A12B;
+                        hparams.use_kq_norm = false;
+                    }; break;
+                    case 160: {
+                        type = LLM_TYPE_355B_A32B;
+                        hparams.use_kq_norm = true;
+                    }; break;
+                    default: {
+                        type = LLM_TYPE_UNKNOWN;
+                        hparams.use_kq_norm = false;
+                    };
+                }
+            } break;
         case LLM_ARCH_BITNET:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
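For context: the new case reads the GLM4-MoE hyperparameters from the model's GGUF metadata and then selects the model type from the expert count. The standalone sketch below (not part of the commit) mirrors that selection logic; the model names GLM-4.5-Air and GLM-4.5 are assumptions inferred from the LLM_TYPE_106B_A12B and LLM_TYPE_355B_A32B enum names and PR #15026, not something the diff itself states.

// Standalone sketch (assumptions labeled, not from the commit): how the expert
// count read from the GGUF metadata maps to the reported model type and to the
// use_kq_norm flag, mirroring the switch on hparams.n_expert in the hunk above.
#include <cstdio>
#include <initializer_list>

struct glm4_moe_variant {
    const char * type;        // mirrors the LLM_TYPE_* value chosen in the diff
    bool         use_kq_norm; // mirrors hparams.use_kq_norm in the diff
};

// Same mapping as the switch on hparams.n_expert in the commit.
static glm4_moe_variant glm4_moe_variant_from_expert_count(unsigned n_expert) {
    switch (n_expert) {
        case 128: return { "106B_A12B (assumed: GLM-4.5-Air)", false };
        case 160: return { "355B_A32B (assumed: GLM-4.5)",     true  };
        default:  return { "UNKNOWN",                          false };
    }
}

int main() {
    for (unsigned n : { 128u, 160u, 64u }) {
        const glm4_moe_variant v = glm4_moe_variant_from_expert_count(n);
        std::printf("n_expert=%u -> type=%s, use_kq_norm=%d\n", n, v.type, (int) v.use_kq_norm);
    }
    return 0;
}

Expert counts other than 128 or 160 fall through to LLM_TYPE_UNKNOWN, just as in the commit, so any future GLM4-MoE variant would require extending the switch (per the NOTE in the diff).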
