1 file changed, +30 −0 lines changed

@@ -1436,6 +1436,36 @@ void llama_model::load_hparams(llama_model_loader & ml) {
                     default: type = LLM_TYPE_UNKNOWN;
                 }
             } break;
+        case LLM_ARCH_GLM4_MOE:
+            {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+                ml.get_key(LLM_KV_LEADING_DENSE_BLOCK_COUNT,   hparams.n_layer_dense_lead);
+                ml.get_key(LLM_KV_EXPERT_FEED_FORWARD_LENGTH,  hparams.n_ff_exp);
+                ml.get_key(LLM_KV_EXPERT_SHARED_COUNT,         hparams.n_expert_shared);
+                ml.get_key(LLM_KV_EXPERT_WEIGHTS_SCALE,        hparams.expert_weights_scale);
+
+                GGML_ASSERT(hparams.n_expert_shared == 1);
+                GGML_ASSERT(hparams.expert_weights_scale > 0.0);
+
+                // NOTE: currently only two models use this arch - we need to update the
+                //       switch statement below if more are released
+
+                switch (hparams.n_expert) {
+                    // ref: https://github.com/ggml-org/llama.cpp/pull/15026#issue-3285604563
+                    case 128: {
+                        type = LLM_TYPE_106B_A12B;
+                        hparams.use_kq_norm = false;
+                    } break;
+                    case 160: {
+                        type = LLM_TYPE_355B_A32B;
+                        hparams.use_kq_norm = true;
+                    } break;
+                    default: {
+                        type = LLM_TYPE_UNKNOWN;
+                        hparams.use_kq_norm = false;
+                    }
+                }
+            } break;
         case LLM_ARCH_BITNET:
             {
                 ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
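
For readers who want to see the dispatch logic from the hunk above in isolation, here is a minimal, self-contained C++ sketch. The names glm4_moe_type, glm4_moe_hparams, and classify_glm4_moe are illustrative stand-ins invented for this sketch, not llama.cpp's real llm_type / llama_hparams API; only the expert counts (128 and 160), the asserts, and the use_kq_norm behaviour are taken from the diff.

// Standalone sketch of the n_expert -> model-type dispatch shown in the diff.
// The enum and struct below are simplified stand-ins, not llama.cpp's types.
#include <cassert>
#include <cstdint>
#include <cstdio>

enum glm4_moe_type { TYPE_UNKNOWN, TYPE_106B_A12B, TYPE_355B_A32B };

struct glm4_moe_hparams {
    uint32_t n_expert             = 0;
    uint32_t n_expert_shared      = 0;
    double   expert_weights_scale = 0.0;
    bool     use_kq_norm          = false;
};

// Mirrors the switch in load_hparams(): 128 routed experts selects the
// 106B-A12B variant without QK-norm, 160 selects the 355B-A32B variant
// with QK-norm; anything else falls through to UNKNOWN.
static glm4_moe_type classify_glm4_moe(glm4_moe_hparams & hp) {
    assert(hp.n_expert_shared == 1);
    assert(hp.expert_weights_scale > 0.0);

    switch (hp.n_expert) {
        case 128: hp.use_kq_norm = false; return TYPE_106B_A12B;
        case 160: hp.use_kq_norm = true;  return TYPE_355B_A32B;
        default:  hp.use_kq_norm = false; return TYPE_UNKNOWN;
    }
}

int main() {
    glm4_moe_hparams hp;
    hp.n_expert             = 128;
    hp.n_expert_shared      = 1;
    hp.expert_weights_scale = 1.0;

    const glm4_moe_type t = classify_glm4_moe(hp);
    printf("type=%d use_kq_norm=%d\n", (int) t, (int) hp.use_kq_norm);
    return 0;
}

Keying the variant off n_expert works because the two released checkpoints that use this arch have distinct routed-expert counts; any future checkpoint with a different count falls through to the UNKNOWN type, which is why the NOTE in the diff asks for the switch to be updated when more models are released.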