Commit fb36d0a

feat: support GLM 4.5 family of models
1 parent 59fc34d commit fb36d0a

File tree

1 file changed: +4 −4 lines changed

src/llama-model.cpp

Lines changed: 4 additions & 4 deletions
@@ -1429,7 +1429,7 @@ void llama_model::load_hparams(llama_model_loader & ml) {
             ml.get_key(LLM_KV_FIRST_K_DENSE_REPLACE, hparams.first_k_dense_replace, 0);

             // Expert gating function (GLM45 uses sigmoid)
-            ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, (uint32_t)LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID);
+            ml.get_key(LLM_KV_EXPERT_GATING_FUNC, hparams.expert_gating_func, static_cast<uint32_t>(LLAMA_EXPERT_GATING_FUNC_TYPE_SIGMOID));

             switch (hparams.n_layer) {
                 case 46: type = LLM_TYPE_12B; break; // GLM-4.5-Air
@@ -4404,7 +4404,7 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
             layer.ffn_norm = create_tensor(tn(LLM_TENSOR_FFN_NORM, "weight", i), {n_embd}, 0);

             // Check if this layer uses MoE or dense FFN based on first_k_dense_replace
-            const bool use_moe = (hparams.n_expert > 0) && (i >= hparams.first_k_dense_replace);
+            const bool use_moe = (hparams.n_expert > 0) && (static_cast<uint32_t>(i) >= hparams.first_k_dense_replace);

             if (use_moe) {
                 // MoE layers
@@ -13445,7 +13445,7 @@ struct llm_build_glm4 : public llm_graph_context {
 struct llm_build_glm45 : public llm_graph_context {
     llm_build_glm45(const llama_model & model, const llm_graph_params & params) : llm_graph_context(params) {
         const int64_t n_embd_head = hparams.n_embd_head_v;
-        const int64_t n_embd_gqa = hparams.n_embd_v_gqa();
+        // const int64_t n_embd_gqa = hparams.n_embd_v_gqa();

         GGML_ASSERT(n_embd_head == hparams.n_embd_head_k);

@@ -13541,7 +13541,7 @@ struct llm_build_glm45 : public llm_graph_context {
             cb(cur, "ffn_norm", il);

             // Check if this is a dense layer (first_k_dense_replace=1, so layer 0 is dense)
-            if (il < hparams.first_k_dense_replace) {
+            if (static_cast<uint32_t>(il) < hparams.first_k_dense_replace) {
                 // Dense FFN layer
                 cur = build_ffn(cur,
                         model.layers[il].ffn_up, NULL, NULL,
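Aside from commenting out the unused n_embd_gqa local, the hunks above all apply the same fix: the signed layer index (i or il) is now explicitly cast before being compared against hparams.first_k_dense_replace, and the C-style (uint32_t) cast on the gating-function default becomes a static_cast. The following is a minimal, self-contained sketch, not code from this repository; the hparams_t struct and its values are simplified assumptions, with first_k_dense_replace treated as an unsigned field as the casts in the diff suggest.

// sketch.cpp — hypothetical illustration of the sign-compare pattern the casts address
#include <cstdint>
#include <cstdio>

struct hparams_t {
    uint32_t first_k_dense_replace = 1; // layers [0, first_k_dense_replace) use a dense FFN
    uint32_t n_expert              = 8; // number of experts; 0 means no MoE at all
};

int main() {
    hparams_t hparams;
    const int n_layer = 4; // loop index is signed, as in the diff

    for (int il = 0; il < n_layer; ++il) {
        // Comparing signed `il` with the unsigned field directly triggers
        // -Wsign-compare; the explicit cast states the intended comparison.
        const bool is_dense = static_cast<uint32_t>(il) < hparams.first_k_dense_replace;
        const bool use_moe  = (hparams.n_expert > 0) && !is_dense;
        std::printf("layer %d -> %s\n", il, use_moe ? "MoE FFN" : "dense FFN");
    }
    return 0;
}

With first_k_dense_replace = 1, only layer 0 stays dense, which matches the comment in the last hunk.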

0 commit comments
