2 files changed: +4 −3 lines changed

@@ -3368,7 +3368,7 @@ def set_gguf_parameters(self):
3368 3368          self.gguf_writer.add_file_type(self.ftype)
3369 3369          self.gguf_writer.add_rope_freq_base(hparams.get("rope_theta", 1_000_000.0))  # for global layers
3370 3370          # both attn_logit_softcapping and final_logit_softcapping are removed in Gemma3
3371      -        assert hparams.get("attn_logit_softcapping") is None
     3371 +        assert hparams.get("attn_logit_softcapping") is None
3372 3372          assert hparams.get("final_logit_softcapping") is None
3373 3373          self.gguf_writer.add_sliding_window(hparams["sliding_window"])
3374 3374          self.gguf_writer.add_head_count_kv(hparams.get("num_key_value_heads", 4))
Original file line number    Diff line number    Diff line change
 9    9    #include <algorithm>
10   10    #include <cassert>
11   11    #include <cstring>
     12  + #include <cmath>
12   13    #include <functional>
13   14    #include <map>
14   15    #include <sstream>
@@ -878,8 +879,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
878  879            }
879  880
880  881            hparams.f_attention_scale = type == LLM_TYPE_27B
881       -            ? 1.0f / sqrtf(float(hparams.n_embd / hparams.n_head(0)))
882       -            : 1.0f / sqrtf(float(hparams.n_embd_head_k));
     882  +            ? 1.0f / std::sqrtf(float(hparams.n_embd / hparams.n_head(0)))
     883  +            : 1.0f / std::sqrtf(float(hparams.n_embd_head_k));
882  883        } break;
883  884        case LLM_ARCH_STARCODER2:
884  885        {
You can’t perform that action at this time.
0 commit comments