Fix: Correctly read query_pre_attn_scalar and attention_multiplier from text_config (Gemma3)

turboderp · turboderp · commit 385a5162ba00 · 2025-03-15T11:01:33.000+01:00
diff --git a/exllamav2/config.py b/exllamav2/config.py
@@ -296,8 +296,8 @@ def prepare(self, no_tensors: bool = False):
         self.num_key_value_groups = self.num_attention_heads // self.num_key_value_heads
         self.use_qk_norm = read(read_config, bool, ["use_qk_norm"], self.arch.lm.default_use_qk_norm)
 
-        self.query_pre_attn_scalar = read(read_config, float, "query_pre_attn_scalar", None)
-        self.attention_multiplier = read(read_config, float, "attention_multiplier", None)
+        self.query_pre_attn_scalar = read(read_config, float, ["query_pre_attn_scalar"], None, opt_subkey = "text_config")
+        self.attention_multiplier = read(read_config, float, ["attention_multiplier"], None, opt_subkey = "text_config")
 
         # MLP params