Gemma 3 fix and patch release. (#2520)

sachinprasadhs · divyashreepathihalli · web-flow · commit 4a41006a039e · 2026-01-13T12:01:52.000-08:00
* Fix overflow issue in Gemma3 float16 (#2519)
* fix gemma3 decoder block overflow issue
* Fix overflow issue in float16
* code reformat
* patch release

---------

Co-authored-by: Divyashree Sreepathihalli <divyashreepathihalli@gmail.com>
diff --git a/keras_hub/src/models/gemma3/gemma3_decoder_block.py b/keras_hub/src/models/gemma3/gemma3_decoder_block.py
@@ -251,6 +251,11 @@ def call(
         cache_update_mask=None,
     ):
         # Note: `vision_mask` is used only for Gemma3.
+        # If float16, we clamp the input to avoid overflow.
+        is_float16 = keras.backend.standardize_dtype(x.dtype) == "float16"
+        if is_float16:
+            x = ops.clip(x, -65504, 65504)
+
         normalized_x = self.pre_attention_norm(x)
         attention_mask = self._compute_attention_mask(
             normalized_x, padding_mask, vision_mask, cache, cache_update_index
@@ -275,7 +280,15 @@ def call(
         if self.dropout:
             attention = self.attention_dropout(attention)
 
-        attention_x = x + attention
+        if is_float16:
+            attention_x = ops.add(
+                ops.cast(x, "float32"), ops.cast(attention, "float32")
+            )
+            attention_x = ops.clip(attention_x, -65504, 65504)
+            attention_x = ops.cast(attention_x, "float16")
+        else:
+            attention_x = x + attention
+
         normalized_x = self.pre_ffw_norm(attention_x)
 
         x1 = self.gating_ffw(normalized_x)
@@ -286,7 +299,14 @@ def call(
         if self.use_post_ffw_norm:
             x = self.post_ffw_norm(x)
 
-        x = x + attention_x
+        if is_float16:
+            x = ops.add(
+                ops.cast(x, "float32"), ops.cast(attention_x, "float32")
+            )
+            x = ops.clip(x, -65504, 65504)
+            x = ops.cast(x, "float16")
+        else:
+            x = x + attention_x
 
         if cache is not None:
             return x, new_cache
diff --git a/keras_hub/src/version.py b/keras_hub/src/version.py
@@ -1,7 +1,7 @@
 from keras_hub.src.api_export import keras_hub_export
 
 # Unique source of truth for the version number.
-__version__ = "0.25.0"
+__version__ = "0.25.1"
 
 
 @keras_hub_export("keras_hub.version")