Skip to content

Commit 8914427

Browse files
Patch release commits for 3.13.1 (#22005)
* Remove NumPy warning with NumPy >= 2. (#21949) Merely importing keras currently triggers this warning with NumPy 2. ``` keras/src/export/tf2onnx_lib.py:8: FutureWarning: In the future `np.object` will be defined as the corresponding NumPy scalar. ``` Only patch NumPy if and when needed. * Fix CUDNN flash attention for JAX > 0.6.2. (#21970) The signature of `check_is_flash_attention` changed with JAX 0.7.0. In addition to `query` and `key`, a positional argument of `value` was added. This was not caught because our kokoro tests use JAX 0.6.2, which is the last version that supports Python 3.10. This change was tested here: #21957 * Do not always make the batch size dynamic during export. (#21944) This is a follow-up of #21674. That PR changed the signature of `make_tf_tensor_spec` from `(x)` to `(x, dynamic_batch=True)`, thereby adding the ability to make the batch size dynamic. That PR also added `_get_save_spec(self, dynamic_batch=True)`, which uses `make_tf_tensor_spec` and forwards the `dynamic_batch` argument. However, the default before this change for other export paths (SavedModel, ONNX) was to keep the batch size untouched. In particular, when a user manually provides an `input_signature` to [`ExportArchive.add_endpoint`](https://github.com/keras-team/keras/blob/master/keras/src/export/saved_model.py#L362), we should honor it: the user controls whether the batch size is dynamic or not via the `input_signature`. This PR changes the default of `make_tf_tensor_spec` back to `dynamic_batch=False` to revert SavedModel and ONNX exports to the previous behavior. Also removed the call to `return super()._get_save_spec(dynamic_batch)`, which can never succeed because `TFLayer` is a top-level class (ignoring the auto-tracking machinery). * Cherry pick & patch release --------- Co-authored-by: hertschuh <1091026+hertschuh@users.noreply.github.com>
1 parent 986ff97 commit 8914427

File tree

5 files changed

+43
-30
lines changed

5 files changed

+43
-30
lines changed

keras/src/backend/jax/nn.py

Lines changed: 26 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1471,25 +1471,42 @@ def _can_use_flash_attention(query, key, value, bias, raise_error=False):
14711471
# Only support at least Ampere
14721472
if not check_compute_capability("8.0"):
14731473
raise RuntimeError("Require at least Ampere arch to run")
1474-
# Check inputs layout
1474+
1475+
# Inspect inputs of `check_layout`
14751476
check_layout_params = list(
14761477
inspect.signature(check_layout).parameters.keys()
14771478
)
14781479
for known_param in ("query", "key", "value", "bias", "layout"):
14791480
check_layout_params.remove(known_param)
14801481
# Defaults to `None` when not specified.
1481-
kwargs = {key: None for key in check_layout_params}
1482+
check_layout_kwargs = {key: None for key in check_layout_params}
14821483
check_layout(
1483-
query, key, value, bias, layout=_normalize_layout("BTNH"), **kwargs
1484-
)
1485-
check_is_flash_attention(
14861484
query,
14871485
key,
1488-
_normalize_layout("BTNH"),
1489-
cudnn_version,
1490-
bias is not None,
1491-
is_training=False,
1486+
value,
1487+
bias,
1488+
layout=_normalize_layout("BTNH"),
1489+
**check_layout_kwargs,
14921490
)
1491+
1492+
# Inspect inputs of `check_is_flash_attention`
1493+
check_is_flash_attention_params = inspect.signature(
1494+
check_is_flash_attention
1495+
).parameters
1496+
check_is_flash_attention_kwargs = {
1497+
"query": query,
1498+
"key": key,
1499+
"value": value,
1500+
"layout": _normalize_layout("BTNH"),
1501+
"cudnn_version": cudnn_version,
1502+
"has_bias": bias is not None,
1503+
"is_training": False,
1504+
}
1505+
# Remove unsupported arguments
1506+
for param in list(check_is_flash_attention_kwargs.keys()):
1507+
if param not in check_is_flash_attention_params:
1508+
check_is_flash_attention_kwargs.pop(param)
1509+
check_is_flash_attention(**check_is_flash_attention_kwargs)
14931510
return True
14941511
except:
14951512
if raise_error:

keras/src/backend/tensorflow/layer.py

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -94,22 +94,18 @@ def _get_save_spec(self, dynamic_batch=True):
9494
A TensorSpec, list or dict mirroring the model inputs, or
9595
`None` when specs cannot be inferred.
9696
"""
97-
# Prefer the base implementation if available
98-
try:
99-
return super()._get_save_spec(dynamic_batch)
100-
except AttributeError:
101-
# Lazy import to avoid circular dependency
102-
from keras.src.export.export_utils import make_tf_tensor_spec
103-
104-
# Fall back to building specs from `self.inputs`
105-
inputs = getattr(self, "inputs", None)
106-
if inputs is None:
107-
return None
108-
109-
return tree.map_structure(
110-
lambda x: make_tf_tensor_spec(x, dynamic_batch=dynamic_batch),
111-
inputs,
112-
)
97+
# Lazy import to avoid circular dependency
98+
from keras.src.export.export_utils import make_tf_tensor_spec
99+
100+
# Fall back to building specs from `self.inputs`
101+
inputs = getattr(self, "inputs", None)
102+
if inputs is None:
103+
return None
104+
105+
return tree.map_structure(
106+
lambda x: make_tf_tensor_spec(x, dynamic_batch=dynamic_batch),
107+
inputs,
108+
)
113109

114110
@property
115111
def _default_save_signature(self):

keras/src/export/export_utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def make_input_spec(x):
102102
return input_spec
103103

104104

105-
def make_tf_tensor_spec(x, dynamic_batch=True):
105+
def make_tf_tensor_spec(x, dynamic_batch=False):
106106
"""Create a TensorSpec from various input types.
107107
108108
Args:

keras/src/export/tf2onnx_lib.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,6 @@
55

66
import numpy as np
77

8-
if not hasattr(np, "object"):
9-
np.object = object
10-
118

129
@functools.lru_cache()
1310
def patch_tf2onnx():
@@ -20,6 +17,9 @@ def patch_tf2onnx():
2017

2118
logger = logging.getLogger(tf2onnx.__name__)
2219

20+
if not hasattr(np, "object"):
21+
np.object = object
22+
2323
def patched_rewrite_constant_fold(g, ops):
2424
"""
2525
We call tensorflow transform with constant folding but in some cases

keras/src/version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
from keras.src.api_export import keras_export
22

33
# Unique source of truth for the version number.
4-
__version__ = "3.13.0"
4+
__version__ = "3.13.1"
55

66

77
@keras_export("keras.version")

0 commit comments

Comments
 (0)