Handle JAX _DimExpr as dtype. Default arange step to None. (#21688)

hertschuh · web-flow · commit 26d71665f6a0 · 2025-09-26T12:42:54.000-07:00
The pattern used to get dtypes in ops that can handle scalars is `getattr(x, "dtype", type(x))`. The `type(x)` part is used for native Python types `int` and `float`. But if `x` is derived from a dynamic dimension with JAX, it can be a `_DimExpr`. Changed `arange` to have a default `step` of `None` instead of `1`. NumPy, Torch, JAX and Keras all document that the following versions of `arange` are supported: - `arange(stop)`: generate values from 0 to stop, stepping by 1. - `arange(start, stop)`: generate values from start to stop, stepping by 1. - `arange(start, stop, step)`: generate values from start to stop, stepping by step. Note that in the case of NumPy and Torch, this is achieved via overloads and not by detecting `None` values. Regardless, the form `arange(stop, step=n)` is not officially supported. However, by having a default `step` value of `1`, we were always exercising this `arange(stop, step=1)` case when only `stop` was provided. This is causing issues with JAX is some specific contexts. By changing the default value of `step` to `None`, we're aligning with the JAX approach and we can detect when the value is not provided. - Added support for `arange(stop, step=n)` for all backends with unit tests - Fixed bug with Torch backend where `arange(stop, step=n)` would ignore the `step` value These fix a regression in KerasHub introduced by #21672
diff --git a/keras/src/backend/common/variables.py b/keras/src/backend/common/variables.py
@@ -599,8 +599,10 @@ def standardize_shape(shape):
 
     if config.backend() == "jax":
         # Replace `_DimExpr` (dimension expression) with None
+        from jax import export as jax_export
+
         shape = tuple(
-            [None if "_DimExpr" in str(type(d)) else d for d in shape]
+            None if jax_export.is_symbolic_dim(d) else d for d in shape
         )
 
     if config.backend() == "torch":
diff --git a/keras/src/backend/jax/numpy.py b/keras/src/backend/jax/numpy.py
@@ -3,6 +3,7 @@
 
 import jax.experimental.sparse as jax_sparse
 import jax.numpy as jnp
+from jax import export as jax_export
 
 from keras.src.backend import config
 from keras.src.backend.common import dtypes
@@ -306,14 +307,20 @@ def append(x1, x2, axis=None):
     return jnp.append(x1, x2, axis=axis)
 
 
-def arange(start, stop=None, step=1, dtype=None):
+def arange(start, stop=None, step=None, dtype=None):
+    def get_dtype(x):
+        if hasattr(x, "dtype"):
+            return x.dtype
+        if jax_export.is_symbolic_dim(x):
+            return int
+        return type(x)
+
     if dtype is None:
-        dtypes_to_resolve = [
-            getattr(start, "dtype", type(start)),
-            getattr(step, "dtype", type(step)),
-        ]
+        dtypes_to_resolve = [get_dtype(start)]
         if stop is not None:
-            dtypes_to_resolve.append(getattr(stop, "dtype", type(stop)))
+            dtypes_to_resolve.append(get_dtype(stop))
+        if step is not None:
+            dtypes_to_resolve.append(get_dtype(step))
         dtype = dtypes.result_type(*dtypes_to_resolve)
     dtype = standardize_dtype(dtype)
     return jnp.arange(start, stop, step=step, dtype=dtype)
diff --git a/keras/src/backend/numpy/numpy.py b/keras/src/backend/numpy/numpy.py
@@ -173,15 +173,18 @@ def append(x1, x2, axis=None):
     return np.append(x1, x2, axis=axis)
 
 
-def arange(start, stop=None, step=1, dtype=None):
+def arange(start, stop=None, step=None, dtype=None):
     if dtype is None:
-        dtypes_to_resolve = [
-            getattr(start, "dtype", type(start)),
-            getattr(step, "dtype", type(step)),
-        ]
+        dtypes_to_resolve = [getattr(start, "dtype", type(start))]
         if stop is not None:
             dtypes_to_resolve.append(getattr(stop, "dtype", type(stop)))
+        if step is not None:
+            dtypes_to_resolve.append(getattr(step, "dtype", type(step)))
         dtype = dtypes.result_type(*dtypes_to_resolve)
+    if stop is None:
+        start, stop = 0, start
+    if step is None:
+        step = 1
     return np.arange(start, stop, step=step, dtype=dtype)
 
 
diff --git a/keras/src/backend/openvino/numpy.py b/keras/src/backend/openvino/numpy.py
@@ -210,7 +210,7 @@ def append(x1, x2, axis=None):
     return OpenVINOKerasTensor(ov_opset.concat([x1, x2], axis).output(0))
 
 
-def arange(start, stop=None, step=1, dtype=None):
+def arange(start, stop=None, step=None, dtype=None):
     if stop is None:
         start, stop = get_ov_output(0), get_ov_output(start)
     else:
diff --git a/keras/src/backend/tensorflow/numpy.py b/keras/src/backend/tensorflow/numpy.py
@@ -813,16 +813,17 @@ def append(x1, x2, axis=None):
         return tf.concat([x1, x2], axis=axis)
 
 
-def arange(start, stop=None, step=1, dtype=None):
+def arange(start, stop=None, step=None, dtype=None):
     if dtype is None:
-        dtypes_to_resolve = [
-            getattr(start, "dtype", type(start)),
-            getattr(step, "dtype", type(step)),
-        ]
+        dtypes_to_resolve = [getattr(start, "dtype", type(start))]
         if stop is not None:
             dtypes_to_resolve.append(getattr(stop, "dtype", type(stop)))
+        if step is not None:
+            dtypes_to_resolve.append(getattr(step, "dtype", type(step)))
         dtype = dtypes.result_type(*dtypes_to_resolve)
     dtype = standardize_dtype(dtype)
+    if step is None:
+        step = 1
     try:
         out = tf.range(start, stop, delta=step, dtype=dtype)
     except tf.errors.NotFoundError:
diff --git a/keras/src/backend/torch/numpy.py b/keras/src/backend/torch/numpy.py
@@ -313,18 +313,19 @@ def append(x1, x2, axis=None):
     return torch.cat((x1, x2), dim=axis)
 
 
-def arange(start, stop=None, step=1, dtype=None):
+def arange(start, stop=None, step=None, dtype=None):
     if dtype is None:
-        dtypes_to_resolve = [
-            getattr(start, "dtype", type(start)),
-            getattr(step, "dtype", type(step)),
-        ]
+        dtypes_to_resolve = [getattr(start, "dtype", type(start))]
         if stop is not None:
             dtypes_to_resolve.append(getattr(stop, "dtype", type(stop)))
+        if step is not None:
+            dtypes_to_resolve.append(getattr(step, "dtype", type(step)))
         dtype = dtypes.result_type(*dtypes_to_resolve)
     dtype = to_torch_dtype(dtype)
     if stop is None:
-        return torch.arange(end=start, dtype=dtype, device=get_device())
+        start, stop = 0, start
+    if step is None:
+        step = 1
     return torch.arange(
         start, stop, step=step, dtype=dtype, device=get_device()
     )
diff --git a/keras/src/ops/numpy.py b/keras/src/ops/numpy.py
@@ -595,27 +595,28 @@ def __init__(self, dtype=None, *, name=None):
         super().__init__(name=name)
         self.dtype = None if dtype is None else backend.standardize_dtype(dtype)
 
-    def call(self, start, stop=None, step=1):
+    def call(self, start, stop=None, step=None):
         return backend.numpy.arange(start, stop, step=step, dtype=self.dtype)
 
-    def compute_output_spec(self, start, stop=None, step=1):
+    def compute_output_spec(self, start, stop=None, step=None):
         if stop is None:
             start, stop = 0, start
+        if step is None:
+            step = 1
         output_shape = [int(np.ceil((stop - start) / step))]
         dtype = self.dtype
         if dtype is None:
-            dtypes_to_resolve = [
-                getattr(start, "dtype", type(start)),
-                getattr(step, "dtype", type(step)),
-            ]
+            dtypes_to_resolve = [getattr(start, "dtype", type(start))]
             if stop is not None:
                 dtypes_to_resolve.append(getattr(stop, "dtype", type(stop)))
+            if step is not None:
+                dtypes_to_resolve.append(getattr(step, "dtype", type(step)))
             dtype = dtypes.result_type(*dtypes_to_resolve)
         return KerasTensor(output_shape, dtype=dtype)
 
 
 @keras_export(["keras.ops.arange", "keras.ops.numpy.arange"])
-def arange(start, stop=None, step=1, dtype=None):
+def arange(start, stop=None, step=None, dtype=None):
     """Return evenly spaced values within a given interval.
 
     `arange` can be called with a varying number of positional arguments:
diff --git a/keras/src/ops/numpy_test.py b/keras/src/ops/numpy_test.py
@@ -6357,8 +6357,10 @@ def test_argsort(self, dtype):
         )
 
     @parameterized.parameters(
-        (10, None, 1, None),
-        (0, 10, 1, None),
+        (10, None, None, None),  # stop
+        (2, 10, None, None),  # start, stop
+        (10, None, 2, None),  # stop, step
+        (0, 10, 2, None),  # start, stop, step
         (0, 10, 0.5, None),
         (10.0, None, 1, None),
         (0, 10.0, 1, None),