
Commit b9ff57a

Remove the usage of JAX_DEFAULT_DTYPE_BITS and the tests for 64-bit dtypes. (#21604)
* Remove the usage of `JAX_DEFAULT_DTYPE_BITS` and the tests for 64-bit dtypes.
* Fix dtype issues.
1 parent 6dcf719 commit b9ff57a

21 files changed, +171 -263 lines changed

conftest.py

Lines changed: 0 additions & 6 deletions
@@ -1,9 +1,3 @@
-import os
-
-# When using jax.experimental.enable_x64 in unit test, we want to keep the
-# default dtype with 32 bits, aligning it with Keras's default.
-os.environ["JAX_DEFAULT_DTYPE_BITS"] = "32"
-
 try:
     # When using torch and tensorflow, torch needs to be imported first,
     # otherwise it will segfault upon import. This should force the torch

keras/src/backend/common/dtypes.py

Lines changed: 5 additions & 0 deletions
@@ -244,6 +244,7 @@ def _resolve_weak_type(dtype, precision="32"):
     "int64": "int32",
     "uint64": "uint32",
     "float64": "float32",
+    "complex128": "complex64",
 }


@@ -275,6 +276,10 @@ def _lattice_result_type(*args):
     precision = config.floatx()[-2:]
     if out_weak_type:
         out_dtype = _resolve_weak_type(out_dtype, precision=precision)
+
+    # Force to be 32-bit dtype when encountering 64-bit dtype.
+    # TODO(hongyu): Add a config to enable 64-bit dtypes.
+    out_dtype = BIT64_TO_BIT32_DTYPE.get(out_dtype, out_dtype)
     return out_dtype


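Note: the net effect of these two hunks is that type promotion can no longer return a 64-bit dtype; whatever the lattice resolves to is folded back to its 32-bit counterpart. A minimal stand-alone sketch of the folding (the dict mirrors `BIT64_TO_BIT32_DTYPE` as extended by this commit; `fold_to_32_bit` is a hypothetical name for illustration):

# Sketch of the 64-bit -> 32-bit folding applied in `_lattice_result_type`.
BIT64_TO_BIT32_DTYPE = {
    "int64": "int32",
    "uint64": "uint32",
    "float64": "float32",
    "complex128": "complex64",
}

def fold_to_32_bit(dtype):
    # dtypes that are already 32-bit or smaller pass through unchanged
    return BIT64_TO_BIT32_DTYPE.get(dtype, dtype)

assert fold_to_32_bit("float64") == "float32"
assert fold_to_32_bit("complex128") == "complex64"
assert fold_to_32_bit("int8") == "int8"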
keras/src/backend/common/dtypes_test.py

Lines changed: 15 additions & 32 deletions
@@ -12,40 +12,23 @@
 class DtypesTest(test_case.TestCase):
     """Test the dtype to verify that the behavior matches JAX."""

+    ALL_DTYPES = [
+        x
+        for x in dtypes.ALLOWED_DTYPES
+        if x
+        not in (
+            "string",
+            "complex128",
+            "float64",
+            "uint64",
+            "int64",
+        )
+        + dtypes.FLOAT8_TYPES  # Remove float8 dtypes for the following tests
+    ] + [None]
     if backend.backend() == "torch":
-        from keras.src.backend.torch.core import to_torch_dtype
-
-        # TODO: torch doesn't support uint64.
-        ALL_DTYPES = []
-        for x in dtypes.ALLOWED_DTYPES:
-            if x not in ["string", "uint64"]:
-                x = str(to_torch_dtype(x)).split(".")[-1]
-            if x not in ALL_DTYPES:  # skip duplicates created by remapping
-                ALL_DTYPES.append(x)
-        ALL_DTYPES += [None]
+        ALL_DTYPES = [x for x in ALL_DTYPES if x not in ("uint16", "uint32")]
     elif backend.backend() == "openvino":
-        ALL_DTYPES = [
-            x
-            for x in dtypes.ALLOWED_DTYPES
-            if x not in ["string", "complex64", "complex128"]
-        ] + [None]
-    else:
-        ALL_DTYPES = [x for x in dtypes.ALLOWED_DTYPES if x != "string"] + [
-            None
-        ]
-    # Remove float8 dtypes for the following tests
-    ALL_DTYPES = [x for x in ALL_DTYPES if x not in dtypes.FLOAT8_TYPES]
-
-    def setUp(self):
-        from jax.experimental import enable_x64
-
-        self.jax_enable_x64 = enable_x64()
-        self.jax_enable_x64.__enter__()
-        return super().setUp()
-
-    def tearDown(self):
-        self.jax_enable_x64.__exit__(None, None, None)
-        return super().tearDown()
+        ALL_DTYPES = [x for x in ALL_DTYPES if x not in ("complex64",)]

     @parameterized.named_parameters(
         named_product(dtype1=ALL_DTYPES, dtype2=[bool, int, float])
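Note: the new comprehension folds the float8 exclusions into the same membership test by concatenating a tuple literal with `dtypes.FLOAT8_TYPES`, which only works because both operands are tuples. A small stand-alone illustration (the `FLOAT8_TYPES` stand-in below is an assumption mirroring the Keras definition):

# Stand-in for dtypes.FLOAT8_TYPES; assumed to be a tuple as in Keras.
FLOAT8_TYPES = ("float8_e4m3fn", "float8_e5m2")

# One membership test now covers string, complex128, 64-bit, and float8.
excluded = ("string", "complex128", "float64", "uint64", "int64") + FLOAT8_TYPES
print("float8_e5m2" in excluded)  # True
print("float32" in excluded)  # False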

keras/src/backend/common/variables_test.py

Lines changed: 27 additions & 38 deletions
@@ -782,47 +782,36 @@ def test_invalid_float(self):
         float(v)


-# TODO: Using uint64 will lead to weak type promotion (`float`),
-# resulting in different behavior between JAX and Keras. Currently, we
-# are skipping the test for uint64
-ALL_DTYPES = [
-    x for x in dtypes.ALLOWED_DTYPES if x not in ["string", "uint64"]
-] + [None]
-INT_DTYPES = [x for x in dtypes.INT_TYPES if x != "uint64"]
-FLOAT_DTYPES = dtypes.FLOAT_TYPES
-COMPLEX_DTYPES = ["complex32", "complex64", "complex128"]
-
-if backend.backend() == "torch":
-    # TODO: torch doesn't support uint16, uint32 and uint64, complex
-    ALL_DTYPES = [
-        x
-        for x in ALL_DTYPES
-        if x not in ["uint16", "uint32", "uint64", "complex128", "complex64"]
-    ]
-    INT_DTYPES = [
-        x for x in INT_DTYPES if x not in ["uint16", "uint32", "uint64"]
-    ]
-elif backend.backend() == "openvino":
-    # TODO: openvino doesn't support complex
-    ALL_DTYPES = [x for x in ALL_DTYPES if x not in ["complex128", "complex64"]]
-# Remove float8 dtypes for the following tests
-ALL_DTYPES = [x for x in ALL_DTYPES if x not in dtypes.FLOAT8_TYPES]
-NON_COMPLEX_DTYPES = [x for x in ALL_DTYPES if x and x not in COMPLEX_DTYPES]
-
-
 class VariableOpsDTypeTest(test_case.TestCase):
     """Test the dtype to verify that the behavior matches JAX."""

-    def setUp(self):
-        from jax.experimental import enable_x64
-
-        self.jax_enable_x64 = enable_x64()
-        self.jax_enable_x64.__enter__()
-        return super().setUp()
-
-    def tearDown(self):
-        self.jax_enable_x64.__exit__(None, None, None)
-        return super().tearDown()
+    ALL_DTYPES = [
+        x
+        for x in dtypes.ALLOWED_DTYPES
+        if x
+        not in (
+            "string",
+            "complex128",
+            # Remove 64-bit dtypes.
+            "float64",
+            "uint64",
+            "int64",
+        )
+        + dtypes.FLOAT8_TYPES  # Remove float8 dtypes for the following tests
+    ] + [None]
+    INT_DTYPES = [x for x in dtypes.INT_TYPES if x not in ("uint64", "int64")]
+    FLOAT_DTYPES = [x for x in dtypes.FLOAT_TYPES if x not in ("float64",)]
+    COMPLEX_DTYPES = ["complex32", "complex64"]
+    if backend.backend() == "torch":
+        ALL_DTYPES = [
+            x for x in ALL_DTYPES if x not in ("uint16", "uint32", "complex64")
+        ]
+        INT_DTYPES = [x for x in INT_DTYPES if x not in ("uint16", "uint32")]
+    elif backend.backend() == "openvino":
+        ALL_DTYPES = [x for x in ALL_DTYPES if x not in ("complex64",)]
+    NON_COMPLEX_DTYPES = [
+        x for x in ALL_DTYPES if x and x not in ["complex32", "complex64"]
+    ]

     @parameterized.named_parameters(
         named_product(dtypes=itertools.combinations(ALL_DTYPES, 2))

keras/src/backend/numpy/numpy.py

Lines changed: 15 additions & 0 deletions
@@ -372,6 +372,9 @@ def bincount_fn(arr_w):
 def bitwise_and(x, y):
     x = convert_to_tensor(x)
     y = convert_to_tensor(y)
+    dtype = dtypes.result_type(x.dtype, y.dtype)
+    x = x.astype(dtype)
+    y = y.astype(dtype)
     return np.bitwise_and(x, y)


@@ -387,19 +390,28 @@ def bitwise_not(x):
 def bitwise_or(x, y):
     x = convert_to_tensor(x)
     y = convert_to_tensor(y)
+    dtype = dtypes.result_type(x.dtype, y.dtype)
+    x = x.astype(dtype)
+    y = y.astype(dtype)
     return np.bitwise_or(x, y)


 def bitwise_xor(x, y):
     x = convert_to_tensor(x)
     y = convert_to_tensor(y)
+    dtype = dtypes.result_type(x.dtype, y.dtype)
+    x = x.astype(dtype)
+    y = y.astype(dtype)
     return np.bitwise_xor(x, y)


 def bitwise_left_shift(x, y):
     x = convert_to_tensor(x)
     if not isinstance(y, int):
         y = convert_to_tensor(y)
+        dtype = dtypes.result_type(x.dtype, y.dtype)
+        x = x.astype(dtype)
+        y = y.astype(dtype)
     return np.left_shift(x, y)


@@ -411,6 +423,9 @@ def bitwise_right_shift(x, y):
     x = convert_to_tensor(x)
     if not isinstance(y, int):
         y = convert_to_tensor(y)
+        dtype = dtypes.result_type(x.dtype, y.dtype)
+        x = x.astype(dtype)
+        y = y.astype(dtype)
     return np.right_shift(x, y)


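Note: the explicit `result_type` casts matter because NumPy applies its own promotion rules to mixed-dtype operands, and those can escalate to 64 bits. Casting both inputs to the Keras-resolved dtype first keeps the bitwise results inside the 32-bit lattice. A hedged illustration in plain NumPy (the `"int32"` below assumes Keras's new folding of the lattice result `int64` down to `int32`):

import numpy as np

a = np.array([1, 2, 3], dtype="int32")
b = np.array([4, 5, 6], dtype="uint32")

# Left alone, NumPy promotes int32 & uint32 to int64.
print(np.bitwise_and(a, b).dtype)  # int64

# The commit resolves the dtype with Keras's rules first (now capped at
# 32 bits) and casts both operands before calling the NumPy op.
dtype = "int32"  # assumed result of dtypes.result_type("int32", "uint32")
print(np.bitwise_and(a.astype(dtype), b.astype(dtype)).dtype)  # int32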
keras/src/backend/torch/nn.py

Lines changed: 1 addition & 2 deletions
@@ -9,7 +9,6 @@
 from keras.src.backend.torch.core import convert_to_tensor
 from keras.src.backend.torch.core import get_device
 from keras.src.backend.torch.numpy import expand_dims
-from keras.src.backend.torch.numpy import maximum
 from keras.src.backend.torch.numpy import where
 from keras.src.utils.argument_validation import standardize_tuple

@@ -668,7 +667,7 @@ def one_hot(x, num_classes, axis=-1, dtype=None, sparse=False):
     # manual handling for negatives in the input to one_hot by using max(x, 0).
     # The output will have some invalid results, so we set them back to 0 using
     # `where` afterwards.
-    output = tnn.one_hot(maximum(x, 0), num_classes)
+    output = tnn.one_hot(torch.clamp(x, min=0), num_classes)
     output = where(expand_dims(x, axis=-1) >= 0, output, zero)
     output = convert_to_tensor(output, dtype=dtype)
     dims = output.dim()
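Note: `torch.clamp(x, min=0)` keeps the integer dtype of the indices, whereas the removed `maximum` op from the Keras torch backend routes through Keras type promotion. A minimal stand-alone sketch of the clamp-then-mask pattern used in `one_hot` (plain PyTorch; the example values and `num_classes=3` are illustrative):

import torch
import torch.nn.functional as tnn

x = torch.tensor([0, 2, -1, 1])  # -1 marks "no class"

# Clamp negatives to 0 so tnn.one_hot accepts the indices; clamp keeps
# the int64 dtype that one_hot requires.
output = tnn.one_hot(torch.clamp(x, min=0), num_classes=3)

# Zero out the rows that came from negative indices.
output = torch.where(x.unsqueeze(-1) >= 0, output, torch.zeros_like(output))
print(output)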

keras/src/constraints/constraints.py

Lines changed: 9 additions & 7 deletions
@@ -110,7 +110,9 @@ def __call__(self, w):
         w = backend.convert_to_tensor(w)
         norms = ops.sqrt(ops.sum(ops.square(w), axis=self.axis, keepdims=True))
         desired = ops.clip(norms, 0, self.max_value)
-        return w * (desired / (backend.epsilon() + norms))
+        return ops.cast(w, norms.dtype) * (
+            desired / (backend.epsilon() + norms)
+        )

     def get_config(self):
         return {"max_value": self.max_value, "axis": self.axis}
@@ -122,7 +124,7 @@ class NonNeg(Constraint):

     def __call__(self, w):
         w = backend.convert_to_tensor(w)
-        return w * ops.cast(ops.greater_equal(w, 0.0), dtype=w.dtype)
+        return ops.multiply(w, ops.greater_equal(w, 0.0))


 @keras_export(["keras.constraints.UnitNorm", "keras.constraints.unit_norm"])
@@ -148,10 +150,8 @@ def __init__(self, axis=0):

     def __call__(self, w):
         w = backend.convert_to_tensor(w)
-        return w / (
-            backend.epsilon()
-            + ops.sqrt(ops.sum(ops.square(w), axis=self.axis, keepdims=True))
-        )
+        norms = ops.sqrt(ops.sum(ops.square(w), axis=self.axis, keepdims=True))
+        return ops.cast(w, norms.dtype) / (backend.epsilon() + norms)

     def get_config(self):
         return {"axis": self.axis}
@@ -202,7 +202,9 @@ def __call__(self, w):
             self.rate * ops.clip(norms, self.min_value, self.max_value)
             + (1 - self.rate) * norms
         )
-        return w * (desired / (backend.epsilon() + norms))
+        return ops.cast(w, norms.dtype) * (
+            desired / (backend.epsilon() + norms)
+        )

     def get_config(self):
         return {
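Note: all four constraints now align dtypes explicitly: `w` is cast to the dtype of the computed `norms` before the multiply or divide, and `NonNeg` lets `ops.multiply` promote the boolean mask instead of casting it by hand. Public behavior should be unchanged; a quick usage sketch under that assumption:

from keras import constraints, ops

# Usage is unchanged; only the internal dtype handling differs.
w = ops.reshape(ops.arange(12, dtype="float32"), (4, 3))
print(constraints.MaxNorm(max_value=2.0, axis=0)(w))
print(constraints.UnitNorm(axis=0)(w))
print(constraints.NonNeg()(w - 5.0))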

keras/src/initializers/constant_initializers.py

Lines changed: 9 additions & 5 deletions
@@ -253,22 +253,26 @@ def __call__(self, shape, dtype=None):
         scaling = ops.sum(ops.abs(win))

         _fft_length = (fft_length - 1) * 2
-        freq = (
-            ops.reshape(ops.arange(fft_length, dtype=dtype), (1, 1, fft_length))
-            / _fft_length
+        freq = ops.divide(
+            ops.reshape(
+                ops.arange(fft_length, dtype=dtype), (1, 1, fft_length)
+            ),
+            _fft_length,
         )
         time = ops.reshape(
             ops.arange(frame_length, dtype=dtype), (frame_length, 1, 1)
         )
-        args = -2 * time * freq * ops.arccos(ops.cast(-1, dtype))
+        args = ops.multiply(ops.multiply(-2, time), freq) * ops.arccos(
+            ops.cast(-1, dtype)
+        )

         if self.side == "real":
             kernel = ops.cast(ops.cos(args), dtype)
         else:
             kernel = ops.cast(ops.sin(args), dtype)

         if win is not None:
-            kernel = kernel * win / scaling
+            kernel = ops.divide(ops.multiply(kernel, win), scaling)
         return kernel

     def get_config(self):
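Note: swapping the bare `/` and `*` operators for `ops.divide` / `ops.multiply` keeps the intermediate math under Keras's promotion rules, so the kernel stays in the requested dtype rather than drifting to float64 through Python scalars. A hedged usage sketch (the call mirrors `initializers.STFT("real", None)` from the tests below and assumes the public `keras.initializers.STFT` export):

from keras import initializers, ops

# The STFT initializer now produces a float32 kernel end to end.
init = initializers.STFT("real", None)
kernel = ops.convert_to_numpy(init((256, 1, 513), dtype="float32"))
print(kernel.shape, kernel.dtype)  # (256, 1, 513) float32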

keras/src/initializers/constant_initializers_test.py

Lines changed: 8 additions & 13 deletions
@@ -80,14 +80,9 @@ def test_stft_initializer(self):
         shape = (256, 1, 513)
         time_range = np.arange(256).reshape((-1, 1, 1))
         freq_range = (np.arange(513) / 1024.0).reshape((1, 1, -1))
-        pi = np.arccos(np.float64(-1))
+        pi = np.arccos(np.float32(-1))
         args = -2 * pi * time_range * freq_range
-
-        tol_kwargs = {}
-        if backend.backend() == "jax":
-            # TODO(mostafa-mahmoud): investigate the cases
-            # of non-small error in jax and torch
-            tol_kwargs = {"atol": 1e-4, "rtol": 1e-6}
+        tol_kwargs = {"atol": 1e-4, "rtol": 1e-6}

         initializer = initializers.STFT("real", None)
         values = backend.convert_to_numpy(initializer(shape))
@@ -101,8 +96,8 @@ def test_stft_initializer(self):
             True,
         )
         window = scipy.signal.windows.get_window("hamming", 256, True)
-        window = window.astype("float64").reshape((-1, 1, 1))
-        values = backend.convert_to_numpy(initializer(shape, "float64"))
+        window = window.astype("float32").reshape((-1, 1, 1))
+        values = backend.convert_to_numpy(initializer(shape, "float32"))
         self.assertAllClose(np.cos(args) * window, values, **tol_kwargs)
         self.run_class_serialization_test(initializer)

@@ -113,9 +108,9 @@ def test_stft_initializer(self):
             False,
         )
         window = scipy.signal.windows.get_window("tukey", 256, False)
-        window = window.astype("float64").reshape((-1, 1, 1))
+        window = window.astype("float32").reshape((-1, 1, 1))
         window = window / np.sqrt(np.sum(window**2))
-        values = backend.convert_to_numpy(initializer(shape, "float64"))
+        values = backend.convert_to_numpy(initializer(shape, "float32"))
         self.assertAllClose(np.sin(args) * window, values, **tol_kwargs)
         self.run_class_serialization_test(initializer)

@@ -125,9 +120,9 @@ def test_stft_initializer(self):
             "spectrum",
         )
         window = np.arange(1, 257)
-        window = window.astype("float64").reshape((-1, 1, 1))
+        window = window.astype("float32").reshape((-1, 1, 1))
         window = window / np.sum(window)
-        values = backend.convert_to_numpy(initializer(shape, "float64"))
+        values = backend.convert_to_numpy(initializer(shape, "float32"))
         self.assertAllClose(np.sin(args) * window, values, **tol_kwargs)
         self.run_class_serialization_test(initializer)

keras/src/layers/preprocessing/stft_spectrogram_test.py

Lines changed: 2 additions & 7 deletions
@@ -11,7 +11,7 @@


 class TestSpectrogram(testing.TestCase):
-    DTYPE = "float32" if backend.backend() == "torch" else "float64"
+    DTYPE = "float32"

     @staticmethod
     def _calc_spectrograms(
@@ -340,12 +340,7 @@ def test_spectrogram_error(self):
         mask |= np.isclose(np.cos(y), np.cos(y_true), **tol_kwargs)
         mask |= np.isclose(np.sin(y), np.sin(y_true), **tol_kwargs)

-        if backend.backend() == "tensorflow":
-            self.assertTrue(np.all(mask))
-        else:
-            # TODO(mostafa-mahmoud): investigate the rare cases
-            # of non-small error in jax and torch
-            self.assertLess(np.mean(~mask), 2e-4)
+        self.assertLess(np.mean(~mask), 2e-4)

     @pytest.mark.skipif(
         backend.backend() != "tensorflow",
