import builtins

import mlx.core as mx
import numpy as np
import tree

from keras.backend.common import KerasVariable
from keras.backend.common import standardize_dtype
from keras.backend.common.keras_tensor import KerasTensor
from keras.backend.common.stateless_scope import StatelessScope
from keras.utils.nest import pack_sequence_as

SUPPORTS_SPARSE_TENSORS = False

MLX_DTYPES = {
    "float16": mx.float16,
    "float32": mx.float32,
    "float64": None,  # mlx does not support float64
    "uint8": mx.uint8,
    "uint16": mx.uint16,
    "uint32": mx.uint32,
    "uint64": mx.uint64,
    "int8": mx.int8,
    "int16": mx.int16,
    "int32": mx.int32,
    "int64": mx.int64,
    "bfloat16": mx.bfloat16,
    "bool": mx.bool_,
}


def to_mlx_dtype(dtype):
    if isinstance(dtype, mx.Dtype):
        return dtype
    standardized_dtype = MLX_DTYPES.get(standardize_dtype(dtype), None)
    if standardized_dtype is None:
        raise ValueError(f"Unsupported dtype for MLX: {dtype}")
    return standardized_dtype
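

# Illustrative behavior (not executed here):
#   to_mlx_dtype("float32")  -> mlx.core.float32
#   to_mlx_dtype("float64")  -> ValueError, since mlx has no 64-bit float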


class Variable(KerasVariable):
    def _initialize(self, value):
        self._value = convert_to_tensor(value, dtype=self._dtype)

    def _direct_assign(self, value):
        self._value = value

    def _convert_to_tensor(self, value, dtype=None):
        return convert_to_tensor(value, dtype=dtype)

    # Conversion hook returning the backing mlx array.
    def __mlx_array__(self):
        return self.value

    # Conversion hook so `np.array(variable)` works.
    def __array__(self, dtype=None):
        value = convert_to_numpy(self._value)
        if dtype:
            return value.astype(dtype)
        return value


def convert_to_tensor(x, dtype=None, sparse=None):
    if sparse:
        raise ValueError("`sparse=True` is not supported with mlx backend")
    mlx_dtype = to_mlx_dtype(dtype) if dtype is not None else None

    if is_tensor(x):
        if dtype is None:
            return x
        return x.astype(mlx_dtype)

    if isinstance(x, Variable):
        if dtype and standardize_dtype(dtype) != x.dtype:
            return x.value.astype(mlx_dtype)
        return x.value

    if isinstance(x, np.ndarray):
        # numpy defaults to int64, while mlx's default integer type is
        # int32; downcast to avoid surprises downstream.
        if x.dtype == np.int64:
            x = x.astype(np.int32)
        x = x.astype(standardize_dtype(x.dtype))
        return mx.array(x, dtype=mlx_dtype)

    if isinstance(x, list):

        def to_scalar_list(x):
            # Recursively replace mx.array leaves with Python scalars or
            # nested lists so `mx.array` can ingest the whole structure.
            if isinstance(x, list):
                return [to_scalar_list(xi) for xi in x]
            elif isinstance(x, mx.array):
                if x.ndim == 0:
                    return x.item()
                else:
                    return x.tolist()
            else:
                return x

        return mx.array(to_scalar_list(x), dtype=mlx_dtype)

    return mx.array(x, dtype=mlx_dtype)
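

# Example conversions (illustrative, assuming mlx defaults):
#   convert_to_tensor([1.0, 2.0]).dtype              -> mlx.core.float32
#   convert_to_tensor(np.zeros(2, np.int64)).dtype   -> mlx.core.int32
#   convert_to_tensor([1, 2], dtype="float16").dtype -> mlx.core.float16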


def convert_to_tensors(*xs):
    ys = [None] * len(xs)
    dtype = None
    for i, x in enumerate(xs):
        if not isinstance(x, (int, float, bool)):
            ys[i] = convert_to_tensor(x)
            dtype = ys[i].dtype
    # Floating point wins so scalars promote to dtype
    if dtype in (mx.float32, mx.float16, mx.bfloat16):
        for i, x in enumerate(xs):
            if ys[i] is None:
                ys[i] = mx.array(x, dtype=dtype)
    # Bool loses against everything so scalars keep their type
    elif dtype == mx.bool_:
        for i, x in enumerate(xs):
            if ys[i] is None:
                ys[i] = mx.array(x)
    # Integral types keep their type except if the scalar is a float
    else:
        for i, x in enumerate(xs):
            if ys[i] is None:
                if isinstance(x, float):
                    ys[i] = mx.array(x)
                else:
                    ys[i] = mx.array(x, dtype=dtype)

    return ys
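

# Promotion examples (illustrative):
#   convert_to_tensors(mx.array([1.0, 2.0]), 3) -> both float32
#   convert_to_tensors(mx.array([1, 2]), 3.5)   -> an int32 tensor and a
#                                                  float32 scalar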


def convert_to_numpy(x):
    # Performs a copy. If we want 0-copy we can pass copy=False
    return np.array(x)


def is_tensor(x):
    return isinstance(x, mx.array)


def shape(x):
    return tuple(x.shape)


def cast(x, dtype):
    return convert_to_tensor(x, dtype=dtype)


# Shape / dtype inference util
def compute_output_spec(fn, *args, **kwargs):
    def has_none_shape(x):
        """Check if a `KerasTensor` has a dynamic shape."""
        if isinstance(x, KerasTensor):
            return None in x.shape
        return False

    def convert_keras_tensor_to_mlx(x, fill_value=None):
        """Convert `KerasTensor`s to `mlx.array`s."""
        if isinstance(x, KerasTensor):
            shape = list(x.shape)
            if fill_value:
                for i, e in enumerate(shape):
                    if e is None:
                        shape[i] = fill_value
            return mx.ones(shape, dtype=to_mlx_dtype(x.dtype))
        return x

    def convert_mlx_to_keras_tensor(x):
        """Convert `mlx.array`s to `KerasTensor`s."""
        if is_tensor(x):
            return KerasTensor(x.shape, standardize_dtype(x.dtype))
        return x

    def symbolic_call(fn, args, kwargs, fill_value):
        """Call `fn` to infer output shape and dtype."""
        arr_args, arr_kwargs = tree.map_structure(
            lambda x: convert_keras_tensor_to_mlx(x, fill_value),
            (args, kwargs),
        )
        return fn(*arr_args, **arr_kwargs)

    with StatelessScope():
        # Trace `fn` once with an arbitrary concrete size standing in for
        # every dynamic (None) dimension.
        outputs = symbolic_call(fn, args, kwargs, fill_value=83)

        none_in_shape = any(map(has_none_shape, tree.flatten((args, kwargs))))
        if none_in_shape:
            # Trace a second time with a different stand-in size. Output
            # dimensions that differ between the two traces must depend on
            # the dynamic inputs, so mark them as None in the spec.
            outputs_1 = outputs
            outputs_2 = symbolic_call(fn, args, kwargs, fill_value=89)

            flat_out_1 = tree.flatten(outputs_1)
            flat_out_2 = tree.flatten(outputs_2)

            flat_out = []
            for x1, x2 in zip(flat_out_1, flat_out_2):
                shape = list(x1.shape)
                for i, e in enumerate(x2.shape):
                    if e != shape[i]:
                        shape[i] = None
                flat_out.append(KerasTensor(shape, standardize_dtype(x1.dtype)))
            outputs = pack_sequence_as(outputs_1, flat_out)

        output_spec = tree.map_structure(convert_mlx_to_keras_tensor, outputs)
    return output_spec
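

# Illustrative use, inferring the spec of a reduction over a dynamic
# batch dimension:
#   x = KerasTensor((None, 4), dtype="float32")
#   compute_output_spec(lambda t: t.sum(axis=1), x)  -> KerasTensor (None,)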


def cond(pred, true_fn, false_fn):
    # TODO: How should we avoid evaluating pred in case we are tracing?
    if pred:
        return true_fn()
    return false_fn()


def vectorized_map(function, elements):
    return mx.vmap(function)(elements)


def scatter(indices, values, shape):
    indices = convert_to_tensor(indices)
    values = convert_to_tensor(values)
    zeros = mx.zeros(shape, dtype=values.dtype)
    # Split the trailing coordinate dimension into a tuple of index arrays
    # so it can be used as an advanced index, then accumulate the values.
    indices = tuple(indices[..., i] for i in range(indices.shape[-1]))
    zeros = zeros.at[indices].add(values)

    return zeros


def scatter_update(inputs, indices, updates):
    inputs = convert_to_tensor(inputs)
    indices = convert_to_tensor(indices)
    updates = convert_to_tensor(updates)
    indices = tuple(indices[..., i] for i in range(indices.shape[-1]))
    inputs[indices] = updates

    return inputs
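

# For example (illustrative):
#   scatter(mx.array([[0], [2]]), mx.array([1.0, 2.0]), (4,))
#   -> [1.0, 0.0, 2.0, 0.0]; repeated indices accumulate via `add`.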


def slice(inputs, start_indices, shape):
    inputs = convert_to_tensor(inputs)

    # This function shadows the builtin `slice`, so fetch it through the
    # `builtins` module (`__builtins__` is not reliably a dict).
    slices = tuple(
        builtins.slice(int(start_index), int(start_index + length))
        for start_index, length in zip(start_indices, shape)
    )
    return inputs[slices]


def slice_update(inputs, start_indices, updates):
    inputs = convert_to_tensor(inputs)
    updates = convert_to_tensor(updates)

    slices = tuple(
        builtins.slice(int(start_index), int(start_index + update_length))
        for start_index, update_length in zip(start_indices, updates.shape)
    )
    inputs[slices] = updates
    return inputs
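

# e.g. (illustrative): slice(mx.arange(6).reshape(2, 3), (0, 1), (2, 2))
# returns the 2x2 block starting at row 0, column 1.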


def while_loop(
    cond,
    body,
    loop_vars,
    maximum_iterations=None,
):
    # TODO: How should we avoid evaluating cond when tracing?
    current_iter = 0
    iteration_check = (
        lambda i: maximum_iterations is None or i < maximum_iterations
    )
    loop_vars = tuple([convert_to_tensor(v) for v in loop_vars])
    while cond(*loop_vars) and iteration_check(current_iter):
        loop_vars = body(*loop_vars)
        if not isinstance(loop_vars, (list, tuple)):
            loop_vars = (loop_vars,)
        loop_vars = tuple(loop_vars)
        current_iter += 1
    return loop_vars


def fori_loop(lower, upper, body_fun, init_val):
    val = init_val
    for i in range(lower, upper):
        val = body_fun(i, val)
    return val
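

# e.g. (illustrative), doubling until the threshold is reached:
#   while_loop(lambda x: x < 10, lambda x: (x * 2,), (mx.array(1),))
#   -> (mx.array(16),)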


def stop_gradient(variable):
    return mx.stop_gradient(variable)


def unstack(x, num=None, axis=0):
    y = x.split(num or x.shape[axis], axis=axis)
    return [yi.squeeze(axis) for yi in y]
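

# e.g. (illustrative): unstack(mx.zeros((3, 2)), axis=0) returns three
# arrays of shape (2,).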