
Commit 9d3eca8

Merge branch 'mlx-poc' of https://github.com/williambdean/pytensor into pr/1365

2 parents: 294c271 + 0812c55

File tree

pytensor/link/mlx/dispatch/basic.py
pytensor/link/mlx/dispatch/elemwise.py
pytensor/link/mlx/dispatch/subtensor.py
tests/link/mlx/test_math.py
tests/link/mlx/test_shape.py

5 files changed: +146 −44 lines changed

pytensor/link/mlx/dispatch/basic.py

Lines changed: 2 additions & 1 deletion
```diff
@@ -1,4 +1,5 @@
 import warnings
+from copy import deepcopy
 from functools import singledispatch
 from types import NoneType
 
@@ -58,7 +59,7 @@ def mlx_funcify_FunctionGraph(
 @mlx_funcify.register(DeepCopyOp)
 def mlx_funcify_DeepCopyOp(op, **kwargs):
     def deepcopyop(x):
-        return x.copy()
+        return deepcopy(x)
 
     return deepcopyop
 
```
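The substantive change swaps `x.copy()` for `copy.deepcopy(x)`. A plausible motivation (not stated in the commit) is that `mlx.core` arrays do not expose a NumPy-style `.copy()` method, while `deepcopy` works on any object that supports it, which this dispatch relies on for `mx.array`. A minimal sketch of the resulting behavior, assuming `mlx` is installed:

```python
# Sketch only: mirrors the `deepcopyop` closure registered above.
# Assumes `mlx` is installed; `a` and `b` are illustrative names.
from copy import deepcopy

import mlx.core as mx

def deepcopyop(x):
    # `deepcopy` returns an independent copy of the input array
    return deepcopy(x)

a = mx.array([1.0, 2.0, 3.0])
b = deepcopyop(a)
b[0] = 9.0           # mutating the copy...
print(a[0].item())   # ...leaves the original untouched: 1.0
```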

pytensor/link/mlx/dispatch/elemwise.py

Lines changed: 47 additions & 39 deletions
```diff
@@ -2,7 +2,6 @@
 
 from pytensor.link.mlx.dispatch.basic import mlx_funcify
 from pytensor.scalar import Softplus
-from pytensor.scalar.basic import AND, OR, Add, Mul, ScalarMaximum, ScalarMinimum
 from pytensor.tensor.elemwise import CAReduce, DimShuffle
 from pytensor.tensor.special import Softmax, SoftmaxGrad
 
@@ -24,44 +23,53 @@ def dimshuffle(x):
 
 @mlx_funcify.register(CAReduce)
 def mlx_funcify_CAReduce(op, **kwargs):
-    if isinstance(op.scalar_op, Add):
-
-        def sum(x):
-            return mx.sum(x, axis=op.axis)
-
-        return sum
-    elif isinstance(op.scalar_op, Mul):
-
-        def prod(x):
-            return mx.prod(x, axis=op.axis)
-
-        return prod
-    elif isinstance(op.scalar_op, AND):
-
-        def all(x):
-            return x.all(axis=op.axis)
-
-        return all
-    elif isinstance(op.scalar_op, OR):
-
-        def any(x):
-            return mx.any(x, axis=op.axis)
-
-        return any
-    elif isinstance(op.scalar_op, ScalarMaximum):
-
-        def max(x):
-            return mx.max(x, axis=op.axis)
-
-        return max
-    elif isinstance(op.scalar_op, ScalarMinimum):
-
-        def min(x):
-            return mx.min(x, axis=op.axis)
-
-        return min
-    else:
-        raise NotImplementedError(f"MLX does not support Elemwise {op.scalar_op}")
+    axis = op.axis
+    op_nfunc_spec = getattr(op, "nfunc_spec", None)
+    scalar_nfunc_spec = getattr(op.scalar_op, "nfunc_spec", None)
+    scalar_op_name = getattr(op.scalar_op, "name", None)
+    scalar_op_identity = getattr(op.scalar_op, "identity", None)
+    acc_dtype = getattr(op, "acc_dtype", None)
+
+    def careduce(x):
+        nonlocal \
+            axis, \
+            op_nfunc_spec, \
+            scalar_nfunc_spec, \
+            scalar_op_name, \
+            scalar_op_identity, \
+            acc_dtype
+
+        if axis is None:
+            axis = list(range(x.ndim))
+
+        if acc_dtype is None:
+            acc_dtype = x.dtype.type
+
+        if op_nfunc_spec:
+            mlx_op = getattr(mx, op_nfunc_spec[0])
+            return mlx_op(x, axis=axis)
+            return mlx_op(x, axis=axis).astype(acc_dtype)
+
+        # The PyTensor `Op` didn't tell us which NumPy equivalent to use (or
+        # there isn't one), so we use this fallback approach
+        if scalar_nfunc_spec:
+            scalar_fn_name = scalar_nfunc_spec[0]
+        elif scalar_op_name:
+            scalar_fn_name = scalar_op_name
+
+        to_reduce = sorted(axis, reverse=True)
+
+        if to_reduce:
+            raise NotImplementedError("Not implemented yet")
+            # In this case, we need to use the `jax.lax` function (if there
+            # is one), and not the `jnp` version.
+            mlx_op = getattr(mx, scalar_fn_name)
+            init_value = mx.array(scalar_op_identity, dtype=acc_dtype)
+            return mx.reduce(x, init_value, mlx_op, to_reduce).astype(acc_dtype)
+        else:
+            return x
+
+    return careduce
 
 
 @mlx_funcify.register(Softmax)
```
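This rewrite replaces the per-`scalar_op` `isinstance` ladder with a generic path that resolves the reduction from the Op's `nfunc_spec`, i.e. its declared NumPy equivalent, echoing the JAX dispatcher (the leftover `jax.lax`/`jnp` comments give that away). Note that as committed, the second `return` in the `op_nfunc_spec` branch is unreachable, so the `acc_dtype` cast is skipped there, and the `to_reduce` fallback raises before its body runs. A standalone sketch of the lookup, with an assumed `nfunc_spec` value for illustration:

```python
# Sketch of the nfunc_spec-driven dispatch, outside of PyTensor.
# The ("sum", 1, 1) spec is an assumption here; PyTensor Ops advertise
# their NumPy equivalent in this (name, n_inputs, n_outputs) form.
import mlx.core as mx

nfunc_spec = ("sum", 1, 1)
axis = [0]

mlx_op = getattr(mx, nfunc_spec[0])     # resolves to mx.sum
x = mx.array([[1.0, 2.0], [3.0, 4.0]])
print(mlx_op(x, axis=axis))             # array([4, 6], dtype=float32)
```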

pytensor/link/mlx/dispatch/subtensor.py

Lines changed: 7 additions & 4 deletions
```diff
@@ -1,3 +1,5 @@
+from copy import deepcopy
+
 from pytensor.link.mlx.dispatch.basic import mlx_funcify
 from pytensor.tensor.subtensor import (
     AdvancedIncSubtensor,
@@ -24,6 +26,7 @@ def subtensor(x, *ilists):
 
     return subtensor
 
+
 @mlx_funcify.register(AdvancedSubtensor)
 @mlx_funcify.register(AdvancedSubtensor1)
 def mlx_funcify_AdvancedSubtensor(op, node, **kwargs):
@@ -48,15 +51,15 @@ def mlx_funcify_IncSubtensor(op, node, **kwargs):
 
         def mlx_fn(x, indices, y):
             if not op.inplace:
-                x = x.copy()
+                x = deepcopy(x)
             x[indices] = y
             return x
 
     else:
 
         def mlx_fn(x, indices, y):
             if not op.inplace:
-                x = x.copy()
+                x = deepcopy(x)
             x[indices] += y
             return x
 
@@ -76,15 +79,15 @@ def mlx_funcify_AdvancedIncSubtensor(op, node, **kwargs):
 
         def mlx_fn(x, indices, y):
             if not op.inplace:
-                x = x.copy()
+                x = deepcopy(x)
             x[indices] = y
             return x
 
     else:
 
         def mlx_fn(x, indices, y):
             if not op.inplace:
-                x = x.copy()
+                x = deepcopy(x)
             x[indices] += y
             return x
 
```
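As in `basic.py`, the `x.copy()` calls become `deepcopy(x)`. The surrounding pattern is the standard out-of-place update: copy unless the Op is marked `inplace`, then index-assign (or increment). A hedged standalone sketch, assuming `mx.array` supports NumPy-style item assignment (which the dispatch above also assumes):

```python
# Sketch of the out-of-place set-subtensor pattern; names are illustrative.
from copy import deepcopy

import mlx.core as mx

def set_subtensor(x, indices, y, inplace=False):
    if not inplace:
        x = deepcopy(x)  # protect the caller's array
    x[indices] = y
    return x

a = mx.array([1.0, 2.0, 3.0])
b = set_subtensor(a, 0, 9.0)
print(a[0].item(), b[0].item())  # 1.0 9.0 -- `a` is unchanged
```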

tests/link/mlx/test_math.py

Lines changed: 12 additions & 0 deletions
```diff
@@ -3,6 +3,7 @@
 
 import pytensor
 import pytensor.tensor as pt
+from pytensor.tensor.math import Argmax, Max
 from tests.link.mlx.test_basic import compare_mlx_and_py, mx
 
 
@@ -87,3 +88,14 @@ def test_elemwise_two_inputs(op) -> None:
     x_test = mx.array([1.0, 2.0, 3.0])
     y_test = mx.array([4.0, 5.0, 6.0])
     compare_mlx_and_py([x, y], out, [x_test, y_test])
+
+
+@pytest.mark.xfail(reason="Argmax not implemented yet")
+def test_mlx_max_and_argmax():
+    # Test that a single output of a multi-output `Op` can be used as input to
+    # another `Op`
+    x = pt.dvector()
+    mx = Max([0])(x)
+    amx = Argmax([0])(x)
+    out = mx * amx
+    compare_mlx_and_py([x], [out], [np.r_[1, 2]])
```
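The new test is marked `xfail` until `Argmax` gets an MLX dispatcher. For reference, the same graph runs on PyTensor's default backend; a hedged sketch (default-mode compilation, not MLX):

```python
# What Max([0]) / Argmax([0]) compute, checked on the default backend.
import numpy as np

import pytensor
import pytensor.tensor as pt
from pytensor.tensor.math import Argmax, Max

x = pt.dvector("x")
fn = pytensor.function([x], [Max([0])(x), Argmax([0])(x)])
print(fn(np.r_[1.0, 2.0]))  # [array(2.), array(1)]
```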

tests/link/mlx/test_shape.py

Lines changed: 78 additions & 0 deletions
```diff
@@ -0,0 +1,78 @@
+import numpy as np
+import pytest
+
+import pytensor.tensor as pt
+from pytensor.compile.ops import DeepCopyOp, ViewOp
+from pytensor.configdefaults import config
+from pytensor.tensor.shape import Shape, Shape_i, reshape
+from pytensor.tensor.type import iscalar, vector
+from tests.link.mlx.test_basic import compare_mlx_and_py
+
+
+@pytest.mark.xfail(reason="Shape Op is not supported yet")
+def test_mlx_shape_ops():
+    x_np = np.zeros((20, 3))
+    x = Shape()(pt.as_tensor_variable(x_np))
+
+    compare_mlx_and_py([], [x], [], must_be_device_array=False)
+
+    x = Shape_i(1)(pt.as_tensor_variable(x_np))
+
+    compare_mlx_and_py([], [x], [], must_be_device_array=False)
+
+
+@pytest.mark.xfail(reason="Shape Op is not supported yet")
+def test_mlx_specify_shape():
+    in_pt = pt.matrix("in")
+    x = pt.specify_shape(in_pt, (4, None))
+    compare_mlx_and_py([in_pt], [x], [np.ones((4, 5)).astype(config.floatX)])
+
+    # When used to assert two arrays have similar shapes
+    in_pt = pt.matrix("in")
+    shape_pt = pt.matrix("shape")
+    x = pt.specify_shape(in_pt, shape_pt.shape)
+
+    compare_mlx_and_py(
+        [in_pt, shape_pt],
+        [x],
+        [np.ones((4, 5)).astype(config.floatX), np.ones((4, 5)).astype(config.floatX)],
+    )
+
+
+@pytest.mark.xfail(reason="Reshape Op is not supported yet")
+def test_mlx_Reshape_constant():
+    a = vector("a")
+    x = reshape(a, (2, 2))
+    compare_mlx_and_py([a], [x], [np.r_[1.0, 2.0, 3.0, 4.0].astype(config.floatX)])
+
+
+@pytest.mark.xfail(reason="Reshape Op is not supported yet")
+def test_mlx_Reshape_concrete_shape():
+    """MLX should compile when a concrete value is passed for the `shape` parameter."""
+    a = vector("a")
+    x = reshape(a, a.shape)
+    compare_mlx_and_py([a], [x], [np.r_[1.0, 2.0, 3.0, 4.0].astype(config.floatX)])
+
+    x = reshape(a, (a.shape[0] // 2, a.shape[0] // 2))
+    compare_mlx_and_py([a], [x], [np.r_[1.0, 2.0, 3.0, 4.0].astype(config.floatX)])
+
+
+@pytest.mark.xfail(reason="`shape_pt` should be specified as a static argument")
+def test_mlx_Reshape_shape_graph_input():
+    a = vector("a")
+    shape_pt = iscalar("b")
+    x = reshape(a, (shape_pt, shape_pt))
+    compare_mlx_and_py(
+        [a, shape_pt], [x], [np.r_[1.0, 2.0, 3.0, 4.0].astype(config.floatX), 2]
+    )
+
+
+@pytest.mark.xfail(reason="ViewOp Op is not supported yet")
+def test_mlx_compile_ops():
+    x = DeepCopyOp()(pt.as_tensor_variable(1.1))
+    compare_mlx_and_py([], [x], [])
+
+    x_np = np.zeros((20, 1, 1))
+    x = ViewOp()(pt.as_tensor_variable(x_np))
+
+    compare_mlx_and_py([], [x], [])
```
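This whole file is new, and every test is an expected failure until the shape-related Ops (`Shape`, `Shape_i`, `SpecifyShape`, `Reshape`, `ViewOp`) get MLX dispatchers. For reference, here is how one of the exercised helpers behaves on the default backend; a minimal hedged sketch of `pt.specify_shape`:

```python
# `pt.specify_shape` on the default backend, mirroring test_mlx_specify_shape.
import numpy as np

import pytensor
import pytensor.tensor as pt

in_pt = pt.matrix("in")
out = pt.specify_shape(in_pt, (4, None))  # pin the first dimension to 4
fn = pytensor.function([in_pt], out)

print(fn(np.ones((4, 5))).shape)  # (4, 5)
# fn(np.ones((3, 5))) would raise, since the first dim is asserted to be 4
```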
