
Commit d47de98

cetagostini authored and jessegrabowski committed
Working with simple model
1 parent 8300fd4 commit d47de98


5 files changed: +329 −9 lines


pytensor/link/mlx/dispatch/core.py

Lines changed: 64 additions & 5 deletions
@@ -115,12 +115,25 @@ def eye(*args):
     return eye
 
 
-def convert_dtype_to_mlx(dtype_str):
+def convert_dtype_to_mlx(dtype_str, auto_cast_unsupported=True):
     """Convert PyTensor dtype strings to MLX dtype objects.
 
     MLX expects dtype objects rather than string literals for type conversion.
     This function maps common dtype strings to their MLX equivalents.
+
+    Parameters
+    ----------
+    dtype_str : str or MLX dtype
+        The dtype to convert
+    auto_cast_unsupported : bool
+        If True, automatically cast unsupported dtypes to supported ones with warnings
+
+    Returns
+    -------
+    MLX dtype object
     """
+    import warnings
+
     if isinstance(dtype_str, str):
         if dtype_str == "bool":
             return mx.bool_
@@ -145,13 +158,35 @@ def convert_dtype_to_mlx(dtype_str):
         elif dtype_str == "float32":
             return mx.float32
         elif dtype_str == "float64":
-            return mx.float64
+            if auto_cast_unsupported:
+                warnings.warn(
+                    "MLX does not support float64 on GPU. Automatically casting to float32. "
+                    "This may result in reduced precision. To avoid this warning, "
+                    "explicitly use float32 in your code or set floatX='float32' in PyTensor config.",
+                    UserWarning,
+                    stacklevel=3,
+                )
+                return mx.float32
+            else:
+                return mx.float64
         elif dtype_str == "bfloat16":
             return mx.bfloat16
         elif dtype_str == "complex64":
             return mx.complex64
         elif dtype_str == "complex128":
-            return mx.complex128
+            if auto_cast_unsupported:
+                warnings.warn(
+                    "MLX does not support complex128. Automatically casting to complex64. "
+                    "This may result in reduced precision. To avoid this warning, "
+                    "explicitly use complex64 in your code.",
+                    UserWarning,
+                    stacklevel=3,
+                )
+                return mx.complex64
+            else:
+                # MLX has no complex128 dtype, so even with auto-casting disabled
+                # the best we can do is fall back to complex64 (without a warning)
+                return mx.complex64
     # Return as is if it's already an MLX dtype or not a recognized string
     return dtype_str
 
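For reference, a minimal sketch of the new auto-casting behavior. The import path simply mirrors the file shown above (pytensor/link/mlx/dispatch/core.py); whether the symbol is re-exported elsewhere is not part of this commit.

import warnings

import mlx.core as mx

from pytensor.link.mlx.dispatch.core import convert_dtype_to_mlx

# Default: float64 is downcast to float32 and a UserWarning is emitted
with warnings.catch_warnings(record=True) as caught:
    warnings.simplefilter("always")
    assert convert_dtype_to_mlx("float64") == mx.float32
    assert any("float64" in str(w.message) for w in caught)

# Opting out of auto-casting returns mx.float64, which may fail later on the GPU
assert convert_dtype_to_mlx("float64", auto_cast_unsupported=False) == mx.float64

# Supported dtypes pass through unchanged
assert convert_dtype_to_mlx("float32") == mx.float32
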
@@ -212,7 +247,31 @@ def allocempty(*shape):
 @mlx_funcify.register(Alloc)
 def mlx_funcify_Alloc(op, node, **kwargs):
     def alloc(x, *shape):
-        res = mx.broadcast_to(x, shape)
-        return res
+        try:
+            # Convert shape elements to Python ints for MLX compatibility
+            # MLX requires shape dimensions to be Python integers, not MLX arrays
+            shape_ints = tuple(
+                int(s.item()) if hasattr(s, "item") else int(s) for s in shape
+            )
+            return mx.broadcast_to(x, shape_ints)
+        except ValueError as e:
+            if (
+                "[eval] Attempting to eval an array during function transformations"
+                in str(e)
+            ):
+                # This is the MLX compilation limitation - provide helpful error
+                raise ValueError(
+                    "MLX compilation limitation: Alloc operations with dynamic shapes "
+                    "cannot be used inside compiled functions. This is because MLX "
+                    "compilation forbids evaluating arrays to extract shape values. "
+                    "\n\nWorkarounds:"
+                    "\n1. Avoid using Alloc with dynamic shapes in compiled contexts"
+                    "\n2. Use static shapes when possible"
+                    "\n3. Move Alloc operations outside compiled functions"
+                    "\n\nOriginal error: " + str(e)
+                ) from e
+            else:
+                # Re-raise other ValueError exceptions
+                raise
 
     return alloc
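
As a standalone illustration of why the wrapper converts shape entries to Python ints (plain MLX, no PyTensor involved): mx.broadcast_to expects integer dimensions, while shape values coming from the graph may arrive as zero-dimensional MLX arrays.

import mlx.core as mx

x = mx.array([1.0, 2.0, 3.0])
shape = (mx.array(2), 3)  # shape entries may be MLX scalars or plain Python ints

# Same conversion used in alloc() above
shape_ints = tuple(int(s.item()) if hasattr(s, "item") else int(s) for s in shape)

print(mx.broadcast_to(x, shape_ints).shape)  # prints the broadcast shape, (2, 3)

Inside mx.compile, that int(s.item()) call forces an eval of a traced array, which is exactly the limitation the ValueError message above points at.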

pytensor/link/mlx/dispatch/elemwise.py

Lines changed: 20 additions & 1 deletion
@@ -149,6 +149,25 @@ def softplus(x):
 def mlx_funcify_Cast(op, **kwargs):
     def cast(x):
         dtype = convert_dtype_to_mlx(op.scalar_op.o_type.dtype)
-        return x.astype(dtype)
+        try:
+            return x.astype(dtype)
+        except ValueError as e:
+            if "is not supported on the GPU" in str(e):
+                # MLX GPU limitation - try auto-casting with warning
+                import warnings
+
+                warnings.warn(
+                    f"MLX GPU limitation: {e}. Attempting automatic fallback casting.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+                # Get the auto-cast version
+                fallback_dtype = convert_dtype_to_mlx(
+                    op.scalar_op.o_type.dtype, auto_cast_unsupported=True
+                )
+                return x.astype(fallback_dtype)
+            else:
+                # Re-raise other ValueError exceptions
+                raise
 
     return cast

pytensor/link/mlx/dispatch/math.py

Lines changed: 20 additions & 1 deletion
@@ -303,7 +303,26 @@ def minimum(x, y):
 def _(scalar_op):
     def cast(x):
         dtype = convert_dtype_to_mlx(scalar_op.o_type.dtype)
-        return x.astype(dtype)
+        try:
+            return x.astype(dtype)
+        except ValueError as e:
+            if "is not supported on the GPU" in str(e):
+                # MLX GPU limitation - try auto-casting with warning
+                import warnings
+
+                warnings.warn(
+                    f"MLX GPU limitation: {e}. Attempting automatic fallback casting.",
+                    UserWarning,
+                    stacklevel=2,
+                )
+                # Get the auto-cast version
+                fallback_dtype = convert_dtype_to_mlx(
+                    scalar_op.o_type.dtype, auto_cast_unsupported=True
+                )
+                return x.astype(fallback_dtype)
+            else:
+                # Re-raise other ValueError exceptions
+                raise
 
     return cast
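
The guard added here is identical to the one in elemwise.py above; both retry the cast with an auto-cast dtype when MLX rejects the requested dtype on the GPU. A minimal sketch of what that fallback path computes (the import path again mirrors the file layout shown in this commit):

import mlx.core as mx

from pytensor.link.mlx.dispatch.core import convert_dtype_to_mlx

x = mx.zeros((3,), dtype=mx.float32)

# What the except-branch computes when a float64 cast is rejected on the GPU:
# convert_dtype_to_mlx warns and hands back a supported dtype instead.
fallback_dtype = convert_dtype_to_mlx("float64", auto_cast_unsupported=True)
print(x.astype(fallback_dtype).dtype)  # mlx.core.float32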

pytensor/link/mlx/linker.py

Lines changed: 9 additions & 1 deletion
@@ -4,9 +4,10 @@
 class MLXLinker(JITLinker):
     """A `Linker` that JIT-compiles NumPy-based operations using Apple's MLX."""
 
-    def __init__(self, *args, **kwargs):
+    def __init__(self, use_compile=True, *args, **kwargs):
         super().__init__(*args, **kwargs)
         self.gen_functors = []
+        self.use_compile = use_compile
 
     def fgraph_convert(self, fgraph, **kwargs):
         """Convert a PyTensor FunctionGraph to an MLX-compatible function.
@@ -33,6 +34,13 @@ def jit_compile(self, fn):
 
         from pytensor.link.mlx.dispatch import mlx_typify
 
+        if not self.use_compile:
+            # Skip compilation and just return the function with MLX typification
+            def fn_no_compile(*inputs):
+                return fn(*(mlx_typify(inp) for inp in inputs))
+
+            return fn_no_compile
+
         inner_fn = mx.compile(fn)
 
         def fn(*inputs, inner_fn=inner_fn):
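
A sketch of how the new use_compile flag could be exercised end to end. The Mode/linker wiring below is an assumption based on PyTensor's generic linker API (pytensor.compile.mode.Mode); this commit only adds the flag itself.

import pytensor
import pytensor.tensor as pt
from pytensor.compile.mode import Mode
from pytensor.link.mlx.linker import MLXLinker

x = pt.vector("x", dtype="float32")
y = pt.exp(x) + 1.0

# use_compile=False skips mx.compile and only applies mlx_typify to the inputs,
# trading some speed for compatibility with ops that cannot run under MLX compilation
mlx_mode = Mode(linker=MLXLinker(use_compile=False))
f = pytensor.function([x], y, mode=mlx_mode)

print(f([0.0, 1.0, 2.0]))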
