Skip to content

Commit 128da2a

Browse files
committed
...
Co-authored-by: Ricardo Vieira <[email protected]>
1 parent 228fd7e commit 128da2a

File tree

10 files changed

+252
-187
lines changed

10 files changed

+252
-187
lines changed

pytensor/graph/op.py

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -583,10 +583,11 @@ def make_thunk(
583583
)
584584
return self.make_py_thunk(node, storage_map, compute_map, no_recycling)
585585

586-
def try_inplace_inputs(self, candidate_inputs: list[int]) -> "Op":
587-
"""Try to return a version of self that can inplace on candidate_inputs."""
586+
def inplace_on_inputs(self, allowed_inplace_inputs: list[int]) -> "Op":
587+
"""Try to return a version of self that tries to inplace in as many as `allowed_inplace_inputs`."""
588588
# TODO: Document this in the Create your own op docs
589-
raise NotImplementedError()
589+
# By default, do nothing
590+
return self
590591

591592
def __str__(self):
592593
return getattr(type(self), "__name__", super().__str__())

pytensor/link/numba/dispatch/basic.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -660,12 +660,8 @@ def cholesky(a):
660660

661661
else:
662662
# TODO: Use SciPy's BLAS/LAPACK Cython wrappers.
663-
664663
warnings.warn(
665-
(
666-
"Numba will use object mode to allow the "
667-
"`lower` argument to `scipy.linalg.cholesky`."
668-
),
664+
"Numba will use object mode to allow the `lower=False` argument to `scipy.linalg.cholesky`.",
669665
UserWarning,
670666
)
671667

pytensor/tensor/rewriting/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import pytensor.tensor.rewriting.einsum
77
import pytensor.tensor.rewriting.elemwise
88
import pytensor.tensor.rewriting.extra_ops
9+
import pytensor.tensor.rewriting.inplace
910
import pytensor.tensor.rewriting.jax
1011
import pytensor.tensor.rewriting.linalg
1112
import pytensor.tensor.rewriting.math

pytensor/tensor/rewriting/blas.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -757,8 +757,7 @@ def local_dot22_to_ger_or_gemv(fgraph, node):
757757
)
758758

759759

760-
# After destroyhandler(49.5) but before we try to make elemwise things
761-
# inplace (75)
760+
# After destroyhandler(49.5) but before we try to make elemwise things inplace (75)
762761
blas_opt_inplace = in2out(
763762
local_inplace_gemm, local_inplace_gemv, local_inplace_ger, name="blas_opt_inplace"
764763
)

pytensor/tensor/rewriting/blockwise.py

Lines changed: 1 addition & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,7 @@
1-
import itertools
2-
from typing import Optional
3-
4-
from pytensor.compile import Supervisor
51
from pytensor.compile.mode import optdb
62
from pytensor.graph import Constant, node_rewriter
73
from pytensor.graph.replace import vectorize_node
8-
from pytensor.graph.rewriting.basic import copy_stack_trace, in2out, out2in
4+
from pytensor.graph.rewriting.basic import copy_stack_trace, out2in
95
from pytensor.tensor.basic import Alloc, ARange, alloc, shape_padleft
106
from pytensor.tensor.blockwise import Blockwise
117
from pytensor.tensor.math import Dot
@@ -229,77 +225,3 @@ def local_blockwise_reshape(fgraph, node):
229225
new_out = x.reshape([*tuple(batched_shape), *tuple(core_reshape)])
230226
copy_stack_trace(node.outputs[0], new_out)
231227
return [new_out]
232-
233-
234-
@node_rewriter([Blockwise], inplace=True)
235-
def node_blockwise_inplace(fgraph, node):
236-
# Find inputs that are candidates for inplacing
237-
blockwise_op = node.op
238-
239-
if blockwise_op.destroy_map:
240-
# Op already has inplace
241-
return False
242-
243-
core_op = blockwise_op.core_op
244-
batch_ndim = blockwise_op.batch_ndim(node)
245-
out_batch_bcast = node.outputs[0].type.broadcastable[:batch_ndim]
246-
247-
# TODO: Refactor this code, which is also present in Elemwise Inplacer
248-
protected_inputs = [
249-
f.protected for f in fgraph._features if isinstance(f, Supervisor)
250-
]
251-
protected_inputs = list(itertools.chain.from_iterable(protected_inputs))
252-
protected_inputs.extend(fgraph.outputs)
253-
254-
# TODO: Add test for the broadcastable logic (don't inplace inputs that are being broadcasted)
255-
candidate_inputs = [
256-
idx
257-
for idx, inp in enumerate(node.inputs)
258-
if (
259-
not isinstance(inp, Constant)
260-
and inp.type.broadcastable[:batch_ndim] == out_batch_bcast
261-
and not fgraph.has_destroyers([inp])
262-
and inp not in protected_inputs
263-
)
264-
]
265-
266-
if not candidate_inputs:
267-
return None
268-
269-
try:
270-
inplace_core_op = core_op.try_inplace_inputs(candidate_inputs)
271-
except NotImplementedError:
272-
return False
273-
274-
core_destroy_map = inplace_core_op.destroy_map
275-
276-
if not core_destroy_map:
277-
return False
278-
279-
# Check Op is not trying to inplace on non-candidate inputs
280-
for destroyed_inputs in core_destroy_map.values():
281-
for destroyed_input in destroyed_inputs:
282-
if destroyed_input not in candidate_inputs:
283-
raise ValueError("core_op did not respect candidate inputs")
284-
285-
# Recreate core_op with inplace
286-
inplace_blockwise_op = Blockwise(
287-
core_op=inplace_core_op,
288-
signature=blockwise_op.signature,
289-
name=blockwise_op.name,
290-
gufunc_spec=blockwise_op.gufunc_spec,
291-
destroy_map=core_destroy_map,
292-
)
293-
294-
return inplace_blockwise_op.make_node(*node.inputs).outputs
295-
296-
297-
# After destroyhandler(49.5) but before we try to make elemwise things inplace (75)
298-
blockwise_inplace = in2out(node_blockwise_inplace, name="blockwise_inplace")
299-
optdb.register(
300-
"blockwise_inplace",
301-
blockwise_inplace,
302-
"fast_run",
303-
"inplace",
304-
position=69.0,
305-
)

pytensor/tensor/rewriting/inplace.py

Lines changed: 126 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,126 @@
1+
import itertools
2+
3+
from pytensor.compile import Supervisor, optdb
4+
from pytensor.graph import Constant
5+
from pytensor.graph.rewriting.basic import copy_stack_trace, in2out, node_rewriter
6+
from pytensor.tensor.blockwise import Blockwise
7+
from pytensor.tensor.slinalg import Cholesky
8+
9+
10+
def filter_allowed_inplace_inputs(fgraph, node) -> list[int]:
11+
protected_inputs = [
12+
f.protected for f in fgraph._features if isinstance(f, Supervisor)
13+
]
14+
protected_inputs = list(itertools.chain.from_iterable(protected_inputs))
15+
protected_inputs.extend(fgraph.outputs)
16+
17+
return [
18+
idx
19+
for idx, inp in enumerate(node.inputs)
20+
if (
21+
not isinstance(inp, Constant)
22+
and not fgraph.has_destroyers([inp])
23+
and inp not in protected_inputs
24+
)
25+
]
26+
27+
28+
def validate_inplace_inputs(allowed_inplace_inputs, destroy_map):
29+
# Check Op is not trying to inplace on non-candidate inputs
30+
for destroyed_inputs in destroy_map.values():
31+
for destroyed_input in destroyed_inputs:
32+
if destroyed_input not in allowed_inplace_inputs:
33+
raise ValueError(
34+
"Op destroy_map does not respect allowed_inplace_inputs"
35+
)
36+
37+
38+
def make_inplace_core_op(fgraph, node):
39+
# Find inputs that are candidates for inplacing
40+
op = node.op
41+
42+
if op.destroy_map:
43+
# Op already has inplace
44+
return None
45+
46+
allowed_inplace_inputs = filter_allowed_inplace_inputs(fgraph, node)
47+
48+
if not allowed_inplace_inputs:
49+
return None
50+
51+
inplace_op = op.inplace_on_inputs(allowed_inplace_inputs=allowed_inplace_inputs)
52+
53+
if not inplace_op.destroy_map:
54+
return None
55+
56+
validate_inplace_inputs(allowed_inplace_inputs, destroy_map=inplace_op.destroy_map)
57+
58+
out = inplace_op.make_node(*node.inputs).outputs
59+
copy_stack_trace(node.outputs, out)
60+
return inplace_op
61+
62+
63+
@node_rewriter([Cholesky], inplace=True)
64+
def linalg_inplace(fgraph, node):
65+
return make_inplace_core_op(fgraph, node)
66+
67+
68+
@node_rewriter(tracks=[Blockwise])
69+
def blockwise_inplace(fgraph, node):
70+
blockwise_op: Blockwise = node.op
71+
72+
if blockwise_op.destroy_map:
73+
# Op already has inplace
74+
return
75+
76+
batch_ndim = blockwise_op.batch_ndim(node)
77+
out_batch_bcast = node.outputs[0].type.broadcastable[:batch_ndim]
78+
79+
allowed_inplace_inputs = [
80+
idx
81+
for idx in filter_allowed_inplace_inputs(fgraph, node)
82+
# We can only inplace on inputs that are not being broadcasted
83+
if node.inputs[idx].type.broadcastable[:batch_ndim] == out_batch_bcast
84+
]
85+
86+
inplace_core_op = blockwise_op.core_op.inplace_on_inputs(
87+
allowed_inplace_inputs=allowed_inplace_inputs
88+
)
89+
90+
if not inplace_core_op.destroy_map:
91+
return None
92+
93+
validate_inplace_inputs(
94+
allowed_inplace_inputs, destroy_map=inplace_core_op.destroy_map
95+
)
96+
97+
# Recreate core_op with inplace
98+
inplace_blockwise_op = Blockwise(
99+
core_op=inplace_core_op,
100+
signature=blockwise_op.signature,
101+
name=blockwise_op.name,
102+
gufunc_spec=blockwise_op.gufunc_spec,
103+
destroy_map=inplace_core_op.destroy_map,
104+
)
105+
106+
out = inplace_blockwise_op.make_node(*node.inputs).outputs
107+
copy_stack_trace(node.outputs, out)
108+
return out
109+
110+
111+
# After destroyhandler(49.5) but before we try to make blas (70) and elemwise things inplace (75)
112+
optdb.register(
113+
"linalg_inplace",
114+
in2out(linalg_inplace),
115+
"fast_run",
116+
"inplace",
117+
position=69.0,
118+
)
119+
120+
optdb.register(
121+
"blockwise_inplace",
122+
in2out(blockwise_inplace),
123+
"fast_run",
124+
"inplace",
125+
position=69.0,
126+
)

pytensor/tensor/rewriting/linalg.py

Lines changed: 3 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,9 @@
44

55
from pytensor import Variable
66
from pytensor.graph import Apply, FunctionGraph
7-
from pytensor.graph.rewriting.basic import (copy_stack_trace, node_rewriter,
7+
from pytensor.graph.rewriting.basic import (
8+
copy_stack_trace,
9+
node_rewriter,
810
)
911
from pytensor.scalar.basic import Mul
1012
from pytensor.tensor.basic import (
@@ -609,26 +611,3 @@ def rewrite_inv_inv(fgraph, node):
609611
):
610612
return None
611613
return [potential_inner_inv.inputs[0]]
612-
613-
614-
cholesky_no_inplace = Cholesky(overwrite_a=False)
615-
cholesky_inplace = Cholesky(overwrite_a=True)
616-
617-
618-
@node_rewriter([cholesky_no_inplace], inplace=True)
619-
@node_rewriter([Cholesky], inplace=True)
620-
def local_inplace_cholesky(fgraph, node):
621-
return make_inplace(node, "overwrite_a")
622-
623-
624-
# After destroyhandler(49.5) but before we try to make elemwise things
625-
# inplace (75)
626-
linalg_opt_inplace = in2out(local_inplace_cholesky, name="linalg_opt_inplace")
627-
optdb.register(
628-
"InplaceLinalgOpt",
629-
linalg_opt_inplace,
630-
"fast_run",
631-
"inplace",
632-
"linalg_opt_inplace",
633-
position=69.0,
634-
)

0 commit comments

Comments (0)