diff --git a/pytensor/tensor/rewriting/linalg.py b/pytensor/tensor/rewriting/linalg.py index 17a3ce9165..636bc3cadf 100644 --- a/pytensor/tensor/rewriting/linalg.py +++ b/pytensor/tensor/rewriting/linalg.py @@ -14,7 +14,7 @@ node_rewriter, ) from pytensor.graph.rewriting.unify import OpPattern -from pytensor.scalar.basic import Abs, Log, Mul, Sign +from pytensor.scalar.basic import Abs, Exp, Log, Mul, Sign, Sqr from pytensor.tensor.basic import ( AllocDiag, ExtractDiag, @@ -23,6 +23,7 @@ concatenate, diag, diagonal, + ones, ) from pytensor.tensor.blockwise import Blockwise from pytensor.tensor.elemwise import DimShuffle, Elemwise @@ -46,9 +47,12 @@ ) from pytensor.tensor.rewriting.blockwise import blockwise_of from pytensor.tensor.slinalg import ( + LU, + QR, BlockDiagonal, Cholesky, CholeskySolve, + LUFactor, Solve, SolveBase, SolveTriangular, @@ -65,6 +69,10 @@ MATRIX_INVERSE_OPS = (MatrixInverse, MatrixPinv) +def matrix_diagonal_product(x): + return pt.prod(diagonal(x, axis1=-2, axis2=-1), axis=-1) + + def is_matrix_transpose(x: TensorVariable) -> bool: """Check if a variable corresponds to a transpose of the last two axes""" node = x.owner @@ -281,41 +289,39 @@ def cholesky_ldotlt(fgraph, node): @register_stabilize @register_specialize -@node_rewriter([det]) -def local_det_chol(fgraph, node): - """ - If we have det(X) and there is already an L=cholesky(X) - floating around, then we can use prod(diag(L)) to get the determinant. 
+@node_rewriter([log]) +def local_log_prod_to_sum_log(fgraph, node): + """Rewrite log(prod(x)) as sum(log(x)), when x is known to be positive.""" + [p] = node.inputs + p_node = p.owner - """ - (x,) = node.inputs - for cl, xpos in fgraph.clients[x]: - if isinstance(cl.op, Blockwise) and isinstance(cl.op.core_op, Cholesky): - L = cl.outputs[0] - return [prod(diagonal(L, axis1=-2, axis2=-1) ** 2, axis=-1)] + if p_node is None: + return None + p_op = p_node.op -@register_canonicalize -@register_stabilize -@register_specialize -@node_rewriter([log]) -def local_log_prod_sqr(fgraph, node): - """ - This utilizes a boolean `positive` tag on matrices. - """ - (x,) = node.inputs - if x.owner and isinstance(x.owner.op, Prod): - # we cannot always make this substitution because - # the prod might include negative terms - p = x.owner.inputs[0] + if isinstance(p_op, Prod): + x = p_node.inputs[0] - # p is the matrix we're reducing with prod - if getattr(p.tag, "positive", None) is True: - return [log(p).sum(axis=x.owner.op.axis)] + # TODO: The product of diagonals of a Cholesky(A) are also strictly positive + if ( + x.owner is not None + and isinstance(x.owner.op, Elemwise) + and isinstance(x.owner.op.scalar_op, Abs | Sqr | Exp) + ) or getattr(x.tag, "positive", False): + return [log(x).sum(axis=p_node.op.axis)] # TODO: have a reduction like prod and sum that simply # returns the sign of the prod multiplication. 
+ # Special case for log(abs(prod(x))) -> sum(log(abs(x))) that shows up in slogdet + elif isinstance(p_op, Elemwise) and isinstance(p_op.scalar_op, Abs): + [p] = p_node.inputs + p_node = p.owner + if p_node is not None and isinstance(p_node.op, Prod): + [x] = p.owner.inputs + return [log(abs(x)).sum(axis=p_node.op.axis)] + @register_specialize @node_rewriter([blockwise_of(MatrixInverse | Cholesky | MatrixPinv)]) @@ -442,6 +448,127 @@ def _find_diag_from_eye_mul(potential_mul_input): return eye_input, non_eye_inputs +@register_stabilize("shape_unsafe") +@register_specialize("shape_unsafe") +@node_rewriter([det]) +def det_of_matrix_factorized_elsewhere(fgraph, node): + """ + If we have det(X) or abs(det(X)) and there is already a nice decomposition(X) floating around, + use it to compute it more cheaply + + """ + [det] = node.outputs + [x] = node.inputs + + sign_not_needed = all( + isinstance(client.op, Elemwise) and isinstance(client.op.scalar_op, (Abs, Sqr)) + for client, _ in fgraph.clients[det] + ) + + new_det = None + for client, _ in fgraph.clients[x]: + core_op = client.op.core_op if isinstance(client.op, Blockwise) else client.op + match core_op: + case Cholesky(): + L = client.outputs[0] + new_det = matrix_diagonal_product(L) ** 2 + case LU(): + U = client.outputs[-1] + new_det = matrix_diagonal_product(U) + case LUFactor(): + LU_packed = client.outputs[0] + new_det = matrix_diagonal_product(LU_packed) + case _: + if not sign_not_needed: + continue + match core_op: + case SVD(): + lmbda = ( + client.outputs[1] + if core_op.compute_uv + else client.outputs[0] + ) + new_det = prod(lmbda, axis=-1) + case QR(): + R = client.outputs[-1] + # if mode == "economic", R may not be square and this rewrite could hide a shape error + # That's why it's tagged as `shape_unsafe` + new_det = matrix_diagonal_product(R) + + if new_det is not None: + # found a match + break + else: # no-break (i.e., no-match) + return None + + [det] = node.outputs + copy_stack_trace(det, 
new_det) + return [new_det] + + +@register_stabilize("shape_unsafe") +@register_specialize("shape_unsafe") +@node_rewriter(tracks=[det]) +def det_of_factorized_matrix(fgraph, node): + """Introduce special forms for det(decomposition(X)). + + Some cases are only known up to a sign change such as det(QR(X)), + and are only introduced if the determinant sign is discarded downstream (e.g., abs, sqr) + """ + [det] = node.outputs + [x] = node.inputs + + sign_not_needed = all( + isinstance(client.op, Elemwise) and isinstance(client.op.scalar_op, (Abs, Sqr)) + for client, _ in fgraph.clients[det] + ) + + x_node = x.owner + if x_node is None: + return None + + x_op = x_node.op + core_op = x_op.core_op if isinstance(x_op, Blockwise) else x_op + + new_det = None + match core_op: + case Cholesky(): + new_det = matrix_diagonal_product(x) + case LU(): + if x is x_node.outputs[-2]: + # x is L + new_det = ones(x.shape[:-2], dtype=det.dtype) + elif x is x_node.outputs[-1]: + # x is U + new_det = matrix_diagonal_product(x) + case SVD(): + if not core_op.compute_uv or x is x_node.outputs[1]: + # x is lambda + new_det = prod(x, axis=-1) + elif sign_not_needed: + # x is either U or Vt and sign is discarded downstream + new_det = ones(x.shape[:-2], dtype=det.dtype) + case QR(): + # if mode == "economic", Q/R may not be square and this rewrite could hide a shape error + # That's why it's tagged as `shape_unsafe` + if x is x_node.outputs[-1]: + # x is R + new_det = matrix_diagonal_product(x) + elif ( + sign_not_needed + and core_op.mode in ("economic", "full") + and x is x_node.outputs[0] + ): + # x is Q and sign is discarded downstream + new_det = ones(x.shape[:-2], dtype=det.dtype) + + if new_det is None: + return None + + copy_stack_trace(det, new_det) + return [new_det] + + @register_canonicalize("shape_unsafe") @register_stabilize("shape_unsafe") @node_rewriter([det]) diff --git a/tests/tensor/linalg/test_rewriting.py b/tests/tensor/linalg/test_rewriting.py index 
419ff357d4..b91f536953 100644 --- a/tests/tensor/linalg/test_rewriting.py +++ b/tests/tensor/linalg/test_rewriting.py @@ -2,8 +2,10 @@ import pytest from pytensor import config, function, scan +from pytensor import tensor as pt from pytensor.compile.mode import get_default_mode from pytensor.gradient import grad +from pytensor.graph import rewrite_graph from pytensor.scan.op import Scan from pytensor.tensor._linalg.solve.rewriting import ( reuse_decomposition_multiple_solves, @@ -14,7 +16,9 @@ SolveLUFactorTridiagonal, ) from pytensor.tensor.blockwise import Blockwise, BlockwiseWithCoreShape +from pytensor.tensor.elemwise import Elemwise from pytensor.tensor.linalg import solve +from pytensor.tensor.nlinalg import Det, det from pytensor.tensor.slinalg import ( Cholesky, CholeskySolve, @@ -23,6 +27,7 @@ SolveTriangular, ) from pytensor.tensor.type import tensor +from tests.unittest_tools import assert_equal_computations class DecompSolveOpCounter: @@ -257,3 +262,249 @@ def test_decomposition_reused_preserves_check_finite(assume_a, counter): assert fn_opt(A_valid, b1_valid * np.nan, b2_valid) with pytest.raises((ValueError, np.linalg.LinAlgError), match=err_msg): assert fn_opt(A_valid * np.nan, b1_valid, b2_valid) + + +@pytest.mark.parametrize( + "original_fn, expected_fn", + [ + pytest.param( + lambda x: pt.log(pt.prod(pt.abs(x))), + lambda x: pt.sum(pt.log(pt.abs(x))), + id="log_prod_abs", + ), + pytest.param( + lambda x: pt.log(pt.prod(pt.exp(x))), lambda x: pt.sum(x), id="log_prod_exp" + ), + pytest.param( + lambda x: pt.log(pt.prod(x**2)), + lambda x: pt.sum(pt.log(pt.sqr(x))), + id="log_prod_sqr", + ), + pytest.param( + lambda x: pt.log(pt.abs(pt.prod(x))), + lambda x: pt.sum(pt.log(pt.abs(x))), + id="log_abs_prod", + ), + pytest.param( + lambda x: pt.log(pt.prod(pt.abs(x), axis=0)), + lambda x: pt.sum(pt.log(pt.abs(x)), axis=0), + id="log_prod_abs_axis0", + ), + pytest.param( + lambda x: pt.log(pt.prod(pt.exp(x), axis=-1)), + lambda x: pt.sum(x, axis=-1), + 
id="log_prod_exp_axis-1", + ), + ], +) +def test_local_log_prod_to_sum_log(original_fn, expected_fn): + x = pt.tensor("x", shape=(3, 4)) + out = original_fn(x) + expected = expected_fn(x) + rewritten = rewrite_graph(out, include=["stabilize", "specialize"]) + assert_equal_computations([rewritten], [expected]) + + +def test_local_log_prod_to_sum_log_positive_tag(): + x = pt.tensor("x", shape=(3, 4)) + x.tag.positive = True + out = pt.log(pt.prod(x)) + expected = pt.sum(pt.log(x)) + rewritten = rewrite_graph(out, include=["stabilize", "specialize"]) + assert_equal_computations([rewritten], [expected]) + + +def test_local_log_prod_to_sum_log_no_rewrite(): + x = pt.tensor("x", shape=(3, 4)) + out = pt.log(pt.prod(x)) + rewritten = rewrite_graph(out) + from pytensor.scalar.basic import Log + + assert rewritten.owner is not None + assert isinstance(rewritten.owner.op.scalar_op, Log) + + +@pytest.mark.parametrize( + "decomp_fn, decomp_output_idx", + [ + pytest.param(lambda x: pt.linalg.cholesky(x), 0, id="cholesky"), + pytest.param(lambda x: pt.linalg.lu(x), -1, id="lu"), + pytest.param(lambda x: pt.linalg.lu_factor(x), 0, id="lu_factor"), + ], +) +def test_det_of_matrix_factorized_elsewhere(decomp_fn, decomp_output_idx): + x = pt.tensor("x", shape=(3, 3)) + + decomp_out = decomp_fn(x) + if isinstance(decomp_out, list): + decomp_var = decomp_out[decomp_output_idx] + else: + decomp_var = decomp_out + + d = det(x) + + outputs = [decomp_var, d] + fn_no_opt = function( + [x], + outputs, + mode=get_default_mode().excluding("det_of_matrix_factorized_elsewhere"), + ) + det_nodes_no_opt = [ + node for node in fn_no_opt.maker.fgraph.apply_nodes if isinstance(node.op, Det) + ] + assert len(det_nodes_no_opt) == 1 + + fn_opt = function( + [x], + outputs, + mode=get_default_mode().including("det_of_matrix_factorized_elsewhere"), + ) + + det_nodes_opt = [ + node for node in fn_opt.maker.fgraph.apply_nodes if isinstance(node.op, Det) + ] + assert len(det_nodes_opt) == 0 + + 
+@pytest.mark.parametrize( + "decomp_fn, sign_op", + [ + pytest.param(lambda x: pt.linalg.svd(x, compute_uv=True), pt.abs, id="svd_abs"), + pytest.param( + lambda x: pt.linalg.svd(x, compute_uv=False), pt.abs, id="svd_no_uv_abs" + ), + pytest.param(lambda x: pt.linalg.qr(x), pt.abs, id="qr_abs"), + pytest.param(lambda x: pt.linalg.svd(x, compute_uv=True), pt.sqr, id="svd_sqr"), + pytest.param( + lambda x: pt.linalg.svd(x, compute_uv=False), pt.sqr, id="svd_no_uv_sqr" + ), + pytest.param(lambda x: pt.linalg.qr(x), pt.sqr, id="qr_sqr"), + ], +) +def test_det_of_matrix_factorized_elsewhere_abs(decomp_fn, sign_op): + x = pt.tensor("x", shape=(3, 3)) + + decomp_out = decomp_fn(x) + if isinstance(decomp_out, list): + decomp_var = decomp_out[0] + else: + decomp_var = decomp_out + + d = sign_op(det(x)) + + outputs = [decomp_var, d] + fn_no_opt = function( + [x], + outputs, + mode=get_default_mode().excluding("det_of_matrix_factorized_elsewhere"), + ) + det_nodes_no_opt = [ + node for node in fn_no_opt.maker.fgraph.apply_nodes if isinstance(node.op, Det) + ] + assert len(det_nodes_no_opt) == 1 + + fn_opt = function( + [x], + outputs, + mode=get_default_mode().including("det_of_matrix_factorized_elsewhere"), + ) + + det_nodes_opt = [ + node for node in fn_opt.maker.fgraph.apply_nodes if isinstance(node.op, Det) + ] + assert len(det_nodes_opt) == 0 + + +@pytest.mark.parametrize( + "original_fn, expected_fn", + [ + pytest.param( + lambda x: det(pt.linalg.cholesky(x)), + lambda x: pt.prod( + pt.diagonal(pt.linalg.cholesky(x), axis1=-2, axis2=-1), axis=-1 + ), + id="det_cholesky", + ), + pytest.param( + lambda x: det(pt.linalg.lu(x)[-1]), + lambda x: pt.prod( + pt.diagonal(pt.linalg.lu(x)[-1], axis1=-2, axis2=-1), axis=-1 + ), + id="det_lu_U", + ), + pytest.param( + lambda x: det(pt.linalg.lu(x)[-2]), + lambda x: pt.as_tensor(1.0, dtype=x.dtype), + id="det_lu_L", + ), + ], +) +def test_det_of_factorized_matrix(original_fn, expected_fn): + x = pt.tensor("x", shape=(3, 3)) + out = 
original_fn(x) + expected = expected_fn(x) + rewritten = rewrite_graph(out, include=["stabilize", "specialize"]) + assert_equal_computations([rewritten], [expected]) + + +@pytest.mark.parametrize( + "original_fn, expected_fn", + [ + pytest.param( + lambda x: pt.abs(det(pt.linalg.svd(x, compute_uv=True)[0])), + lambda x: pt.as_tensor(1.0, dtype=x.dtype), + id="abs_det_svd_U", + ), + pytest.param( + lambda x: pt.abs(det(pt.linalg.svd(x, compute_uv=True)[2])), + lambda x: pt.as_tensor(1.0, dtype=x.dtype), + id="abs_det_svd_Vt", + ), + pytest.param( + lambda x: pt.abs(det(pt.linalg.qr(x)[0])), + lambda x: pt.as_tensor(1.0, dtype=x.dtype), + id="abs_det_qr_Q", + ), + pytest.param( + lambda x: pt.sqr(det(pt.linalg.svd(x, compute_uv=True)[0])), + lambda x: pt.as_tensor(1.0, dtype=x.dtype), + id="sqr_det_svd_U", + ), + pytest.param( + lambda x: pt.sqr(det(pt.linalg.svd(x, compute_uv=True)[2])), + lambda x: pt.as_tensor(1.0, dtype=x.dtype), + id="sqr_det_svd_Vt", + ), + pytest.param( + lambda x: pt.sqr(det(pt.linalg.qr(x)[0])), + lambda x: pt.as_tensor(1.0, dtype=x.dtype), + id="sqr_det_qr_Q", + ), + pytest.param( + lambda x: det(pt.linalg.qr(x)[1]), + lambda x: pt.prod( + pt.diagonal(pt.linalg.qr(x)[1], axis1=-2, axis2=-1), axis=-1 + ), + id="det_qr_R", + ), + ], +) +def test_det_of_factorized_matrix_special_cases(original_fn, expected_fn): + x = pt.tensor("x", shape=(3, 3)) + out = original_fn(x) + expected = expected_fn(x) + rewritten = rewrite_graph(out, include=["stabilize", "specialize"]) + assert_equal_computations([rewritten], [expected]) + + +def test_det_of_factorized_matrix_no_rewrite_without_abs(): + x = pt.tensor("x", shape=(3, 3)) + Q = pt.linalg.qr(x)[0] + out = det(Q) + rewritten = rewrite_graph(out, include=["stabilize", "specialize"]) + + assert not ( + rewritten.owner is not None + and isinstance(rewritten.owner.op, Elemwise) + and len(rewritten.owner.inputs) == 0 + ), "det(Q) should not be rewritten to a constant without abs()" diff --git 
a/tests/tensor/rewriting/test_linalg.py b/tests/tensor/rewriting/test_linalg.py index 6b6f92f292..4283462bfa 100644 --- a/tests/tensor/rewriting/test_linalg.py +++ b/tests/tensor/rewriting/test_linalg.py @@ -246,14 +246,15 @@ def test_local_det_chol(): det_X = pt.linalg.det(X) f = function([X], [L, det_X]) - - nodes = f.maker.fgraph.toposort() - assert not any(isinstance(node, Det) for node in nodes) + assert not any(isinstance(node, Det) for node in f.maker.fgraph.apply_nodes) # This previously raised an error (issue #392) f = function([X], [L, det_X, X]) - nodes = f.maker.fgraph.toposort() - assert not any(isinstance(node, Det) for node in nodes) + assert not any(isinstance(node, Det) for node in f.maker.fgraph.apply_nodes) + + # Test graph that only has det_X + f = function([X], [det_X]) + assert not any(isinstance(node, Det) for node in f.maker.fgraph.apply_nodes) def test_psd_solve_with_chol():