Commit 4787bc9

Fix rebase

1 parent 26bc88a commit 4787bc9
File tree: 7 files changed, +270 -69 lines

pytensor/link/numba/dispatch/subtensor.py
Lines changed: 1 addition & 31 deletions

@@ -244,37 +244,7 @@ def numba_funcify_AdvancedSubtensor(op, node, **kwargs):
     else:
         tensor_inputs = node.inputs[2:]

-    adv_idxs = [
-        {
-            "axis": i,
-            "dtype": idx.type.dtype,
-            "bcast": idx.type.broadcastable,
-            "ndim": idx.type.ndim,
-        }
-        for i, idx in enumerate(idxs)
-        if isinstance(idx.type, TensorType)
-    ]
-
     # Reconstruct indexing information from idx_list and tensor inputs
-    # basic_idxs = []
-    # adv_idxs = []
-    # input_idx = 0
-    #
-    # for i, entry in enumerate(op.idx_list):
-    #     if isinstance(entry, slice):
-    #         # Basic slice index
-    #         basic_idxs.append(entry)
-    #     elif isinstance(entry, Type):
-    #         # Advanced tensor index
-    #         if input_idx < len(tensor_inputs):
-    #             idx_input = tensor_inputs[input_idx]
-    #             adv_idxs.append({
-    #                 "axis": i,
-    #                 "dtype": idx_input.type.dtype,
-    #                 "bcast": idx_input.type.broadcastable,
-    #                 "ndim": idx_input.type.ndim,
-    #             })
-    #             input_idx += 1
     basic_idxs = []
     adv_idxs = []
     input_idx = 0
@@ -313,7 +283,7 @@ def numba_funcify_AdvancedSubtensor(op, node, **kwargs):
         and len(adv_idxs) >= 1
         and all(adv_idx["dtype"] != "bool" for adv_idx in adv_idxs)
         # Implementation does not support newaxis
-        and not any(isinstance(idx.type, NoneTypeT) for idx in idxs)
+        and not any(isinstance(idx.type, NoneTypeT) for idx in tensor_inputs)
     ):
         return vector_integer_advanced_indexing(op, node, **kwargs)

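For reference, a minimal NumPy sketch (editor's illustration, not part of the commit; array and index names are made up) of what the guard above admits and rejects before dispatching to vector_integer_advanced_indexing:

    import numpy as np

    x = np.arange(24).reshape(4, 3, 2)
    vec_idx = np.array([0, 2])

    # Admitted: plain vector integer advanced index, no booleans, no newaxis.
    print(x[vec_idx].shape)        # (2, 3, 2)

    # Rejected: np.newaxis (NoneTypeT) mixed into the index tuple...
    print(x[vec_idx, None].shape)  # (2, 1, 3, 2)

    # ...and boolean masks (any adv_idx with dtype "bool").
    mask = np.array([True, False, True, False])
    print(x[mask].shape)           # (2, 3, 2)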

pytensor/tensor/rewriting/subtensor.py
Lines changed: 174 additions & 21 deletions

@@ -33,6 +33,7 @@
     get_underlying_scalar_constant_value,
     register_infer_shape,
     switch,
+    tile,
 )
 from pytensor.tensor.basic import constant as tensor_constant
 from pytensor.tensor.blockwise import _squeeze_left
@@ -73,7 +74,6 @@
     IncSubtensor,
     Subtensor,
     advanced_inc_subtensor1,
-    advanced_subtensor,
     advanced_subtensor1,
     as_index_constant,
     get_canonical_form_slice,
@@ -83,7 +83,7 @@
     inc_subtensor,
     indices_from_subtensor,
 )
-from pytensor.tensor.type import TensorType
+from pytensor.tensor.type import TensorType, integer_dtypes
 from pytensor.tensor.type_other import NoneTypeT, SliceType
 from pytensor.tensor.variable import TensorConstant, TensorVariable

@@ -256,6 +256,122 @@ def local_replace_AdvancedSubtensor(fgraph, node):
     return [new_res]


+def _compute_tiling_reps(val, target, allow_symbolic=False, target_shape=None):
+    """Compute tiling repetitions needed to broadcast val to match target shape.
+
+    Parameters
+    ----------
+    val : TensorVariable
+        The value to tile
+    target : TensorVariable
+        The target to match shape with (or None if using target_shape)
+    allow_symbolic : bool
+        If True, allow symbolic shapes (return reps with 1s, skip tiling).
+        If False, return None for symbolic shapes.
+    target_shape : tuple, optional
+        If provided, use this shape tuple instead of target.shape
+
+    Returns
+    -------
+    tuple or None
+        (needs_tiling, reps, has_symbolic_shapes) if compatible, None otherwise
+    """
+    try:
+        needs_tiling = False
+        reps = []
+        has_symbolic_shapes = False
+
+        def get_target_shape_i(i):
+            return target.shape[i] if i < len(target.shape) else None
+
+        if target_shape is None:
+            target_ndim = target.ndim
+        else:
+            target_ndim = len(target_shape)
+
+        for i in range(target_ndim):
+            try:
+                target_shape_i = get_target_shape_i(i)
+                val_shape_i = val.shape[i]
+            except (IndexError, AttributeError, TypeError):
+                return None
+
+            if target_shape_i is None:
+                # Symbolic shape in target - allow but skip tiling
+                reps.append(1)
+                continue
+
+            try:
+                target_shape_val = get_scalar_constant_value(
+                    target_shape_i, only_process_constants=True
+                )
+                val_shape_val = get_scalar_constant_value(
+                    val_shape_i, only_process_constants=True
+                )
+
+                if target_shape_val == val_shape_val:
+                    reps.append(1)
+                elif val_shape_val == 1:
+                    needs_tiling = True
+                    reps.append(target_shape_i)
+                else:
+                    return None
+
+            except NotScalarConstantError:
+                has_symbolic_shapes = True
+                if not allow_symbolic:
+                    return None
+                # For symbolic shapes, check dimension compatibility
+                if target_ndim == val.ndim:
+                    reps.append(1)
+                elif val.ndim == 0:
+                    reps.append(1)
+                elif val.ndim == 1 and target_ndim >= 1:
+                    reps.append(1)
+                elif val.ndim < target_ndim:
+                    return None
+                else:
+                    return None
+
+        return (needs_tiling, reps, has_symbolic_shapes)
+    except (TypeError, ValueError, AttributeError, IndexError):
+        return None
+
+
+def _validate_and_apply_tiling(val, reps):
+    """Validate that all reps are positive and apply tiling.
+
+    Parameters
+    ----------
+    val : TensorVariable
+        The value to tile
+    reps : list
+        Repetition counts for each dimension
+
+    Returns
+    -------
+    TensorVariable or None
+        Tiled value if valid, None otherwise
+    """
+    try:
+        for rep in reps:
+            if isinstance(rep, (int, np.integer)):
+                if rep <= 0:
+                    return None
+            else:
+                try:
+                    rep_val = get_scalar_constant_value(
+                        rep, only_process_constants=True
+                    )
+                    if rep_val <= 0:
+                        return None
+                except NotScalarConstantError:
+                    return None
+        return tile(val, reps)
+    except (TypeError, ValueError, AttributeError, IndexError):
+        return None
+
+
 @register_specialize
 @node_rewriter([AdvancedIncSubtensor])
 def local_AdvancedIncSubtensor_to_AdvancedIncSubtensor1(fgraph, node):
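Aside (editor's illustration, not part of the commit): for fully constant shapes, the rep computation in _compute_tiling_reps reduces to the following pure-NumPy sketch; compute_reps is a hypothetical stand-in and np.tile plays the role of PyTensor's tile:

    import numpy as np

    def compute_reps(val_shape, target_shape):
        # Mirror of the constant-shape branch above: rep 1 where sizes already
        # match, rep target_shape[i] where val has length 1, else incompatible.
        reps = []
        for v, t in zip(val_shape, target_shape):
            if v == t:
                reps.append(1)
            elif v == 1:
                reps.append(t)
            else:
                return None  # caller bails out, as the rewrite does
        return reps

    val = np.ones((1, 3))
    reps = compute_reps(val.shape, (4, 3))  # -> [4, 1]
    assert np.tile(val, reps).shape == (4, 3)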
@@ -265,6 +381,7 @@ def local_AdvancedIncSubtensor_to_AdvancedIncSubtensor1(fgraph, node):
     """

     if type(node.op) is not AdvancedIncSubtensor:
+        # Don't apply to subclasses
         return

     if node.op.ignore_duplicates:
@@ -1321,7 +1438,9 @@ def local_useless_inc_subtensor_alloc(fgraph, node):
     if isinstance(node.op, IncSubtensor):
         xi = Subtensor(node.op.idx_list)(x, *i)
     elif isinstance(node.op, AdvancedIncSubtensor):
-        xi = advanced_subtensor(x, *i)
+        # Use the same idx_list as the original operation to ensure correct shape
+        op = AdvancedSubtensor(node.op.idx_list)
+        xi = op.make_node(x, *i).outputs[0]
     elif isinstance(node.op, AdvancedIncSubtensor1):
         xi = advanced_subtensor1(x, *i)
     else:
@@ -1771,10 +1890,11 @@ def local_blockwise_inc_subtensor(fgraph, node):


 @node_rewriter(tracks=[AdvancedSubtensor, AdvancedIncSubtensor])
-def bool_idx_to_nonzero(fgraph, node):
-    """Convert boolean indexing into equivalent vector boolean index, supported by our dispatch
+def ravel_multidimensional_bool_idx(fgraph, node):
+    """Convert multidimensional boolean indexing into equivalent vector boolean index, supported by Numba

-    x[1:, eye(3, dtype=bool), 1:] -> x[1:, *eye(3).nonzero()]
+    x[eye(3, dtype=bool)] -> x.ravel()[eye(3).ravel()]
+    x[eye(3, dtype=bool)].set(y) -> x.ravel()[eye(3).ravel()].set(y).reshape(x.shape)
     """

     if isinstance(node.op, AdvancedSubtensor):
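The docstring equivalences can be checked directly in NumPy (editor's sketch, not part of the commit; `.set(y)` is PyTensor notation, emulated here with in-place assignment):

    import numpy as np

    x = np.arange(9, dtype=float).reshape(3, 3)
    mask = np.eye(3, dtype=bool)
    y = np.array([10.0, 20.0, 30.0])

    # Read case: x[mask] -> x.ravel()[mask.ravel()]
    assert np.array_equal(x[mask], x.ravel()[mask.ravel()])

    # Set case: update the raveled copy, then restore the original shape.
    flat = x.ravel().copy()
    flat[mask.ravel()] = y
    expected = x.copy()
    expected[mask] = y
    assert np.array_equal(flat.reshape(x.shape), expected)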
@@ -1787,26 +1907,53 @@ def bool_idx_to_nonzero(fgraph, node):
     # Reconstruct indices from idx_list and tensor inputs
     idxs = indices_from_subtensor(tensor_inputs, node.op.idx_list)

-    bool_pos = {
-        i
+    if any(
+        (
+            (isinstance(idx.type, TensorType) and idx.type.dtype in integer_dtypes)
+            or isinstance(idx.type, NoneTypeT)
+        )
+        for idx in idxs
+    ):
+        # Get out if there are any other advanced indexes or np.newaxis
+        return None
+
+    bool_idxs = [
+        (i, idx)
         for i, idx in enumerate(idxs)
         if (isinstance(idx.type, TensorType) and idx.dtype == "bool")
-    }
+    ]

-    if not bool_pos:
+    if len(bool_idxs) != 1:
+        # Get out if there are no or multiple boolean idxs
+        return None
+    [(bool_idx_pos, bool_idx)] = bool_idxs
+    bool_idx_ndim = bool_idx.type.ndim
+    if bool_idx.type.ndim < 2:
+        # No need to do anything if it's a vector or scalar, as it's already supported by Numba
         return None

-    new_idxs = []
-    for i, idx in enumerate(idxs):
-        if i in bool_pos:
-            new_idxs.extend(idx.nonzero())
-        else:
-            new_idxs.append(idx)
+    x_shape = x.shape
+    raveled_x = x.reshape(
+        (*x_shape[:bool_idx_pos], -1, *x_shape[bool_idx_pos + bool_idx_ndim :])
+    )
+
+    raveled_bool_idx = bool_idx.ravel()
+    new_idxs = list(idxs)
+    new_idxs[bool_idx_pos] = raveled_bool_idx

     if isinstance(node.op, AdvancedSubtensor):
-        new_out = node.op(x, *new_idxs)
+        new_out = raveled_x[tuple(new_idxs)]
     else:
-        new_out = node.op(x, y, *new_idxs)
+        sub = raveled_x[tuple(new_idxs)]
+        new_out = inc_subtensor(
+            sub,
+            y,
+            set_instead_of_inc=node.op.set_instead_of_inc,
+            ignore_duplicates=node.op.ignore_duplicates,
+            inplace=node.op.inplace,
+        )
+        new_out = new_out.reshape(x_shape)
+
     return [copy_stack_trace(node.outputs[0], new_out)]

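The reshape above collapses only the axes covered by the boolean index, which also handles masks sitting in the middle of the index tuple. A NumPy check of that case (editor's sketch, not part of the commit), with bool_idx_pos = 1 and bool_idx_ndim = 2:

    import numpy as np

    x = np.arange(2 * 3 * 3 * 2).reshape(2, 3, 3, 2)
    mask = np.eye(3, dtype=bool)  # indexes axes 1 and 2

    # (*x_shape[:1], -1, *x_shape[1 + 2:]) -> (2, 9, 2)
    raveled_x = x.reshape(2, -1, 2)
    assert np.array_equal(x[:, mask], raveled_x[:, mask.ravel()])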
@@ -1941,10 +2088,16 @@ def ravel_multidimensional_int_idx(fgraph, node):


 optdb["specialize"].register(
-    bool_idx_to_nonzero.__name__,
-    bool_idx_to_nonzero,
+    ravel_multidimensional_bool_idx.__name__,
+    ravel_multidimensional_bool_idx,
+    "numba",
+    use_db_name_as_tag=False,  # Not included if only "specialize" is requested
+)
+
+optdb["specialize"].register(
+    ravel_multidimensional_int_idx.__name__,
+    ravel_multidimensional_int_idx,
     "numba",
-    "shape_unsafe",  # It can mask invalid mask sizes
     use_db_name_as_tag=False,  # Not included if only "specialize" is requested
 )

pytensor/tensor/subtensor.py
Lines changed: 3 additions & 9 deletions

@@ -922,11 +922,12 @@ def __init__(self, idx_list=None):

     def _normalize_idx_list_for_hash(self):
         """Normalize idx_list for hash and equality comparison."""
-        if self.idx_list is None:
+        idx_list = getattr(self, "idx_list", None)
+        if idx_list is None:
             return None

         msg = []
-        for entry in self.idx_list:
+        for entry in idx_list:
             if isinstance(entry, slice):
                 msg.append((entry.start, entry.stop, entry.step))
             else:
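The getattr guard presumably covers objects whose __init__ has not yet run (e.g. mid-unpickling). A standalone sketch of the normalization idea (editor's illustration, not the library's code; the non-slice branch is assumed, since the diff does not show it):

    def normalize_idx_list(idx_list):
        # slice objects are unhashable before Python 3.12, so reduce each one
        # to a hashable (start, stop, step) triple for use in __hash__/__eq__.
        if idx_list is None:
            return None
        msg = []
        for entry in idx_list:
            if isinstance(entry, slice):
                msg.append((entry.start, entry.stop, entry.step))
            else:
                msg.append(entry)  # assumed passthrough for non-slice entries
        return tuple(msg)

    print(normalize_idx_list([slice(1, None, 2), 0]))  # ((1, None, 2), 0)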
@@ -2812,13 +2813,6 @@ def make_node(self, x, *inputs):
         advanced_indices = []
         adv_group_axis = None
         last_adv_group_axis = None
-        if new_axes: #not defined?
-            expanded_x_shape_list = list(x.type.shape)
-            for new_axis in new_axes:
-                expanded_x_shape_list.insert(new_axis, 1)
-            expanded_x_shape = tuple(expanded_x_shape_list)
-        else:
-            expanded_x_shape = x.type.shape
         for i, (idx, dim_length) in enumerate(
             zip_longest(explicit_indices, x.type.shape, fillvalue=slice(None))
         ):
