@@ -3151,6 +3151,7 @@ def aten_embedding_bag(
     sparse: bool = False,
     per_sample_weights: Optional[TFloat] = None,
     include_last_offset: bool = False,
+    padding_idx: Optional[int] = None,
 ) -> Tuple[TFloat, TFloat, TFloat, TFloat]:
     """embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False) -> (Tensor, Tensor, Tensor, Tensor)"""
 
@@ -3247,23 +3248,24 @@ def _aten_embedding_bag_onnx(
 
     # Only compute the shape of other 3 outputs, we don't care the value
     if mode == 0:  # sum
-        offset2bag = op.Shape(indices, start=0, end=0)  # Generate empty tensor
+        offset2bag = op.Cast(op.Shape(indices, start=0, end=0), to=INT64.dtype)
         if op.Equal(include_last_offset, True):
-            bag_size = op.Expand(0, op.Shape(offsets))
+            bag_size = op.Cast(op.Expand(0, op.Shape(offsets)), to=INT64.dtype)
+            max_indices = op.Cast(op.Expand(0, op.Shape(offsets)), to=INT64.dtype)
         else:
-            bag_size = op.Expand(0, op.Shape(offsets) - 1)
-        max_indices = op.Expand(0, op.Shape(bag_size))
+            bag_size = op.Cast(op.Expand(0, op.Shape(offsets) - 1), to=INT64.dtype)
+            max_indices = op.Cast(op.Expand(0, op.Shape(offsets) - 1), to=INT64.dtype)
     elif mode == 1:  # mean
-        offset2bag = op.Expand(0, op.Shape(indices, start=0, end=1))
-        bag_size = op.Expand(0, op.Shape(offsets) - 1)
-        max_indices = op.Expand(0, op.Shape(bag_size))
+        offset2bag = op.Cast(op.Expand(0, op.Shape(indices, start=0, end=1)), to=INT64.dtype)
+        bag_size = op.Cast(op.Expand(0, op.Shape(offsets) - 1), to=INT64.dtype)
+        max_indices = op.Cast(op.Expand(0, op.Shape(offsets) - 1), to=INT64.dtype)
     else:  # max
-        offset2bag = op.Expand(0, op.Shape(indices, start=0, end=1))
-        bag_size = op.Expand(0, op.Shape(offsets) - 1)
+        offset2bag = op.Cast(op.Expand(0, op.Shape(indices, start=0, end=1)), to=INT64.dtype)
+        bag_size = op.Cast(op.Expand(0, op.Shape(offsets) - 1), to=INT64.dtype)
         # shape = (bag_size.dim[0], weight.dim[1])
         dim_0 = op.Shape(bag_size, start=0, end=1)
         dim_1 = op.Shape(weight, start=1, end=2)
-        max_indices = op.Expand(0, op.Concat(dim_0, dim_1, axis=0))
+        max_indices = op.Cast(op.Expand(0, op.Concat(dim_0, dim_1, axis=0)), to=INT64.dtype)
 
     return result, offset2bag, bag_size, max_indices
 
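
The casts added above exist because PyTorch returns the three auxiliary outputs of `embedding_bag` (`offset2bag`, `bag_size`, `max_indices`) as int64 tensors, so the exported graph must produce INT64 values even though only their shapes matter here. A minimal PyTorch sketch, not part of this diff, illustrating the expected dtypes (assuming the op is reachable as `torch.ops.aten.embedding_bag`):

```python
# Illustrative only: the auxiliary outputs of aten::embedding_bag are int64,
# which is why the ONNX lowering casts its placeholder outputs to INT64.
import torch

weight = torch.randn(10, 3)
indices = torch.tensor([3, 1, 4, 5, 3])
offsets = torch.tensor([0, 2])
_, offset2bag, bag_size, max_indices = torch.ops.aten.embedding_bag(weight, indices, offsets)
assert offset2bag.dtype == bag_size.dtype == max_indices.dtype == torch.int64
```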
@@ -3285,27 +3287,40 @@ def aten_embedding_bag_padding_idx(
     sparse: bool = False,
     per_sample_weights: Optional[TFloat] = None,
     include_last_offset: bool = False,
-    padding_idx: int = -1,
+    padding_idx: Optional[int] = None,
 ) -> Tuple[TFloat, TFloat, TFloat, TFloat]:
     """embedding_bag.padding_idx(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq, int mode, bool sparse, Tensor? per_sample_weights, bool include_last_offset, int? padding_idx) -> (Tensor, Tensor, Tensor, Tensor)
 
     We add default values for the attributes to accommodate _embedding_bag as well:
     _embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False, int padding_idx=-1)
     """
-    assert padding_idx is not None, (
-        "padding_idx must not be None. This is likely a dispatcher error"
-    )
 
     if per_sample_weights is None:
         per_sample_weights = op.Expand(op.Constant(value_floats=[1.0]), op.Shape(indices))
         per_sample_weights = op.CastLike(per_sample_weights, weight)
 
-    # Change padding_idx to positive value, -1 means the last index
-    if padding_idx < 0:
-        padding_idx = weight.shape[0] + padding_idx
+    if padding_idx is not None:
+        # Call the existing function for handling padding_idx
+        result, offset2bag, bag_size, max_indices = _aten_embedding_bag_1d_padding_idx_onnx(
+            weight,
+            indices,
+            offsets,
+            mode,
+            per_sample_weights,
+            include_last_offset,
+            padding_idx,
+        )
 
-    result, offset2bag, bag_size, max_indices = _aten_embedding_bag_1d_padding_idx_onnx(
-        weight, indices, offsets, mode, per_sample_weights, include_last_offset, padding_idx
+        return result, offset2bag, bag_size, max_indices
+
+    # When padding_idx is None, use the standard embedding_bag implementation
+    result, offset2bag, bag_size, max_indices = _aten_embedding_bag_onnx(
+        weight,
+        indices,
+        offsets,
+        mode,
+        per_sample_weights,
+        include_last_offset,
     )
 
     return result, offset2bag, bag_size, max_indices
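
For reference, a small PyTorch sketch, not part of this diff, of the behavior the `padding_idx is not None` branch has to reproduce: indices equal to `padding_idx` do not contribute to their bag's reduction.

```python
# Illustrative only: entries equal to padding_idx are skipped during the reduction.
import torch
import torch.nn.functional as F

weight = torch.arange(12.0).reshape(6, 2)
indices = torch.tensor([0, 2, 1, 2])
offsets = torch.tensor([0, 2])
out = F.embedding_bag(indices, weight, offsets, mode="sum", padding_idx=2)
# Bag 0 keeps only weight[0]; bag 1 keeps only weight[1].
assert torch.equal(out, torch.stack([weight[0], weight[1]]))
```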
@@ -3322,6 +3337,12 @@ def _aten_embedding_bag_1d_padding_idx_onnx(
     padding_idx: int,
 ) -> Tuple[TFloat, TFloat, TFloat, TFloat]:
     neg_1 = op.Constant(value_ints=[-1])
+
+    num_embeddings = op.Shape(weight, start=0, end=1)  # Get number of rows in weight
+    num_embeddings_scalar = op.Squeeze(num_embeddings)
+    if padding_idx < 0:
+        padding_idx = padding_idx + num_embeddings_scalar
+
     # Get weight out according to indices,
     # e.g. indices=[3,1,4,5,3] means get weight[[3,1,4,5,3]]
     indices_weight = op.Gather(weight, indices)
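
The added lines normalize a negative `padding_idx` against the number of embedding rows using `Shape`/`Squeeze` ops. A plain-Python sketch of the same rule (the helper name here is hypothetical, for illustration only):

```python
# Illustrative only: negative padding_idx follows Python's negative-index
# convention, so -1 refers to the last embedding row.
def normalize_padding_idx(padding_idx: int, num_embeddings: int) -> int:
    return padding_idx + num_embeddings if padding_idx < 0 else padding_idx

assert normalize_padding_idx(-1, 10) == 9
assert normalize_padding_idx(3, 10) == 3
```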
@@ -3357,7 +3378,10 @@ def _aten_embedding_bag_1d_padding_idx_onnx(
         cond_2 = j < end_pos
         while cond_2:
             index = op.Gather(indices, j)
-            if not op.Equal(index, padding_idx):
+            normalized_index = index
+            if index < 0:
+                normalized_index = index + num_embeddings_scalar
+            if not op.Equal(normalized_index, padding_idx):
                 # Something like the 'append' operation
                 curr_offsets = op.Concat(curr_offsets, op.Reshape(j, neg_1), axis=0)
             j = j + 1
@@ -3386,23 +3410,24 @@ def _aten_embedding_bag_1d_padding_idx_onnx(
     result = op.CastLike(result, weight)
 
     if mode == 0:  # sum
-        offset2bag = op.Expand(0, op.Shape(indices))
+        offset2bag = op.Cast(op.Expand(0, op.Shape(indices)), to=INT64.dtype)
         if op.Equal(include_last_offset, True):
-            bag_size = op.Expand(0, op.Shape(offsets))
+            bag_size = op.Cast(op.Expand(0, op.Shape(offsets)), to=INT64.dtype)
+            max_indices = op.Cast(op.Expand(0, op.Shape(offsets)), to=INT64.dtype)
         else:
-            bag_size = op.Expand(0, op.Shape(offsets) - 1)
-        max_indices = op.Expand(0, op.Shape(bag_size))
+            bag_size = op.Cast(op.Expand(0, op.Shape(offsets) - 1), to=INT64.dtype)
+            max_indices = op.Cast(op.Expand(0, op.Shape(offsets) - 1), to=INT64.dtype)
     elif mode == 1:  # mean
-        offset2bag = op.Expand(0, op.Shape(indices, start=0, end=1))
-        bag_size = op.Expand(0, op.Shape(offsets) - 1)
-        max_indices = op.Expand(0, op.Shape(bag_size))
+        offset2bag = op.Cast(op.Expand(0, op.Shape(indices, start=0, end=1)), to=INT64.dtype)
+        bag_size = op.Cast(op.Expand(0, op.Shape(offsets) - 1), to=INT64.dtype)
+        max_indices = op.Cast(op.Expand(0, op.Shape(offsets) - 1), to=INT64.dtype)
     else:  # mode == 2, max
-        offset2bag = op.Expand(0, op.Shape(indices, start=0, end=1))
-        bag_size = op.Expand(0, op.Shape(offsets) - 1)
+        offset2bag = op.Cast(op.Expand(0, op.Shape(indices, start=0, end=1)), to=INT64.dtype)
+        bag_size = op.Cast(op.Expand(0, op.Shape(offsets) - 1), to=INT64.dtype)
         # shape = (bag_size.dim[0], weight.dim[1])
         dim_0 = op.Shape(bag_size, start=0, end=1)
         dim_1 = op.Shape(weight, start=1, end=2)
-        max_indices = op.Expand(0, op.Concat(dim_0, dim_1, axis=0))
+        max_indices = op.Cast(op.Expand(0, op.Concat(dim_0, dim_1, axis=0)), to=INT64.dtype)
 
     return result, offset2bag, bag_size, max_indices
 
@@ -4382,7 +4407,6 @@ def aten_grid_sampler(
     padding_mode_options = ("zeros", "border", "reflection")
     padding_mode_str = padding_mode_options[padding_mode]
 
-    # Only one onnx Op so don't put into private function
     return op.GridSample(
         input,
         grid,
@@ -4408,7 +4432,6 @@ def aten_grid_sampler_2d(
     padding_mode_options = ("zeros", "border", "reflection")
     padding_mode_str = padding_mode_options[padding_mode]
 
-    # Only one onnx Op so don't put into private function
     return op.GridSample(
         input,
         grid,
@@ -4698,7 +4721,7 @@ def _aten_index_onnx(
     if _has_none_in_middle(indices):
         # If there is None in the middle, Advanced Indexing cannot decide where to put
         # the new dimensions. So it places them in the front, like GatherND does.
-        return op.Identity(self)
+        return self
 
     # When the indices are consecutive, Advanced Indexing will place the new dimensions
     # (aka. the broadcasted shape) in the middle, replacing the original [x1, ..., xk] axes.
@@ -4744,7 +4767,9 @@ def _aten_index_onnx(
 
 
 @torch_op(("aten::index.Tensor", "aten::_unsafe_index.Tensor"), trace_only=True)
-def aten_index(self: TensorType, indices: Sequence[Optional[INT64]]) -> TensorType:
+def aten_index(
+    self: TensorType, indices: Sequence[Optional[Union[INT64, BOOL]]]
+) -> TensorType:
     """index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
 
     NOTE: Understanding `aten::index`
@@ -4764,17 +4789,19 @@ def aten_index(self: TensorType, indices: Sequence[Optional[INT64]]) -> TensorTy
 
     None in `indices` are like fillers for dimensions that cannot be removed in the process.
     """
+    # Handle Boolean indexing first
+    if any(index is not None and index.dtype == ir.DataType.BOOL for index in indices):
+        return _aten_index_bool(self, indices)
 
     index_ranks = [len(index.shape) for index in indices if index is not None]
 
     return _aten_index_onnx(self, indices, index_ranks)
 
 
-@torch_op(("aten::index.Tensor", "aten::_unsafe_index.Tensor"), trace_only=True)
-def aten_index_bool(self: TensorType, indices: Sequence[Optional[BOOL]]) -> TensorType:  # pylint: disable=inconsistent-return-statements
+def _aten_index_bool(self: TensorType, indices: Sequence[Optional[BOOL]]) -> TensorType:
     index_ranks = [len(index.shape) for index in indices if index is not None]
 
-    if index_ranks[0] == 1:
+    if all(rank == 1 for rank in index_ranks):
         # indices contains scalar only.
         new_indices = [
             op.Transpose(op.NonZero(index), perm=[1, 0]) if index is not None else None
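
A short PyTorch sketch, not part of this diff, of the equivalence the rank-1 branch relies on: a 1-D Boolean mask behaves like integer indexing with the mask's nonzero positions, which is what the `NonZero`/`Transpose`/`Squeeze` conversion above produces.

```python
# Illustrative only: a 1-D boolean mask is equivalent to indexing with the
# positions where the mask is True.
import torch

x = torch.arange(12).reshape(3, 4)
mask = torch.tensor([True, False, True])
assert torch.equal(x[mask], x[mask.nonzero().squeeze(1)])
```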
@@ -4784,6 +4811,7 @@ def aten_index_bool(self: TensorType, indices: Sequence[Optional[BOOL]]) -> Tens
             op.Squeeze(index, axes=[1]) if index is not None else None for index in new_indices
         ]
         return _aten_index_onnx(self, new_indices, index_ranks)
+
     else:
         input_rank = len(self.shape)
         # Prepare perm for transposing self tensor.
@@ -4800,15 +4828,19 @@ def aten_index_bool(self: TensorType, indices: Sequence[Optional[BOOL]]) -> Tens
             if index is None:
                 self = op.Transpose(self, perm=trans_perm)
                 count_of_none += 1
-            else:
-                new_indices = op.Transpose(op.NonZero(index), perm=[1, 0])
-                result = op.GatherND(self, new_indices, batch_dims=0)
-                finla_rank = input_rank - (len(index.shape) - 1)
-                trans_perm = list(range(finla_rank))
-                trans_perm = trans_perm[-1:] + trans_perm[:-1]
-                for _ in range(count_of_none):
-                    result = op.Transpose(result, perm=trans_perm)
-                return result
+                continue
+
+            new_indices = op.Transpose(op.NonZero(index), perm=[1, 0])
+            result = op.GatherND(self, new_indices, batch_dims=0)
+            final_rank = input_rank - (len(index.shape) - 1)
+            trans_perm = list(range(final_rank))
+            trans_perm = trans_perm[-1:] + trans_perm[:-1]
+            for _ in range(count_of_none):
+                result = op.Transpose(result, perm=trans_perm)
+            # FIXME(justinchuby): Even though this logic passes the tests, it still looks strange:
+            # why does it return early here instead of continuing to process the remaining indices?
+            # I think the assumption here is that there can be only one Boolean index in the indices list?
+            return result
 
 
 def aten_index_add(
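
For context, a sketch, not part of this diff, of the case the transpose-and-GatherND path above handles: a Boolean mask applied to a non-leading dimension, with leading `None` entries.

```python
# Illustrative only: a boolean mask on a trailing dimension selects along that
# dimension; the ONNX lowering emulates this by transposing the masked dimension
# to the front, gathering, and transposing back once per leading None.
import torch

x = torch.arange(24).reshape(2, 3, 4)
mask = torch.tensor([True, False, True, True])  # mask over the last dimension
assert torch.equal(x[:, :, mask], x.index_select(-1, mask.nonzero().squeeze(1)))
```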
@@ -4830,7 +4862,7 @@ def aten_index_copy(
 @torch_op(("aten::index_put", "aten::_unsafe_index_put"), trace_only=True)
 def aten_index_put(
     self: TReal,
-    indices: Sequence[INT64],
+    indices: Sequence[Optional[Union[INT64, BOOL]]],
     values: TReal,
     accumulate: bool = False,
 ) -> TReal:
@@ -4839,6 +4871,9 @@ def aten_index_put(
     See implementation of `torch.onnx.symbolic_opset11.index_put
     <https://github.com/pytorch/pytorch/blob/main/torch/onnx/symbolic_opset11.py#L212>`_.
     """
+    if any(index is not None and index.dtype == BOOL.dtype for index in indices):
+        return _aten_index_put_bool(self, indices, values, accumulate)
+
     # Ensure the number of indices matches the tensor rank by appending trailing Nones.
     self_rank = len(self.shape)
     if len(indices) < self_rank:
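
A minimal PyTorch sketch, not part of this diff, of the semantics the new Boolean branch dispatches to: `index_put` with a Boolean mask is masked assignment (or masked accumulation when `accumulate=True`).

```python
# Illustrative only: index_put with a boolean mask writes `values` at the
# positions where the mask is True.
import torch

x = torch.zeros(2, 3)
mask = torch.tensor([[True, False, True], [False, True, False]])
values = torch.tensor(1.0)
expected = x.clone()
expected[mask] = values
assert torch.equal(x.index_put((mask,), values), expected)
```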
@@ -4971,8 +5006,7 @@ def same_shape(other_shape: ir.Shape) -> bool:
     return result
 
 
-@torch_op("aten::index_put", trace_only=True)
-def aten_index_put_bool(
+def _aten_index_put_bool(
     self: TReal,
     indices: Sequence[BOOL],
     values: TReal,