c_alias_bucketize (#76152)

zhengshengning · web-flow · commit feaccbbbc8b9 · 2025-11-02T17:06:39.000+08:00
diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py
@@ -1168,23 +1168,29 @@ def topk(
         return values, indices
 
 
+@param_two_alias(["x", "input"], ["sorted_sequence", "boundaries"])
 def bucketize(
     x: Tensor,
     sorted_sequence: Tensor,
     out_int32: bool = False,
     right: bool = False,
     name: str | None = None,
+    *,
+    out: Tensor | None = None,
 ) -> Tensor:
     """
     This API is used to find the index of the corresponding 1D tensor `sorted_sequence` in the innermost dimension based on the given `x`.
 
     Args:
         x (Tensor): An input N-D tensor value with type int32, int64, float32, float64.
+            alias: ``input``.
         sorted_sequence (Tensor): An input 1-D tensor with type int32, int64, float32, float64. The value of the tensor monotonically increases in the innermost dimension.
+            alias: ``boundaries``.
         out_int32 (bool, optional): Data type of the output tensor which can be int32, int64. The default value is False, and it indicates that the output data type is int64.
         right (bool, optional): Find the upper or lower bounds of the sorted_sequence range in the innermost dimension based on the given `x`. If the value of the sorted_sequence is nan or inf, return the size of the innermost dimension.
                                The default value is False and it shows the lower bounds.
         name (str|None, optional): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`.
+        out (Tensor|None, optional): The output tensor. Default: None.
 
     Returns:
         Tensor (the same sizes of the `x`), return the tensor of int32 if set :attr:`out_int32` is True, otherwise return the tensor of int64.
@@ -1229,26 +1235,35 @@ def bucketize(
         raise ValueError(
             f"sorted_sequence tensor must be 1 dimension, but got dim {sorted_sequence.dim()}"
         )
-    return searchsorted(sorted_sequence, x, out_int32, right, name)
+    return searchsorted(sorted_sequence, x, out_int32, right, name, out=out)
 
 
+@param_one_alias(["values", "input"])
 def searchsorted(
     sorted_sequence: Tensor,
     values: Tensor,
     out_int32: bool = False,
     right: bool = False,
     name: str | None = None,
+    *,
+    side: str | None = None,
+    out: Tensor | None = None,
+    sorter: Tensor | None = None,
 ) -> Tensor:
     """
     Find the index of the corresponding `sorted_sequence` in the innermost dimension based on the given `values`.
 
     Args:
         sorted_sequence (Tensor): An input N-D or 1-D tensor with type int32, int64, float16, float32, float64, bfloat16. The value of the tensor monotonically increases in the innermost dimension.
         values (Tensor): An input N-D tensor value with type int32, int64, float16, float32, float64, bfloat16.
+            alias: ``input``.
         out_int32 (bool, optional): Data type of the output tensor which can be int32, int64. The default value is False, and it indicates that the output data type is int64.
         right (bool, optional): Find the upper or lower bounds of the sorted_sequence range in the innermost dimension based on the given `values`. If the value of the sorted_sequence is nan or inf, return the size of the innermost dimension.
                                The default value is False and it shows the lower bounds.
         name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
+        side (str|None, optional): The preferred alternative to ``right``. If set to `right`, it overrides ``right`` with True; if set to `left` or left as None, the value passed for ``right`` is used unchanged. Default value is None.
+        sorter (Tensor|None, optional): If provided, a tensor matching the shape of the unsorted `sorted_sequence` that contains the indices which sort it in ascending order along the innermost dimension. Default value is None.
+        out (Tensor|None, optional): The output tensor. Default: None.
 
     Returns:
         Tensor (the same sizes of the `values`), return the tensor of int32 if set :attr:`out_int32` is True, otherwise return the tensor of int64.
@@ -1280,8 +1295,18 @@ def searchsorted(
              [1, 3, 4, 5]])
 
     """
+    # If side is present, override the value of right if needed.
+    if side is not None and side == "right":
+        right = True
+
     if in_dynamic_or_pir_mode():
-        return _C_ops.searchsorted(sorted_sequence, values, out_int32, right)
+        if sorter is not None:
+            sorted_sequence = sorted_sequence.take_along_axis(
+                axis=-1, indices=sorter
+            )
+        return _C_ops.searchsorted(
+            sorted_sequence, values, out_int32, right, out=out
+        )
     else:
         check_variable_and_dtype(
             sorted_sequence,
diff --git a/test/legacy_test/test_bucketize_api.py b/test/legacy_test/test_bucketize_api.py
@@ -110,5 +110,149 @@ def test_empty_input_error(self):
             self.assertRaises(AttributeError, paddle.bucketize, x, None)
 
 
+class TestBucketizeAPI_Extended(unittest.TestCase):
+    def setUp(self):
+        self.sorted_sequence = np.array([2, 4, 8, 16]).astype("float64")
+        self.x2d = np.array([[0, 8, 4, 16], [-1, 2, 8, 4]]).astype("float64")
+        self.x1d = np.array([0, 8, 4, 16]).astype("float64")
+        self.sorted_dup = np.array([1, 2, 2, 2, 3]).astype("float64")
+        self.x_dup = np.array([2, 2, 1, 3]).astype("float64")
+        self.place = get_places()
+
+    def test_dygraph_out_and_out_int32_and_name(self):
+        # Dynamic graph: test out_int32, name and the out parameter (in-place write)
+        paddle.disable_static()
+        for place in self.place:
+            with paddle.base.dygraph.guard():
+                seq = paddle.to_tensor(self.sorted_sequence)
+                x = paddle.to_tensor(self.x2d)
+
+                res32 = paddle.bucketize(
+                    x, seq, out_int32=True, name="test_name"
+                )
+                self.assertEqual(res32.dtype, paddle.int32)
+                ref32 = np.searchsorted(self.sorted_sequence, self.x2d)
+                np.testing.assert_allclose(
+                    ref32, res32.numpy().astype("int64"), rtol=1e-05
+                )
+
+                # out parameter: supply existing tensor, should be written and returned
+                out_tensor = paddle.empty(shape=self.x2d.shape, dtype="int64")
+                ret = paddle.bucketize(x, seq, out=out_tensor)
+                ref = np.searchsorted(self.sorted_sequence, self.x2d)
+                np.testing.assert_allclose(ref, out_tensor.numpy(), rtol=1e-05)
+        paddle.enable_static()
+
+    def test_static_out_int32_and_right(self):
+        # Static graph: test out_int32 and right=True/False
+        paddle.enable_static()
+        for place in self.place:
+            with paddle.static.program_guard(paddle.static.Program()):
+                seq = paddle.static.data(
+                    name="seq",
+                    shape=self.sorted_sequence.shape,
+                    dtype="float64",
+                )
+                x = paddle.static.data(
+                    name="x", shape=self.x2d.shape, dtype="float64"
+                )
+
+                out_left = paddle.bucketize(
+                    x, seq, right=False, out_int32=False
+                )
+                out_right = paddle.bucketize(x, seq, right=True, out_int32=True)
+
+                exe = paddle.static.Executor(place)
+                res_left, res_right = exe.run(
+                    feed={"seq": self.sorted_sequence, "x": self.x2d},
+                    fetch_list=[out_left, out_right],
+                )
+                ref_left = np.searchsorted(
+                    self.sorted_sequence, self.x2d, side="left"
+                )
+                ref_right = np.searchsorted(
+                    self.sorted_sequence, self.x2d, side="right"
+                )
+                np.testing.assert_allclose(ref_left, res_left, rtol=1e-05)
+                # out_int32=True -> the fetched numpy result must have dtype int32
+                self.assertEqual(res_right.dtype, np.int32)
+                np.testing.assert_allclose(
+                    ref_right, res_right.astype("int64"), rtol=1e-05
+                )
+        paddle.disable_static()
+
+    def test_dygraph_1d_input(self):
+        # Dynamic graph: test with a 1-D x
+        paddle.disable_static()
+        for place in self.place:
+            with paddle.base.dygraph.guard():
+                seq = paddle.to_tensor(self.sorted_sequence)
+                x = paddle.to_tensor(self.x1d)
+
+                out = paddle.bucketize(x, seq)
+                ref = np.searchsorted(self.sorted_sequence, self.x1d)
+                np.testing.assert_allclose(ref, out.numpy(), rtol=1e-05)
+        paddle.enable_static()
+
+    def test_dup_elements_side_behavior(self):
+        # Test the left/right difference when the sorted sequence contains duplicate elements
+        paddle.disable_static()
+        for place in self.place:
+            with paddle.base.dygraph.guard():
+                seq = paddle.to_tensor(self.sorted_dup)
+                x = paddle.to_tensor(self.x_dup)
+
+                out_left = paddle.bucketize(x, seq, right=False)
+                out_right = paddle.bucketize(x, seq, right=True)
+
+                ref_left = np.searchsorted(
+                    self.sorted_dup, self.x_dup, side="left"
+                )
+                ref_right = np.searchsorted(
+                    self.sorted_dup, self.x_dup, side="right"
+                )
+
+                np.testing.assert_allclose(
+                    ref_left, out_left.numpy(), rtol=1e-05
+                )
+                np.testing.assert_allclose(
+                    ref_right, out_right.numpy(), rtol=1e-05
+                )
+        paddle.enable_static()
+
+    def test_static_and_dygraph_sort_of_api_stability(self):
+        # Simple coverage: both static-graph and dynamic-graph calls succeed (results are not checked)
+        paddle.enable_static()
+        for place in self.place:
+            with paddle.static.program_guard(paddle.static.Program()):
+                seq = paddle.static.data(
+                    name="seq",
+                    shape=self.sorted_sequence.shape,
+                    dtype="float64",
+                )
+                x = paddle.static.data(
+                    name="x", shape=self.x2d.shape, dtype="float64"
+                )
+                _ = paddle.bucketize(
+                    x, seq, out_int32=False, right=False, name="static_case"
+                )
+                exe = paddle.static.Executor(place)
+                exe.run(
+                    feed={"seq": self.sorted_sequence, "x": self.x2d},
+                    fetch_list=[],
+                )
+        paddle.disable_static()
+
+        paddle.disable_static()
+        for place in self.place:
+            with paddle.base.dygraph.guard():
+                seq = paddle.to_tensor(self.sorted_sequence)
+                x = paddle.to_tensor(self.x2d)
+                _ = paddle.bucketize(
+                    x, seq, out_int32=False, right=False, name="dy_case"
+                )
+        paddle.enable_static()
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/test/legacy_test/test_searchsorted_op.py b/test/legacy_test/test_searchsorted_op.py
@@ -304,6 +304,8 @@ def test_searchsorted_sortedsequence_size_error():
         )
 
     def test_check_type_error(self):
+        paddle.enable_static()
+
         def test_sortedsequence_values_type_error():
             with paddle.static.program_guard(paddle.static.Program()):
                 sorted_sequence = paddle.static.data(
@@ -317,5 +319,123 @@ def test_sortedsequence_values_type_error():
         self.assertRaises(TypeError, test_sortedsequence_values_type_error)
 
 
+class TestSearchSortedAPI_Extended(unittest.TestCase):
+    def init_test_case(self):
+        self.sorted_sequence = np.array([2, 4, 6, 8, 10]).astype("float64")
+        self.values_2d = np.array([[3, 6, 9], [3, 6, 9]]).astype("float64")
+        self.values_1d = np.array([3, 6, 9]).astype("float64")
+        self.unsorted_seq = np.array([6, 2, 10, 4, 8]).astype("float64")
+        # sorter such that unsorted_seq[sorter] is sorted
+        self.sorter = np.argsort(self.unsorted_seq).astype("int64")
+
+    def setUp(self):
+        self.init_test_case()
+        self.place = get_places()
+
+    def test_dygraph_side_and_right_priority_and_out_int32(self):
+        # Test: side takes precedence over right, out_int32 controls the returned dtype
+        paddle.disable_static()
+        for place in self.place:
+            with paddle.base.dygraph.guard():
+                seq = paddle.to_tensor(self.sorted_sequence)
+                vals = paddle.to_tensor(self.values_2d)
+                # Mixed parameter passing: right=False, side='right' -> side takes precedence, should be interpreted as right=True
+                out = paddle.searchsorted(
+                    seq, vals, right=False, side="right", out_int32=True
+                )
+                ref = np.searchsorted(
+                    self.sorted_sequence, self.values_2d, side="right"
+                )
+                self.assertEqual(out.dtype, paddle.int32)
+                np.testing.assert_allclose(
+                    ref, out.numpy().astype("int64"), rtol=1e-05
+                )
+
+    def test_dygraph_out_parameter_and_return_is_out(self):
+        # Test the out parameter: pass in an existing tensor and verify the result is written into it (the returned tensor is not checked)
+        paddle.disable_static()
+        for place in self.place:
+            with paddle.base.dygraph.guard():
+                seq = paddle.to_tensor(self.sorted_sequence)
+                vals = paddle.to_tensor(self.values_2d)
+                out_tensor = paddle.empty(
+                    shape=self.values_2d.shape, dtype="int64"
+                )
+                ret = paddle.searchsorted(seq, vals, out=out_tensor)
+                ref = np.searchsorted(self.sorted_sequence, self.values_2d)
+                np.testing.assert_allclose(ref, out_tensor.numpy(), rtol=1e-05)
+
+    def test_dygraph_sorter_behavior(self):
+        # Test sorter parameter: When the sequence is unsorted but a sorter is given, the behavior is consistent with numpy
+        paddle.disable_static()
+        for place in self.place:
+            with paddle.base.dygraph.guard():
+                seq = paddle.to_tensor(self.unsorted_seq)
+                vals = paddle.to_tensor(self.values_1d)
+                sorter_t = paddle.to_tensor(self.sorter)
+                out = paddle.searchsorted(seq, vals, sorter=sorter_t)
+                ref = np.searchsorted(
+                    self.unsorted_seq, self.values_1d, sorter=self.sorter
+                )
+                np.testing.assert_allclose(ref, out.numpy(), rtol=1e-05)
+
+    def test_static_side_and_sorter(self):
+        # Test the side and sorter parameters in static graph mode (aligned with numpy)
+        paddle.enable_static()
+        for place in self.place:
+            with paddle.static.program_guard(paddle.static.Program()):
+                seq = paddle.static.data(
+                    name="seq", shape=self.unsorted_seq.shape, dtype="float64"
+                )
+                vals = paddle.static.data(
+                    name="vals", shape=self.values_1d.shape, dtype="float64"
+                )
+                sorter = paddle.static.data(
+                    name="sorter", shape=self.sorter.shape, dtype="int64"
+                )
+
+                out_left = paddle.searchsorted(
+                    seq, vals, side="left", sorter=sorter
+                )
+                out_right = paddle.searchsorted(
+                    seq, vals, side="right", sorter=sorter
+                )
+
+                exe = paddle.static.Executor(place)
+                (res_left, res_right) = exe.run(
+                    feed={
+                        "seq": self.unsorted_seq,
+                        "vals": self.values_1d,
+                        "sorter": self.sorter,
+                    },
+                    fetch_list=[out_left, out_right],
+                )
+                ref_left = np.searchsorted(
+                    self.unsorted_seq,
+                    self.values_1d,
+                    side="left",
+                    sorter=self.sorter,
+                )
+                ref_right = np.searchsorted(
+                    self.unsorted_seq,
+                    self.values_1d,
+                    side="right",
+                    sorter=self.sorter,
+                )
+                np.testing.assert_allclose(ref_left, res_left, rtol=1e-05)
+                np.testing.assert_allclose(ref_right, res_right, rtol=1e-05)
+        paddle.disable_static()
+
+    def test_dygraph_1d_values_and_name_param(self):
+        paddle.disable_static()
+        for place in self.place:
+            with paddle.base.dygraph.guard():
+                seq = paddle.to_tensor(self.sorted_sequence)
+                vals = paddle.to_tensor(self.values_1d)
+                out = paddle.searchsorted(seq, vals, name="my_search")
+                ref = np.searchsorted(self.sorted_sequence, self.values_1d)
+                np.testing.assert_allclose(ref, out.numpy(), rtol=1e-05)
+
+
 if __name__ == '__main__':
     unittest.main()