PaddlePaddle · zhwesky2010 · Aug 12, 2025 · Aug 8, 2025 · Aug 11, 2025 · zhwesky2010
diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
@@ -573,6 +573,7 @@
     argmax,
     argmin,
     argsort,
+    argwhere,
     bucketize,
     index_sample,
     index_select,
@@ -1121,6 +1122,7 @@
     'atleast_3d',
     'reverse',
     'nonzero',
+    'argwhere',
     'CUDAPinnedPlace',
     'XPUPinnedPlace',
     'logical_not',

diff --git a/python/paddle/autograd/__init__.py b/python/paddle/autograd/__init__.py
@@ -28,11 +28,14 @@
 from .py_layer import PyLayer, PyLayerContext
 from .saved_tensors_hooks import saved_tensors_hooks
 
+Function = PyLayer
+
 __all__ = [
     'jacobian',
     'hessian',
     'backward',
     'PyLayer',
+    'Function',
     'PyLayerContext',
     'saved_tensors_hooks',
 ]
diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py
@@ -365,6 +365,8 @@
     mm,
     mod,
     mod_,
+    mul,
+    mul_,
     multigammaln,
     multigammaln_,
     multiplex,
@@ -453,6 +455,7 @@
     argmax,
     argmin,
     argsort,
+    argwhere,
     bucketize,
     index_sample,
     index_select,
@@ -607,6 +610,8 @@
     'floor_mod_',
     'multiply',
     'multiply_',
+    'mul',
+    'mul_',
     'add',
     'add_',
     'subtract',
@@ -877,6 +882,7 @@
     'log_normal_',
     'set_',
     'resize_',
+    'argwhere',
 ]
 
 # this list used in math_op_patch.py for magic_method bind

diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
@@ -98,7 +98,7 @@
     from collections.abc import Sequence
 
     from paddle import Tensor
-    from paddle._typing import DTypeLike
+    from paddle._typing import DTypeLike, Numeric
 
 __all__ = []
 
@@ -1164,6 +1164,67 @@ def multiply(x: Tensor, y: Tensor, name: str | None = None) -> Tensor:
         return _elementwise_op(LayerHelper('elementwise_mul', **locals()))
 
 
+def mul(x: Tensor, other: Numeric) -> Tensor:
+    """
+    Compute the element-wise product of the tensor ``x`` and ``other``.
+
+    The equation is:
+
+    .. math::
+        out = x * other
+
+    Note:
+        ``paddle.mul`` supports broadcasting. If you would like to know more about broadcasting, please refer to `Introduction to Tensor`_ .
+
+        .. _Introduction to Tensor: ../../guides/beginner/tensor_en.html#chapter5-broadcasting-of-tensor
+
+    Args:
+        x (Tensor): The left operand. Its data type should be one of bfloat16, float16, float32, float64, int32, int64, bool, complex64, complex128.
+        other (Tensor|float|int|bool|complex): The right operand, either a Tensor or a Python scalar (float, int, bool or complex). When ``other`` is a Tensor, its data type should be one of bfloat16, float16, float32, float64, int32, int64, bool, complex64, complex128.
+
+    Returns:
+        N-D Tensor, the element-wise product. If ``x`` and ``other`` have different but "broadcastable" shapes, the result has the shape obtained by broadcasting them. If ``x`` and ``other`` have the same shape, the result has that shape as well.
+
+    Examples:
+        .. code-block:: python
+
+            >>> import paddle
+
+            >>> # Multiply a tensor by a scalar
+            >>> x = paddle.to_tensor([1, 2, 3])
+            >>> res = paddle.mul(x, 2.0)
+            >>> print(res)
+            Tensor(shape=[3], dtype=float32, place=Place(cpu), stop_gradient=True,
+            [2., 4., 6.])
+
+            >>> # Multiply a tensor by another tensor
+            >>> x = paddle.to_tensor([[1, 2], [3, 4]])
+            >>> y = paddle.to_tensor([[5, 6], [7, 8]])
+            >>> res = paddle.mul(x, y)
+            >>> # also equivalent to `res = x * y`
+            >>> print(res)
+            Tensor(shape=[2, 2], dtype=int64, place=Place(cpu), stop_gradient=True,
+            [[5 , 12],
+             [21, 32]])
+
+            >>> # Broadcasting example
+            >>> x = paddle.to_tensor([[[1, 2, 3], [1, 2, 3]]])
+            >>> y = paddle.to_tensor([2])
+            >>> res = paddle.mul(x, y)
+            >>> print(res)
+            Tensor(shape=[1, 2, 3], dtype=int64, place=Place(cpu), stop_gradient=True,
+            [[[2, 4, 6],
+              [2, 4, 6]]])
+
+    """
+    # NOTE(review): a non-Tensor ``other`` takes the dtype picked by
+    # ``paddle.to_tensor``, not ``x.dtype`` - confirm this is intended.
+    if isinstance(other, paddle.Tensor):
+        return multiply(x, other)
+    scalar_tensor = paddle.to_tensor(other, place=x.place)
+    return multiply(x, scalar_tensor)
+
+
 @inplace_apis_in_dygraph_only
 def multiply_(x: Tensor, y: Tensor, name: str | None = None) -> Tensor:
     """
@@ -1180,6 +1241,21 @@ def multiply_(x: Tensor, y: Tensor, name: str | None = None) -> Tensor:
     return _C_ops.multiply_(x, y)
 
 
+@inplace_apis_in_dygraph_only
+def mul_(x: Tensor, other: Numeric) -> Tensor:
+    """
+    In-place variant of ``mul``: ``x`` is multiplied by ``other`` and the
+    product is written back into ``x``. Please refer to :ref:`api_paddle_mul`.
+    """
+
+    # A Tensor operand is used as-is; anything else is first wrapped into a
+    # tensor created on the place of ``x``.
+    if isinstance(other, paddle.Tensor):
+        return multiply_(x, other)
+    scalar_tensor = paddle.to_tensor(other, place=x.place)
+    return multiply_(x, scalar_tensor)
+
+
 def _elementwise_op_with_axis(x, y, axis=-1, name=None, op_type="Undefined"):
     assert (
         in_dynamic_or_pir_mode()

diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py
@@ -561,6 +561,38 @@ def nonzero(x: Tensor, as_tuple=False):
         return tuple(list_out)
 
 
+def argwhere(input: Tensor) -> Tensor:
+    """
+    Return the indices of every non-zero element of `input`, one index per
+    row. The result is a tensor of shape [z, n], where `z` is the number of
+    non-zero elements in `input` and `n` is the number of dimensions of
+    `input`. This is ``nonzero`` called with ``as_tuple=False``.
+
+    Args:
+        input (Tensor): The tensor whose non-zero elements are located.
+
+    Returns:
+        Tensor, the indices of the non-zero elements. The data type is int64.
+
+    Examples:
+        .. code-block:: python
+
+            >>> import paddle
+
+            >>> x = paddle.to_tensor([[1.0, 0.0, 0.0],
+            ...                       [0.0, 2.0, 0.0],
+            ...                       [0.0, 0.0, 3.0]])
+            >>> out = paddle.argwhere(x)
+            >>> print(out)
+            Tensor(shape=[3, 2], dtype=int64, place=Place(cpu), stop_gradient=True,
+            [[0, 0],
+             [1, 1],
+             [2, 2]])
+    """
+    indices = nonzero(input, as_tuple=False)
+    return indices
+
+
 def _restrict_nonzero(condition: Tensor, total_true_num: int) -> Tensor:
     """
     Return a tensor containing the indices of all non-zero elements of the `input`

diff --git a/test/legacy_test/test_argwhere_api.py b/test/legacy_test/test_argwhere_api.py
@@ -0,0 +1,187 @@
+# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from op_test import OpTest, convert_float_to_uint16
+
+import paddle
+from paddle import base
+from paddle.base import Program, program_guard
+
+
+def call_argwhere(x):
+    # ``python_api`` hook of the op tests: wrap ``x`` and run paddle.argwhere.
+    return paddle.argwhere(paddle.to_tensor(x))
+
+
+class TestArgwhereAPI(unittest.TestCase):
+    """API-level checks of ``paddle.argwhere`` in static and dygraph mode."""
+
+    def _run_static(self, data, shape):
+        # Build a fresh program with one float32 input, fetch argwhere(x).
+        with program_guard(Program(), Program()):
+            x = paddle.static.data(name='x', shape=shape, dtype='float32')
+            if not paddle.framework.use_pir_api():
+                x.desc.set_need_check_feed(False)
+            y = paddle.argwhere(x)
+            exe = base.Executor(base.CPUPlace())
+            (res,) = exe.run(
+                feed={'x': data}, fetch_list=[y], return_numpy=False
+            )
+        return np.array(res)
+
+    def test_argwhere_api(self):
+        paddle.enable_static()
+        data = np.array([[1, 0], [0, 1]], dtype="float32")
+        expect_out = np.array([[0, 0], [1, 1]])
+        res = self._run_static(data, [-1, 2])
+        np.testing.assert_allclose(expect_out, res, rtol=1e-05)
+
+        data = np.array([1, 1, 0], dtype="float32")
+        expect_out = np.array([[0], [1]])
+        res = self._run_static(data, [-1])
+        np.testing.assert_allclose(expect_out, res, rtol=1e-05)
+
+    def test_dygraph_api(self):
+        data_x = np.array([[True, False], [False, True]])
+        with base.dygraph.guard():
+            x = paddle.to_tensor(data_x)
+            np_z = paddle.argwhere(x).numpy()
+        expect_out = np.array([[0, 0], [1, 1]])
+        # Indices are integers, so require an exact match with the expectation.
+        np.testing.assert_array_equal(expect_out, np_z)
+
+
+# Base case
+class TestArgwhereOp(OpTest):
+    """where_index op check on random input; subclasses vary shape/dtype."""
+
+    def setUp(self):
+        np.random.seed(2023)
+        self.op_type = "where_index"
+        self.python_api = call_argwhere
+        self.init_shape()
+        self.init_dtype()
+        # Inputs must exist first: the expected output is derived from them.
+        self.inputs = self.create_inputs()
+        self.outputs = self.return_outputs()
+
+    def test_check_output(self):
+        self.check_output(check_pir=True, check_symbol_infer=False)
+
+    def init_shape(self):
+        self.shape = [8, 8]
+
+    def init_dtype(self):
+        self.dtype = np.float64
+
+    def create_inputs(self):
+        # Random integers in [0, 5), cast to the dtype under test.
+        condition = np.random.randint(5, size=self.shape)
+        return {'Condition': condition.astype(self.dtype)}
+
+    def return_outputs(self):
+        # NumPy's argwhere provides the reference indices.
+        return {'Out': np.argwhere(self.inputs['Condition'])}
+
+
+class TestArgwhereComplex64Op(TestArgwhereOp):
+    def init_dtype(self):
+        self.dtype = np.complex64  # dtype the base-class inputs are cast to
+
+    def init_shape(self):
+        self.shape = [1, 2, 3]  # rank-3 input
+
+
+class TestArgwhereComplex128Op(TestArgwhereOp):
+    def init_dtype(self):
+        self.dtype = np.complex128  # dtype the base-class inputs are cast to
+
+    def init_shape(self):
+        self.shape = [1, 2, 3]  # rank-3 input
+
+
+class TestArgwhereFP32Op(TestArgwhereOp):
+    def init_dtype(self):
+        self.dtype = np.float32  # dtype the base-class inputs are cast to
+
+    def init_shape(self):
+        self.shape = [2, 10, 2]  # rank-3 input
+
+
+class TestArgwhereFP16Op(TestArgwhereOp):
+    def init_dtype(self):
+        self.dtype = np.float16  # dtype the base-class inputs are cast to
+
+    def init_shape(self):
+        self.shape = [3, 4, 7]  # rank-3 input
+
+
+class TestArgwhereBF16(TestArgwhereOp):
+    """Check the where_index op with a bfloat16 input.
+
+    ``setUp`` and ``test_check_output`` are inherited from
+    ``TestArgwhereOp`` rather than duplicated; only the shape, the dtype,
+    the input construction and the reference output are specific to this
+    case.
+    """
+
+    def init_shape(self):
+        self.shape = [12, 9]
+
+    def init_dtype(self):
+        # uint16 stands in for bfloat16 here; ``create_inputs`` builds the
+        # data with ``convert_float_to_uint16``.
+        self.dtype = np.uint16
+
+    def create_inputs(self):
+        # Random integers in [0, 5) are drawn as float32 first and then
+        # passed through ``convert_float_to_uint16``.
+        return {
+            'Condition': convert_float_to_uint16(
+                np.random.randint(5, size=self.shape).astype(np.float32)
+            )
+        }
+
+    def return_outputs(self):
+        # The reference indices are computed on the converted array itself,
+        # not on the float32 source values.
+        return {'Out': np.argwhere(self.inputs['Condition'])}
+
+
+class TestZeroSizeOp(TestArgwhereOp):
+
+    def init_dtype(self):
+        self.dtype = np.float64
+
+    def init_shape(self):
+        self.shape = [0, 10]  # leading dimension 0 -> zero-size input
+
+
+class TestZeroSizeOpCase2(TestZeroSizeOp):
+    """Zero-size input checked with symbol inference enabled.
+
+    The shape and dtype are inherited from ``TestZeroSizeOp``; only the
+    ``check_symbol_infer`` flag passed to ``check_output`` differs from the
+    inherited check.
+    """
+
+    def test_check_output(self):
+        self.check_output(check_pir=True, check_symbol_infer=True)
+
+
+if __name__ == "__main__":
+    unittest.main()