diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index 9111fe8eda5af..8e1a3e891b44a 100644
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -573,6 +573,7 @@
     argmax,
     argmin,
     argsort,
+    argwhere,
     bucketize,
     index_sample,
     index_select,
@@ -1121,6 +1122,7 @@
     'atleast_3d',
     'reverse',
     'nonzero',
+    'argwhere',
     'CUDAPinnedPlace',
     'XPUPinnedPlace',
     'logical_not',
diff --git a/python/paddle/autograd/__init__.py b/python/paddle/autograd/__init__.py
index bfc772395037c..e28e784e77506 100644
--- a/python/paddle/autograd/__init__.py
+++ b/python/paddle/autograd/__init__.py
@@ -28,11 +28,14 @@
 from .py_layer import PyLayer, PyLayerContext
 from .saved_tensors_hooks import saved_tensors_hooks
 
+Function = PyLayer
+
 __all__ = [
     'jacobian',
     'hessian',
     'backward',
     'PyLayer',
+    'Function',
     'PyLayerContext',
     'saved_tensors_hooks',
 ]
diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py
index 75d2882a04006..016cb20b50533 100644
--- a/python/paddle/tensor/__init__.py
+++ b/python/paddle/tensor/__init__.py
@@ -453,6 +453,7 @@
     argmax,
     argmin,
     argsort,
+    argwhere,
     bucketize,
     index_sample,
     index_select,
@@ -607,6 +608,8 @@
     'floor_mod_',
     'multiply',
     'multiply_',
+    'mul',
+    'mul_',
     'add',
     'add_',
     'subtract',
@@ -877,8 +880,12 @@
     'log_normal_',
     'set_',
     'resize_',
+    'argwhere',
 ]
 
+mul = multiply
+mul_ = multiply_
+
 # this list used in math_op_patch.py for magic_method bind
 magic_method_func = [
     ('__and__', 'bitwise_and'),
diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py
index 3837d7595f8cc..b12dab154560a 100755
--- a/python/paddle/tensor/search.py
+++ b/python/paddle/tensor/search.py
@@ -561,6 +561,38 @@ def nonzero(x: Tensor, as_tuple=False):
     return tuple(list_out)
 
 
+def argwhere(input: Tensor) -> Tensor:
+    """
+    Return a tensor containing the indices of all non-zero elements of the `input`
+    tensor. The returned tensor has shape [z, n], where `z` is the number of all non-zero
+    elements in the `input` tensor, and `n` is the number of dimensions in the `input`
+    tensor.
+
+    Args:
+        input (Tensor): The input tensor variable.
+
+    Returns:
+        Tensor, The data type is int64.
+
+    Examples:
+
+        .. code-block:: python
+
+            >>> import paddle
+
+            >>> x = paddle.to_tensor([[1.0, 0.0, 0.0],
+            ...                       [0.0, 2.0, 0.0],
+            ...                       [0.0, 0.0, 3.0]])
+            >>> out = paddle.tensor.search.argwhere(x)
+            >>> print(out)
+            Tensor(shape=[3, 2], dtype=int64, place=Place(cpu), stop_gradient=True,
+                   [[0, 0],
+                    [1, 1],
+                    [2, 2]])
+    """
+    return nonzero(input, as_tuple=False)
+
+
 def _restrict_nonzero(condition: Tensor, total_true_num: int) -> Tensor:
     """
     Return a tensor containing the indices of all non-zero elements of the `input`
diff --git a/test/legacy_test/test_argwhere_api.py b/test/legacy_test/test_argwhere_api.py
new file mode 100644
index 0000000000000..e5cf18ec775ba
--- /dev/null
+++ b/test/legacy_test/test_argwhere_api.py
@@ -0,0 +1,187 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from op_test import OpTest, convert_float_to_uint16
+
+import paddle
+from paddle import base
+from paddle.base import Program, program_guard
+
+
+def call_argwhere(x):
+    input = paddle.to_tensor(x)
+    return paddle.argwhere(input)
+
+
+class TestArgwhereAPI(unittest.TestCase):
+    def test_argwhere_api(self):
+        paddle.enable_static()
+        data = np.array([[1, 0], [0, 1]], dtype="float32")
+        with program_guard(Program(), Program()):
+            x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32')
+            if not paddle.framework.use_pir_api():
+                x.desc.set_need_check_feed(False)
+            y = paddle.argwhere(x)
+            exe = base.Executor(base.CPUPlace())
+            (res,) = exe.run(
+                feed={'x': data}, fetch_list=[y], return_numpy=False
+            )
+        expect_out = np.array([[0, 0], [1, 1]])
+        np.testing.assert_allclose(expect_out, np.array(res), rtol=1e-05)
+
+        data = np.array([1, 1, 0], dtype="float32")
+        with program_guard(Program(), Program()):
+            x = paddle.static.data(name='x', shape=[-1], dtype='float32')
+            if not paddle.framework.use_pir_api():
+                x.desc.set_need_check_feed(False)
+            y = paddle.argwhere(x)
+            exe = base.Executor(base.CPUPlace())
+            (res,) = exe.run(
+                feed={'x': data}, fetch_list=[y], return_numpy=False
+            )
+        expect_out = np.array([[0], [1]])
+        np.testing.assert_allclose(expect_out, np.array(res), rtol=1e-05)
+
+    def test_dygraph_api(self):
+        data_x = np.array([[True, False], [False, True]])
+        with base.dygraph.guard():
+            x = paddle.to_tensor(data_x)
+            z = paddle.argwhere(x)
+            np_z = z.numpy()
+        expect_out = np.array([[0, 0], [1, 1]])
+
+
+# Base case
+class TestArgwhereOp(OpTest):
+    def setUp(self):
+        '''Test where_index op with random value'''
+        np.random.seed(2023)
+        self.op_type = "where_index"
+        self.python_api = call_argwhere
+        self.init_shape()
+        self.init_dtype()
+
+        self.inputs = self.create_inputs()
+        self.outputs = self.return_outputs()
+
+    def test_check_output(self):
+        self.check_output(check_pir=True, check_symbol_infer=False)
+
+    def init_shape(self):
+        self.shape = [8, 8]
+
+    def init_dtype(self):
+        self.dtype = np.float64
+
+    def create_inputs(self):
+        return {
+            'Condition': np.random.randint(5, size=self.shape).astype(
+                self.dtype
+            )
+        }
+
+    def return_outputs(self):
+        return {'Out': np.argwhere(self.inputs['Condition'])}
+
+
+class TestArgwhereComplex64Op(TestArgwhereOp):
+    def init_shape(self):
+        self.shape = [1, 2, 3]
+
+    def init_dtype(self):
+        self.dtype = np.complex64
+
+
+class TestArgwhereComplex128Op(TestArgwhereOp):
+    def init_shape(self):
+        self.shape = [1, 2, 3]
+
+    def init_dtype(self):
+        self.dtype = np.complex128
+
+
+class TestArgwhereFP32Op(TestArgwhereOp):
+    def init_shape(self):
+        self.shape = [2, 10, 2]
+
+    def init_dtype(self):
+        self.dtype = np.float32
+
+
+class TestArgwhereFP16Op(TestArgwhereOp):
+    def init_shape(self):
+        self.shape = [3, 4, 7]
+
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+class TestArgwhereBF16(OpTest):
+    def setUp(self):
+        '''Test where_index op with bfloat16 dtype'''
+        np.random.seed(2023)
+        self.op_type = "where_index"
+        self.python_api = call_argwhere
+        self.init_shape()
+        self.init_dtype()
+
+        self.inputs = self.create_inputs()
+        self.outputs = self.return_outputs()
+
+    def test_check_output(self):
+        self.check_output(check_pir=True, check_symbol_infer=False)
+
+    def init_shape(self):
+        self.shape = [12, 9]
+
+    def init_dtype(self):
+        self.dtype = np.uint16
+
+    def create_inputs(self):
+        return {
+            'Condition': convert_float_to_uint16(
+                np.random.randint(5, size=self.shape).astype(np.float32)
+            )
+        }
+
+    def return_outputs(self):
+        return {'Out': np.argwhere(self.inputs['Condition'])}
+
+
+class TestZeroSizeOp(TestArgwhereOp):
+
+    def init_shape(self):
+        self.shape = [0, 10]
+
+    def init_dtype(self):
+        self.dtype = np.float64
+
+
+class TestZeroSizeOpCase2(TestArgwhereOp):
+
+    def init_shape(self):
+        self.shape = [0, 10]
+
+    def init_dtype(self):
+        self.dtype = np.float64
+
+    def test_check_output(self):
+        self.check_output(check_pir=True, check_symbol_infer=True)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/legacy_test/test_autograd_function.py b/test/legacy_test/test_autograd_function.py
new file mode 100644
index 0000000000000..4b1312ff9d61e
--- /dev/null
+++ b/test/legacy_test/test_autograd_function.py
@@ -0,0 +1,679 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+
+import paddle
+from paddle.autograd import Function
+
+
+class TestFunction(unittest.TestCase):
+    def test_simple_function_multiple_output(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x1, x2, func1, func2=paddle.square):
+                ctx.func = func2
+                y1 = func1(x1)
+                y2 = func1(x2)
+                ctx.save_for_backward(y1, y2)
+                return y1, 1, y2, None
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                y1, y2 = ctx.saved_tensor()
+                re1 = dy1 * (1 - ctx.func(y1))
+                re2 = dy2 * (1 - paddle.square(y2))
+                return re1, re2
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        z = tanh.apply(input1, input1, paddle.tanh, paddle.square)
+        z = z[0] + z[2]
+        z.mean().backward()
+
+        z2 = paddle.tanh(input2) + paddle.tanh(input2)
+        z2.mean().backward()
+
+        self.assertTrue(
+            np.max(np.abs(input1.grad.numpy() - input2.grad.numpy())) < 1e-10
+        )
+
+    def test_simple_function_return_none_with_no_grad(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x1, x2, func1, func2=paddle.square):
+                ctx.func = func2
+                y1 = func1(x1)
+                y2 = func1(x2)
+                ctx.save_for_backward(y1, y2)
+                return 1, None, y1, y2, ''
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                y1, y2 = ctx.saved_tensor()
+                re1 = dy1 * (1 - ctx.func(y1))
+                re2 = dy2 * (1 - paddle.square(y2))
+                return re1, None
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = input1.detach().clone()
+        input3 = input1.detach().clone()
+        input4 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        input3.stop_gradient = True
+        input4.stop_gradient = True
+        z = tanh.apply(input1, input3, paddle.tanh, paddle.square)
+        z = z[2] + z[3]
+        z.mean().backward()
+
+        z2 = paddle.tanh(input2) + paddle.tanh(input4)
+        z2.mean().backward()
+
+        self.assertTrue(
+            np.max(np.abs(input1.grad.numpy() - input2.grad.numpy())) < 1e-10
+        )
+
+    def test_simple_function_single_output(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x1, func1, func2=paddle.square):
+                ctx.func = func2
+                y1 = func1(x1)
+                ctx.save_for_backward(y1)
+                return y1
+
+            @staticmethod
+            def backward(ctx, dy1):
+                (y1,) = ctx.saved_tensor()
+                re1 = dy1 * (1 - ctx.func(y1))
+                return re1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        z = tanh.apply(x1=input1, func1=paddle.tanh)
+        z.mean().backward()
+        z2 = paddle.tanh(input2)
+        z2.mean().backward()
+
+        self.assertTrue(
+            np.max(np.abs(input1.grad.numpy() - input2.grad.numpy())) < 1e-10
+        )
+
+    def test_simple_function_multi_output(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x1, func1, func2=paddle.split):
+                ctx.func = func2
+                y1 = func1(x1)
+                ctx.save_for_backward(y1)
+                return y1
+
+            @staticmethod
+            def backward(ctx, dy1):
+                (y1,) = ctx.saved_tensor()
+                re1 = ctx.func(dy1, 3)
+                return re1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = paddle.randn([2, 3]).astype("float64")
+        input3 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        input3.stop_gradient = False
+        z = tanh.apply(x1=[input1, input2, input3], func1=paddle.concat)
+        z.mean().backward()
+        z2 = paddle.concat([input1, input2, input3])
+        z2.mean().backward()
+
+        self.assertTrue(
+            np.max(np.abs(input1.grad.numpy() - input2.grad.numpy())) < 1e-10
+        )
+
+    def test_function_num_output_match(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(
+                ctx,
+                x1,
+                x2,
+            ):
+                return x1 + x2
+
+            @staticmethod
+            def backward(ctx, dy1):
+                return dy1 + 1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        z = tanh.apply(input1, input2)
+        with self.assertRaises(ValueError):
+            z.mean().backward()
+
+    def test_function_dtype(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x, dtype):
+                y = paddle.cast(x, dtype)
+                return y
+
+            @staticmethod
+            def backward(ctx, dy1):
+                return dy1
+
+        dtypes = [
+            'bool',
+            'float16',
+            'float32',
+            'float64',
+            'uint8',
+            'int32',
+            'int64',
+        ]
+        for dtype in dtypes:
+            input1 = paddle.randn([2, 3])
+            input1.stop_gradient = False
+            self.assertIsNone(input1.grad)
+
+            z = tanh.apply(input1, dtype)
+            z = paddle.cast(z, "float32")
+            z.sum().backward()
+            self.assertIsNotNone(input1.grad)
+
+    def test_function_Exception_forward(self):
+        class Layer_None1(Function):
+            @staticmethod
+            def forward(ctx, *args):
+                return None
+
+            @staticmethod
+            def backward(ctx, *args):
+                return args
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        with self.assertRaises(ValueError):
+            z = Layer_None1.apply(input1)
+
+        class Layer_None2(Function):
+            @staticmethod
+            def forward(ctx, *args):
+                return [None, args[0]]
+
+            @staticmethod
+            def backward(ctx, *args):
+                return args
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        # return None
+        z = Layer_None2.apply(input1)
+
+        class Layer_one1(Function):
+            @staticmethod
+            def forward(ctx, *args):
+                return 1
+
+            @staticmethod
+            def backward(ctx, *args):
+                return args
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        # At least one output of `Function.backward` is a `Tensor`
+        with self.assertRaises(ValueError):
+            z = Layer_one1.apply(input1)
+
+        class Layer_one2(Function):
+            @staticmethod
+            def forward(ctx, *args):
+                return [1, 2, args[0]]
+
+            @staticmethod
+            def backward(ctx, *args):
+                return args
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        # return int
+        z = Layer_one2.apply(input1)
+
+        class Layer_no_fw(Function):
+            @staticmethod
+            def backward(ctx, *args):
+                return args
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        with self.assertRaises(NotImplementedError):
+            z = Layer_no_fw.apply(input1)
+
+    def test_function_nograd(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x1, func1, func2=paddle.square, xx=None):
+                ctx.func = func2
+                y1 = func1(x1)
+                return y1
+
+            @staticmethod
+            def backward(ctx, x1, y1, dy1):
+                re1 = dy1 * (1 - ctx.func(y1))
+                return re1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        z = tanh.apply(input1, paddle.tanh, paddle.square)
+        z.mean().backward()
+        self.assertIsNone(z.grad)
+
+    def test_function_Exception_bk(self):
+        class Layer_bk_none1(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x * 2
+
+            @staticmethod
+            def backward(ctx, dy1):
+                return None
+
+        input2 = paddle.randn([2, 3]).astype("float64")
+        input2.stop_gradient = False
+        z = Layer_bk_none1.apply(input2)
+
+        z.sum().backward()
+        self.assertEqual(input2.grad, None)
+
+        class Layer_bk_none2(Function):
+            @staticmethod
+            def forward(ctx, x1, x2):
+                return x1 + x2
+
+            @staticmethod
+            def backward(ctx, dy1):
+                return None, dy1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+        z = Layer_bk_none2.apply(input1, input1)
+
+        z.mean().backward()
+        self.assertIsNone(z.grad)
+
+        class Layer_bk_one1(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x + x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return 1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+        z = Layer_bk_one1.apply(input1)
+
+        with self.assertRaises(ValueError):
+            z.mean().backward()
+
+        class Layer_bk_one2(Function):
+            @staticmethod
+            def forward(ctx, x1, x2):
+                return x1 * 2, x2 * 5
+
+            @staticmethod
+            def backward(ctx, *args):
+                return 1, 1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+
+        y = Layer_bk_one2.apply(input1, input1)
+        z = y[0] + y[1]
+        with self.assertRaises(ValueError):
+            z.mean().backward()
+
+        class Layer_no_bk(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x * 2, x * 5
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+        z = Layer_no_bk.apply(input1)
+
+        with self.assertRaises(OSError):
+            z = z[0] + z[1]
+            z.mean().backward()
+
+        class Layer_bk_match(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x * 2, x * 5
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                return dy2 * 2, dy1 * 2
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+        z = Layer_bk_match.apply(input1)
+        with self.assertRaises(ValueError):
+            z = z[0] + z[1]
+            z.mean().backward()
+
+    def test_function_bk_return_none(self):
+        class Layer_bk_none1(Function):
+            @staticmethod
+            def forward(ctx, x1, x2):
+                return x1 + x2
+
+            @staticmethod
+            def backward(ctx, dy):
+                return 1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = True
+        input2.stop_gradient = False
+        z = Layer_bk_none1.apply(input1, input2)
+
+        with self.assertRaises(ValueError):
+            z.mean().backward()
+
+        class Layer_bk_none2(Function):
+            @staticmethod
+            def forward(ctx, x1, x2):
+                return x1 * 2, x2 * 5
+
+            @staticmethod
+            def backward(ctx, *args):
+                return 1, 1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = True
+        input2.stop_gradient = False
+        z = Layer_bk_none2.apply(input1, input2)
+        z = z[0] + z[1]
+        with self.assertRaises(ValueError):
+            z.mean().backward()
+
+    def test_function_inplace(self):
+        class cus_tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return dy
+
+        class Layer(paddle.nn.Layer):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, data):
+                data = data**2
+                z = paddle.tanh(data)
+                z = cus_tanh.apply(data)
+                return z.mean()
+
+        for i in range(2):
+            data = paddle.ones([2, 3], dtype="float64") / (i + 1)
+            data.stop_gradient = False
+            layer = Layer()
+            z = layer(data)
+            z.backward()
+            self.assertIsNotNone(data.grad)
+
+    def test_function_inplace_backward_error(self):
+        class cus_tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return dy
+
+        class Layer(paddle.nn.Layer):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, data):
+                var_b = data**2
+                var_c = var_b**2
+                z = cus_tanh.apply(var_b)
+                loss = paddle.nn.functional.relu(var_c)
+                return loss
+
+        data = paddle.ones([2, 3], dtype="float64")
+        data.stop_gradient = False
+        layer = Layer()
+        z = layer(data)
+        with self.assertRaisesRegex(
+            RuntimeError,
+            f"received tensor_version:{1} != wrapper_version_snapshot:{0}",
+        ):
+            z.backward()
+
+    def test_function_inplace_backward_success_1(self):
+        class cus_tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return dy
+
+        class Layer(paddle.nn.Layer):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, data):
+                var_b = data**2
+                var_c = cus_tanh.apply(var_b)
+                var_d = var_c**2
+                loss = var_d.sum()
+                return loss
+
+        for i in range(2):
+            data = paddle.ones([2, 3], dtype="float64") / (i + 1)
+            data.stop_gradient = False
+            layer = Layer()
+            z = layer(data)
+            z.backward()
+            self.assertIsNotNone(data.grad)
+
+    def test_function_inplace_backward_success_2(self):
+        class cus_tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return dy
+
+        class Layer(paddle.nn.Layer):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, data):
+                var_b = data**2
+                var_c = cus_tanh.apply(var_b)
+                var_d = var_c + var_c
+                loss = var_d.sum()
+                return loss
+
+        for i in range(2):
+            data = paddle.ones([2, 3], dtype="float64") / (i + 1)
+            data.stop_gradient = False
+            layer = Layer()
+            z = layer(data)
+            z.backward()
+            self.assertIsNotNone(data.grad)
+
+    def test_function_inplace_and_leaf_exception(self):
+        class cus_function_op(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return dy
+
+        class Layer(paddle.nn.Layer):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, data):
+                z = cus_function_op.apply(data)
+                return z.mean()
+
+        for i in range(2):
+            data = paddle.ones([2, 3], dtype="float64") / (i + 1)
+            data.stop_gradient = False
+            layer = Layer()
+
+            with self.assertRaises(ValueError):
+                z = layer(data)
+
+    def test_backward_in_backward(self):
+        class cus_tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                temp = x.detach()
+                ctx.inputs = temp
+                return x.mean()
+
+            @staticmethod
+            def backward(ctx, dy):
+                with paddle.set_grad_enabled(True):
+                    temp = ctx.inputs
+                    temp.stop_gradient = False
+                    z = paddle.tanh(temp)
+                    z.backward()
+                    self.assertIsNotNone(temp.grad)
+                    return paddle.to_tensor(temp.grad)
+
+        for i in range(2):
+            data = paddle.ones([2, 3], dtype="float32") / (i + 1)
+            data.stop_gradient = False
+            data = paddle.nn.functional.relu(data)
+            z = paddle.tanh(data)
+            z = cus_tanh.apply(data)
+
+    def test_return_to_tensor(self):
+        class Tanh(Function):
+            @staticmethod
+            def forward(ctx, x1):
+                y1 = paddle.tanh(x1)
+                ctx.save_for_backward(y1)
+                tensor_1 = paddle.to_tensor([1, 2], dtype='float32')
+                return y1, 5, None, "helloworld", tensor_1
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                (y1,) = ctx.saved_tensor()
+                re1 = dy1 * (1 - paddle.square(y1))
+                return dy1
+
+        input1 = paddle.randn([2, 3]).astype("float32")
+        input2 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        z, number, none_item, string_item, tensor1 = Tanh.apply(x1=input1)
+        z.mean().backward()
+
+    def test_materialize_grads(self):
+        class Tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                ctx.mark_not_inplace(x)
+                return x, x + x
+
+            @staticmethod
+            def backward(ctx, grad, grad2):
+                self.assertEqual(grad2, paddle.zeros([1]))
+                return grad
+
+        x = paddle.ones([1], dtype="float64")
+        x.stop_gradient = False
+        Tanh.apply(x)[0].backward()
+
+    def test_dont_materialize_grads(self):
+        class Tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                ctx.mark_not_inplace(x)
+                ctx.set_materialize_grads(False)
+                return x, x + x
+
+            @staticmethod
+            def backward(ctx, grad, grad2):
+                self.assertIsNone(grad2)
+                return grad
+
+        x = paddle.ones([1], dtype="float64")
+        x.stop_gradient = False
+        Tanh.apply(x)[0].backward()
+
+    def test_mark_non_differentiable(self):
+        class Tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                a = x + x
+                ctx.mark_non_differentiable(a)
+                return a
+
+            @staticmethod
+            def backward(ctx, grad):
+                self.assertTrue(False)  # should not be call
+                return paddle.ones([1], dtype="float64")
+
+        x = paddle.ones([1], dtype="float64")
+        x.stop_gradient = False
+        y = Tanh.apply(x)
+        y.sum().backward()
+
+    def test_mark_non_differentiable2(self):
+        class Tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                a = x + x
+                b = x + x + x
+                ctx.mark_non_differentiable(a)
+                return a, b
+
+            @staticmethod
+            def backward(ctx, grad_a, grad_b):
+                self.assertEqual(grad_a, paddle.zeros([1]))
+                self.assertEqual(grad_b, paddle.ones([1], dtype="float64"))
+                return grad_b
+
+        x = paddle.ones([1], dtype="float64")
+        x.stop_gradient = False
+        a, b = Tanh.apply(x)
+        b.sum().backward()
+        self.assertEqual(x.grad, paddle.ones([1], dtype="float64"))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/legacy_test/test_mul.py b/test/legacy_test/test_mul.py
new file mode 100644
index 0000000000000..112d20c7ffd31
--- /dev/null
+++ b/test/legacy_test/test_mul.py
@@ -0,0 +1,138 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from op_test import get_device_place
+
+import paddle
+from paddle import static
+
+
+class TestMulApi(unittest.TestCase):
+    def setUp(self) -> None:
+        self.shape = [2, 3]
+        self.dtype = 'float32'
+        self.place = get_device_place()
+
+    def test_static_api(self):
+        paddle.enable_static()
+        x_np = np.random.rand(*self.shape).astype(self.dtype)
+        other2_np = np.random.rand(*self.shape).astype(self.dtype)
+        other3_np = np.random.rand(self.shape[0], 1).astype(self.dtype)
+        with static.program_guard(static.Program()):
+            x = paddle.static.data(name='x', shape=self.shape, dtype=self.dtype)
+            # other1 = 3.0
+            other2 = paddle.static.data(
+                name='other', shape=self.shape, dtype=self.dtype
+            )
+            other3 = paddle.static.data(
+                name='other3', shape=[self.shape[0], 1], dtype=self.dtype
+            )
+            # out1 = x.mul(other1)
+            out2 = x.mul(other2)
+            out3 = x.mul(other3)
+            exe = static.Executor(self.place)
+            outs = exe.run(
+                feed={'x': x_np, 'other': other2_np, 'other3': other3_np},
+                # fetch_list=[out1, out2, out3],
+                fetch_list=[out2, out3],
+            )
+            # np.testing.assert_allclose(
+            #     outs[0], np.multiply(x_np, other1), rtol=1e-05
+            # )
+            np.testing.assert_allclose(
+                outs[0], np.multiply(x_np, other2_np), rtol=1e-05
+            )
+            np.testing.assert_allclose(
+                outs[1], np.multiply(x_np, other3_np), rtol=1e-05
+            )
+
+    def test_dyn_api(self):
+        paddle.disable_static()
+        x_np = np.random.rand(*self.shape).astype(self.dtype)
+        other2_np = np.random.rand(*self.shape).astype(self.dtype)
+        other3_np = np.random.rand(self.shape[0], 1).astype(self.dtype)
+        x = paddle.to_tensor(x_np, place=self.place)
+        # other1 = 3.0
+        other2 = paddle.to_tensor(other2_np, place=self.place)
+        other3 = paddle.to_tensor(other3_np, place=self.place)
+
+        # out1 = x.mul(other1)
+        out2 = x.mul(other2)
+        out3 = x.mul(other3)
+
+        # np.testing.assert_allclose(
+        #     out1.numpy(), np.multiply(x_np, other1), rtol=1e-05
+        # )
+        np.testing.assert_allclose(
+            out2.numpy(), np.multiply(x_np, other2_np), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            out3.numpy(), np.multiply(x_np, other3_np), rtol=1e-05
+        )
+
+
+class TestMulInplaceApi(unittest.TestCase):
+    def setUp(self) -> None:
+        self.shape = [2, 3]
+        self.dtype = 'float32'
+
+    def test_dyn_api(self):
+        paddle.disable_static()
+        others = [
+            # 3.0,
+            paddle.to_tensor(np.random.rand(*self.shape).astype('float32')),
+            paddle.to_tensor(np.random.rand(*self.shape).astype('float32'))[
+                :, -1
+            ].unsqueeze(-1),
+        ]
+        for other in others:
+            x_np = np.random.rand(*self.shape).astype('float32')
+            x = paddle.to_tensor(x_np)
+            x.mul_(other)
+            np.testing.assert_allclose(
+                x.numpy(),
+                np.multiply(
+                    x_np,
+                    (
+                        other.numpy()
+                        if isinstance(other, paddle.Tensor)
+                        else other
+                    ),
+                ),
+                rtol=1e-05,
+            )
+
+
+class TestMulInplaceError(unittest.TestCase):
+    def test_errors(self):
+        paddle.disable_static()
+        # test dynamic computation graph: inputs must be broadcastable
+        x_data = np.random.rand(3, 4)
+        y_data = np.random.rand(2, 3, 4)
+        x = paddle.to_tensor(x_data)
+        y = paddle.to_tensor(y_data)
+
+        def multiply_shape_error():
+            with paddle.no_grad():
+                x.mul_(y)
+
+        self.assertRaises(ValueError, multiply_shape_error)
+        paddle.enable_static()
+
+
+if __name__ == '__main__':
+    unittest.main()