diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py
index 9111fe8eda5af..8e1a3e891b44a 100644
--- a/python/paddle/__init__.py
+++ b/python/paddle/__init__.py
@@ -573,6 +573,7 @@
     argmax,
     argmin,
     argsort,
+    argwhere,
     bucketize,
     index_sample,
     index_select,
@@ -1121,6 +1122,7 @@
     'atleast_3d',
     'reverse',
     'nonzero',
+    'argwhere',
     'CUDAPinnedPlace',
     'XPUPinnedPlace',
     'logical_not',
diff --git a/python/paddle/autograd/__init__.py b/python/paddle/autograd/__init__.py
index bfc772395037c..e28e784e77506 100644
--- a/python/paddle/autograd/__init__.py
+++ b/python/paddle/autograd/__init__.py
@@ -28,11 +28,14 @@
 from .py_layer import PyLayer, PyLayerContext
 from .saved_tensors_hooks import saved_tensors_hooks
 
+Function = PyLayer
+
 __all__ = [
     'jacobian',
     'hessian',
     'backward',
     'PyLayer',
+    'Function',
     'PyLayerContext',
     'saved_tensors_hooks',
 ]
diff --git a/python/paddle/tensor/__init__.py b/python/paddle/tensor/__init__.py
index 75d2882a04006..016cb20b50533 100644
--- a/python/paddle/tensor/__init__.py
+++ b/python/paddle/tensor/__init__.py
@@ -453,6 +453,7 @@
     argmax,
     argmin,
     argsort,
+    argwhere,
     bucketize,
     index_sample,
     index_select,
@@ -607,6 +608,8 @@
     'floor_mod_',
     'multiply',
     'multiply_',
+    'mul',
+    'mul_',
     'add',
     'add_',
     'subtract',
@@ -877,8 +880,12 @@
     'log_normal_',
     'set_',
     'resize_',
+    'argwhere',
 ]
 
+mul = multiply
+mul_ = multiply_
+
 # this list used in math_op_patch.py for magic_method bind
 magic_method_func = [
     ('__and__', 'bitwise_and'),
diff --git a/python/paddle/tensor/search.py b/python/paddle/tensor/search.py
index 3837d7595f8cc..b12dab154560a 100755
--- a/python/paddle/tensor/search.py
+++ b/python/paddle/tensor/search.py
@@ -561,6 +561,38 @@ def nonzero(x: Tensor, as_tuple=False):
     return tuple(list_out)
 
 
+def argwhere(input: Tensor) -> Tensor:
+    """
+    Return a tensor containing the indices of all non-zero elements of the `input`
+    tensor. The returned tensor has shape [z, n], where `z` is the number of all non-zero
+    elements in the `input` tensor, and `n` is the number of dimensions in the `input`
+    tensor.
+
+    Args:
+        input (Tensor): The input tensor variable.
+
+    Returns:
+        Tensor, The data type is int64.
+
+    Examples:
+
+        .. code-block:: python
+
+            >>> import paddle
+
+            >>> x = paddle.to_tensor([[1.0, 0.0, 0.0],
+            ...                       [0.0, 2.0, 0.0],
+            ...                       [0.0, 0.0, 3.0]])
+            >>> out = paddle.tensor.search.argwhere(x)
+            >>> print(out)
+            Tensor(shape=[3, 2], dtype=int64, place=Place(cpu), stop_gradient=True,
+                   [[0, 0],
+                    [1, 1],
+                    [2, 2]])
+    """
+    return nonzero(input, as_tuple=False)
+
+
 def _restrict_nonzero(condition: Tensor, total_true_num: int) -> Tensor:
     """
     Return a tensor containing the indices of all non-zero elements of the `input`
diff --git a/test/legacy_test/test_argwhere_api.py b/test/legacy_test/test_argwhere_api.py
new file mode 100644
index 0000000000000..e5cf18ec775ba
--- /dev/null
+++ b/test/legacy_test/test_argwhere_api.py
@@ -0,0 +1,187 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from op_test import OpTest, convert_float_to_uint16
+
+import paddle
+from paddle import base
+from paddle.base import Program, program_guard
+
+
+def call_argwhere(x):
+    input = paddle.to_tensor(x)
+    return paddle.argwhere(input)
+
+
+class TestArgwhereAPI(unittest.TestCase):
+    def test_argwhere_api(self):
+        paddle.enable_static()
+        data = np.array([[1, 0], [0, 1]], dtype="float32")
+        with program_guard(Program(), Program()):
+            x = paddle.static.data(name='x', shape=[-1, 2], dtype='float32')
+            if not paddle.framework.use_pir_api():
+                x.desc.set_need_check_feed(False)
+            y = paddle.argwhere(x)
+            exe = base.Executor(base.CPUPlace())
+            (res,) = exe.run(
+                feed={'x': data}, fetch_list=[y], return_numpy=False
+            )
+        expect_out = np.array([[0, 0], [1, 1]])
+        np.testing.assert_allclose(expect_out, np.array(res), rtol=1e-05)
+
+        data = np.array([1, 1, 0], dtype="float32")
+        with program_guard(Program(), Program()):
+            x = paddle.static.data(name='x', shape=[-1], dtype='float32')
+            if not paddle.framework.use_pir_api():
+                x.desc.set_need_check_feed(False)
+            y = paddle.argwhere(x)
+            exe = base.Executor(base.CPUPlace())
+            (res,) = exe.run(
+                feed={'x': data}, fetch_list=[y], return_numpy=False
+            )
+        expect_out = np.array([[0], [1]])
+        np.testing.assert_allclose(expect_out, np.array(res), rtol=1e-05)
+
+    def test_dygraph_api(self):
+        data_x = np.array([[True, False], [False, True]])
+        with base.dygraph.guard():
+            x = paddle.to_tensor(data_x)
+            z = paddle.argwhere(x)
+            np_z = z.numpy()
+        expect_out = np.array([[0, 0], [1, 1]])
+
+
+# Base case
+class TestArgwhereOp(OpTest):
+    def setUp(self):
+        '''Test where_index op with random value'''
+        np.random.seed(2023)
+        self.op_type = "where_index"
+        self.python_api = call_argwhere
+        self.init_shape()
+        self.init_dtype()
+
+        self.inputs = self.create_inputs()
+        self.outputs = self.return_outputs()
+
+    def test_check_output(self):
+        self.check_output(check_pir=True, check_symbol_infer=False)
+
+    def init_shape(self):
+        self.shape = [8, 8]
+
+    def init_dtype(self):
+        self.dtype = np.float64
+
+    def create_inputs(self):
+        return {
+            'Condition': np.random.randint(5, size=self.shape).astype(
+                self.dtype
+            )
+        }
+
+    def return_outputs(self):
+        return {'Out': np.argwhere(self.inputs['Condition'])}
+
+
+class TestArgwhereComplex64Op(TestArgwhereOp):
+    def init_shape(self):
+        self.shape = [1, 2, 3]
+
+    def init_dtype(self):
+        self.dtype = np.complex64
+
+
+class TestArgwhereComplex128Op(TestArgwhereOp):
+    def init_shape(self):
+        self.shape = [1, 2, 3]
+
+    def init_dtype(self):
+        self.dtype = np.complex128
+
+
+class TestArgwhereFP32Op(TestArgwhereOp):
+    def init_shape(self):
+        self.shape = [2, 10, 2]
+
+    def init_dtype(self):
+        self.dtype = np.float32
+
+
+class TestArgwhereFP16Op(TestArgwhereOp):
+    def init_shape(self):
+        self.shape = [3, 4, 7]
+
+    def init_dtype(self):
+        self.dtype = np.float16
+
+
+class TestArgwhereBF16(OpTest):
+    def setUp(self):
+        '''Test where_index op with bfloat16 dtype'''
+        np.random.seed(2023)
+        self.op_type = "where_index"
+        self.python_api = call_argwhere
+        self.init_shape()
+        self.init_dtype()
+
+        self.inputs = self.create_inputs()
+        self.outputs = self.return_outputs()
+
+    def test_check_output(self):
+        self.check_output(check_pir=True, check_symbol_infer=False)
+
+    def init_shape(self):
+        self.shape = [12, 9]
+
+    def init_dtype(self):
+        self.dtype = np.uint16
+
+    def create_inputs(self):
+        return {
+            'Condition': convert_float_to_uint16(
+                np.random.randint(5, size=self.shape).astype(np.float32)
+            )
+        }
+
+    def return_outputs(self):
+        return {'Out': np.argwhere(self.inputs['Condition'])}
+
+
+class TestZeroSizeOp(TestArgwhereOp):
+
+    def init_shape(self):
+        self.shape = [0, 10]
+
+    def init_dtype(self):
+        self.dtype = np.float64
+
+
+class TestZeroSizeOpCase2(TestArgwhereOp):
+
+    def init_shape(self):
+        self.shape = [0, 10]
+
+    def init_dtype(self):
+        self.dtype = np.float64
+
+    def test_check_output(self):
+        self.check_output(check_pir=True, check_symbol_infer=True)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/test/legacy_test/test_autograd_function.py b/test/legacy_test/test_autograd_function.py
new file mode 100644
index 0000000000000..4b1312ff9d61e
--- /dev/null
+++ b/test/legacy_test/test_autograd_function.py
@@ -0,0 +1,679 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+
+import paddle
+from paddle.autograd import Function
+
+
+class TestFunction(unittest.TestCase):
+    def test_simple_function_multiple_output(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x1, x2, func1, func2=paddle.square):
+                ctx.func = func2
+                y1 = func1(x1)
+                y2 = func1(x2)
+                ctx.save_for_backward(y1, y2)
+                return y1, 1, y2, None
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                y1, y2 = ctx.saved_tensor()
+                re1 = dy1 * (1 - ctx.func(y1))
+                re2 = dy2 * (1 - paddle.square(y2))
+                return re1, re2
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        z = tanh.apply(input1, input1, paddle.tanh, paddle.square)
+        z = z[0] + z[2]
+        z.mean().backward()
+
+        z2 = paddle.tanh(input2) + paddle.tanh(input2)
+        z2.mean().backward()
+
+        self.assertTrue(
+            np.max(np.abs(input1.grad.numpy() - input2.grad.numpy())) < 1e-10
+        )
+
+    def test_simple_function_return_none_with_no_grad(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x1, x2, func1, func2=paddle.square):
+                ctx.func = func2
+                y1 = func1(x1)
+                y2 = func1(x2)
+                ctx.save_for_backward(y1, y2)
+                return 1, None, y1, y2, ''
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                y1, y2 = ctx.saved_tensor()
+                re1 = dy1 * (1 - ctx.func(y1))
+                re2 = dy2 * (1 - paddle.square(y2))
+                return re1, None
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = input1.detach().clone()
+        input3 = input1.detach().clone()
+        input4 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        input3.stop_gradient = True
+        input4.stop_gradient = True
+        z = tanh.apply(input1, input3, paddle.tanh, paddle.square)
+        z = z[2] + z[3]
+        z.mean().backward()
+
+        z2 = paddle.tanh(input2) + paddle.tanh(input4)
+        z2.mean().backward()
+
+        self.assertTrue(
+            np.max(np.abs(input1.grad.numpy() - input2.grad.numpy())) < 1e-10
+        )
+
+    def test_simple_function_single_output(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x1, func1, func2=paddle.square):
+                ctx.func = func2
+                y1 = func1(x1)
+                ctx.save_for_backward(y1)
+                return y1
+
+            @staticmethod
+            def backward(ctx, dy1):
+                (y1,) = ctx.saved_tensor()
+                re1 = dy1 * (1 - ctx.func(y1))
+                return re1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        z = tanh.apply(x1=input1, func1=paddle.tanh)
+        z.mean().backward()
+        z2 = paddle.tanh(input2)
+        z2.mean().backward()
+
+        self.assertTrue(
+            np.max(np.abs(input1.grad.numpy() - input2.grad.numpy())) < 1e-10
+        )
+
+    def test_simple_function_multi_output(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x1, func1, func2=paddle.split):
+                ctx.func = func2
+                y1 = func1(x1)
+                ctx.save_for_backward(y1)
+                return y1
+
+            @staticmethod
+            def backward(ctx, dy1):
+                (y1,) = ctx.saved_tensor()
+                re1 = ctx.func(dy1, 3)
+                return re1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = paddle.randn([2, 3]).astype("float64")
+        input3 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        input3.stop_gradient = False
+        z = tanh.apply(x1=[input1, input2, input3], func1=paddle.concat)
+        z.mean().backward()
+        z2 = paddle.concat([input1, input2, input3])
+        z2.mean().backward()
+
+        self.assertTrue(
+            np.max(np.abs(input1.grad.numpy() - input2.grad.numpy())) < 1e-10
+        )
+
+    def test_function_num_output_match(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(
+                ctx,
+                x1,
+                x2,
+            ):
+                return x1 + x2
+
+            @staticmethod
+            def backward(ctx, dy1):
+                return dy1 + 1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        z = tanh.apply(input1, input2)
+        with self.assertRaises(ValueError):
+            z.mean().backward()
+
+    def test_function_dtype(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x, dtype):
+                y = paddle.cast(x, dtype)
+                return y
+
+            @staticmethod
+            def backward(ctx, dy1):
+                return dy1
+
+        dtypes = [
+            'bool',
+            'float16',
+            'float32',
+            'float64',
+            'uint8',
+            'int32',
+            'int64',
+        ]
+        for dtype in dtypes:
+            input1 = paddle.randn([2, 3])
+            input1.stop_gradient = False
+            self.assertIsNone(input1.grad)
+
+            z = tanh.apply(input1, dtype)
+            z = paddle.cast(z, "float32")
+            z.sum().backward()
+            self.assertIsNotNone(input1.grad)
+
+    def test_function_Exception_forward(self):
+        class Layer_None1(Function):
+            @staticmethod
+            def forward(ctx, *args):
+                return None
+
+            @staticmethod
+            def backward(ctx, *args):
+                return args
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        with self.assertRaises(ValueError):
+            z = Layer_None1.apply(input1)
+
+        class Layer_None2(Function):
+            @staticmethod
+            def forward(ctx, *args):
+                return [None, args[0]]
+
+            @staticmethod
+            def backward(ctx, *args):
+                return args
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        # return None
+        z = Layer_None2.apply(input1)
+
+        class Layer_one1(Function):
+            @staticmethod
+            def forward(ctx, *args):
+                return 1
+
+            @staticmethod
+            def backward(ctx, *args):
+                return args
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        # At least one output of `Function.backward` is a `Tensor`
+        with self.assertRaises(ValueError):
+            z = Layer_one1.apply(input1)
+
+        class Layer_one2(Function):
+            @staticmethod
+            def forward(ctx, *args):
+                return [1, 2, args[0]]
+
+            @staticmethod
+            def backward(ctx, *args):
+                return args
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        # return int
+        z = Layer_one2.apply(input1)
+
+        class Layer_no_fw(Function):
+            @staticmethod
+            def backward(ctx, *args):
+                return args
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        with self.assertRaises(NotImplementedError):
+            z = Layer_no_fw.apply(input1)
+
+    def test_function_nograd(self):
+        class tanh(Function):
+            @staticmethod
+            def forward(ctx, x1, func1, func2=paddle.square, xx=None):
+                ctx.func = func2
+                y1 = func1(x1)
+                return y1
+
+            @staticmethod
+            def backward(ctx, x1, y1, dy1):
+                re1 = dy1 * (1 - ctx.func(y1))
+                return re1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        z = tanh.apply(input1, paddle.tanh, paddle.square)
+        z.mean().backward()
+        self.assertIsNone(z.grad)
+
+    def test_function_Exception_bk(self):
+        class Layer_bk_none1(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x * 2
+
+            @staticmethod
+            def backward(ctx, dy1):
+                return None
+
+        input2 = paddle.randn([2, 3]).astype("float64")
+        input2.stop_gradient = False
+        z = Layer_bk_none1.apply(input2)
+
+        z.sum().backward()
+        self.assertEqual(input2.grad, None)
+
+        class Layer_bk_none2(Function):
+            @staticmethod
+            def forward(ctx, x1, x2):
+                return x1 + x2
+
+            @staticmethod
+            def backward(ctx, dy1):
+                return None, dy1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+        z = Layer_bk_none2.apply(input1, input1)
+
+        z.mean().backward()
+        self.assertIsNone(z.grad)
+
+        class Layer_bk_one1(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x + x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return 1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+        z = Layer_bk_one1.apply(input1)
+
+        with self.assertRaises(ValueError):
+            z.mean().backward()
+
+        class Layer_bk_one2(Function):
+            @staticmethod
+            def forward(ctx, x1, x2):
+                return x1 * 2, x2 * 5
+
+            @staticmethod
+            def backward(ctx, *args):
+                return 1, 1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+
+        y = Layer_bk_one2.apply(input1, input1)
+        z = y[0] + y[1]
+        with self.assertRaises(ValueError):
+            z.mean().backward()
+
+        class Layer_no_bk(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x * 2, x * 5
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+        z = Layer_no_bk.apply(input1)
+
+        with self.assertRaises(OSError):
+            z = z[0] + z[1]
+            z.mean().backward()
+
+        class Layer_bk_match(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x * 2, x * 5
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                return dy2 * 2, dy1 * 2
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = False
+        z = Layer_bk_match.apply(input1)
+        with self.assertRaises(ValueError):
+            z = z[0] + z[1]
+            z.mean().backward()
+
+    def test_function_bk_return_none(self):
+        class Layer_bk_none1(Function):
+            @staticmethod
+            def forward(ctx, x1, x2):
+                return x1 + x2
+
+            @staticmethod
+            def backward(ctx, dy):
+                return 1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = True
+        input2.stop_gradient = False
+        z = Layer_bk_none1.apply(input1, input2)
+
+        with self.assertRaises(ValueError):
+            z.mean().backward()
+
+        class Layer_bk_none2(Function):
+            @staticmethod
+            def forward(ctx, x1, x2):
+                return x1 * 2, x2 * 5
+
+            @staticmethod
+            def backward(ctx, *args):
+                return 1, 1
+
+        input1 = paddle.randn([2, 3]).astype("float64")
+        input2 = paddle.randn([2, 3]).astype("float64")
+        input1.stop_gradient = True
+        input2.stop_gradient = False
+        z = Layer_bk_none2.apply(input1, input2)
+        z = z[0] + z[1]
+        with self.assertRaises(ValueError):
+            z.mean().backward()
+
+    def test_function_inplace(self):
+        class cus_tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return dy
+
+        class Layer(paddle.nn.Layer):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, data):
+                data = data**2
+                z = paddle.tanh(data)
+                z = cus_tanh.apply(data)
+                return z.mean()
+
+        for i in range(2):
+            data = paddle.ones([2, 3], dtype="float64") / (i + 1)
+            data.stop_gradient = False
+            layer = Layer()
+            z = layer(data)
+            z.backward()
+            self.assertIsNotNone(data.grad)
+
+    def test_function_inplace_backward_error(self):
+        class cus_tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return dy
+
+        class Layer(paddle.nn.Layer):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, data):
+                var_b = data**2
+                var_c = var_b**2
+                z = cus_tanh.apply(var_b)
+                loss = paddle.nn.functional.relu(var_c)
+                return loss
+
+        data = paddle.ones([2, 3], dtype="float64")
+        data.stop_gradient = False
+        layer = Layer()
+        z = layer(data)
+        with self.assertRaisesRegex(
+            RuntimeError,
+            f"received tensor_version:{1} != wrapper_version_snapshot:{0}",
+        ):
+            z.backward()
+
+    def test_function_inplace_backward_success_1(self):
+        class cus_tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return dy
+
+        class Layer(paddle.nn.Layer):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, data):
+                var_b = data**2
+                var_c = cus_tanh.apply(var_b)
+                var_d = var_c**2
+                loss = var_d.sum()
+                return loss
+
+        for i in range(2):
+            data = paddle.ones([2, 3], dtype="float64") / (i + 1)
+            data.stop_gradient = False
+            layer = Layer()
+            z = layer(data)
+            z.backward()
+            self.assertIsNotNone(data.grad)
+
+    def test_function_inplace_backward_success_2(self):
+        class cus_tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return dy
+
+        class Layer(paddle.nn.Layer):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, data):
+                var_b = data**2
+                var_c = cus_tanh.apply(var_b)
+                var_d = var_c + var_c
+                loss = var_d.sum()
+                return loss
+
+        for i in range(2):
+            data = paddle.ones([2, 3], dtype="float64") / (i + 1)
+            data.stop_gradient = False
+            layer = Layer()
+            z = layer(data)
+            z.backward()
+            self.assertIsNotNone(data.grad)
+
+    def test_function_inplace_and_leaf_exception(self):
+        class cus_function_op(Function):
+            @staticmethod
+            def forward(ctx, x):
+                return x
+
+            @staticmethod
+            def backward(ctx, dy):
+                return dy
+
+        class Layer(paddle.nn.Layer):
+            def __init__(self):
+                super().__init__()
+
+            def forward(self, data):
+                z = cus_function_op.apply(data)
+                return z.mean()
+
+        for i in range(2):
+            data = paddle.ones([2, 3], dtype="float64") / (i + 1)
+            data.stop_gradient = False
+            layer = Layer()
+
+            with self.assertRaises(ValueError):
+                z = layer(data)
+
+    def test_backward_in_backward(self):
+        class cus_tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                temp = x.detach()
+                ctx.inputs = temp
+                return x.mean()
+
+            @staticmethod
+            def backward(ctx, dy):
+                with paddle.set_grad_enabled(True):
+                    temp = ctx.inputs
+                    temp.stop_gradient = False
+                    z = paddle.tanh(temp)
+                    z.backward()
+                    self.assertIsNotNone(temp.grad)
+                    return paddle.to_tensor(temp.grad)
+
+        for i in range(2):
+            data = paddle.ones([2, 3], dtype="float32") / (i + 1)
+            data.stop_gradient = False
+            data = paddle.nn.functional.relu(data)
+            z = paddle.tanh(data)
+            z = cus_tanh.apply(data)
+
+    def test_return_to_tensor(self):
+        class Tanh(Function):
+            @staticmethod
+            def forward(ctx, x1):
+                y1 = paddle.tanh(x1)
+                ctx.save_for_backward(y1)
+                tensor_1 = paddle.to_tensor([1, 2], dtype='float32')
+                return y1, 5, None, "helloworld", tensor_1
+
+            @staticmethod
+            def backward(ctx, dy1, dy2):
+                (y1,) = ctx.saved_tensor()
+                re1 = dy1 * (1 - paddle.square(y1))
+                return dy1
+
+        input1 = paddle.randn([2, 3]).astype("float32")
+        input2 = input1.detach().clone()
+        input1.stop_gradient = False
+        input2.stop_gradient = False
+        z, number, none_item, string_item, tensor1 = Tanh.apply(x1=input1)
+        z.mean().backward()
+
+    def test_materialize_grads(self):
+        class Tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                ctx.mark_not_inplace(x)
+                return x, x + x
+
+            @staticmethod
+            def backward(ctx, grad, grad2):
+                self.assertEqual(grad2, paddle.zeros([1]))
+                return grad
+
+        x = paddle.ones([1], dtype="float64")
+        x.stop_gradient = False
+        Tanh.apply(x)[0].backward()
+
+    def test_dont_materialize_grads(self):
+        class Tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                ctx.mark_not_inplace(x)
+                ctx.set_materialize_grads(False)
+                return x, x + x
+
+            @staticmethod
+            def backward(ctx, grad, grad2):
+                self.assertIsNone(grad2)
+                return grad
+
+        x = paddle.ones([1], dtype="float64")
+        x.stop_gradient = False
+        Tanh.apply(x)[0].backward()
+
+    def test_mark_non_differentiable(self):
+        class Tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                a = x + x
+                ctx.mark_non_differentiable(a)
+                return a
+
+            @staticmethod
+            def backward(ctx, grad):
+                self.assertTrue(False)  # should not be call
+                return paddle.ones([1], dtype="float64")
+
+        x = paddle.ones([1], dtype="float64")
+        x.stop_gradient = False
+        y = Tanh.apply(x)
+        y.sum().backward()
+
+    def test_mark_non_differentiable2(self):
+        class Tanh(Function):
+            @staticmethod
+            def forward(ctx, x):
+                a = x + x
+                b = x + x + x
+                ctx.mark_non_differentiable(a)
+                return a, b
+
+            @staticmethod
+            def backward(ctx, grad_a, grad_b):
+                self.assertEqual(grad_a, paddle.zeros([1]))
+                self.assertEqual(grad_b, paddle.ones([1], dtype="float64"))
+                return grad_b
+
+        x = paddle.ones([1], dtype="float64")
+        x.stop_gradient = False
+        a, b = Tanh.apply(x)
+        b.sum().backward()
+        self.assertEqual(x.grad, paddle.ones([1], dtype="float64"))
+
+
+if __name__ == '__main__':
+    unittest.main()
diff --git a/test/legacy_test/test_mul.py b/test/legacy_test/test_mul.py
new file mode 100644
index 0000000000000..112d20c7ffd31
--- /dev/null
+++ b/test/legacy_test/test_mul.py
@@ -0,0 +1,138 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+from op_test import get_device_place
+
+import paddle
+from paddle import static
+
+
+class TestMulApi(unittest.TestCase):
+    def setUp(self) -> None:
+        self.shape = [2, 3]
+        self.dtype = 'float32'
+        self.place = get_device_place()
+
+    def test_static_api(self):
+        paddle.enable_static()
+        x_np = np.random.rand(*self.shape).astype(self.dtype)
+        other2_np = np.random.rand(*self.shape).astype(self.dtype)
+        other3_np = np.random.rand(self.shape[0], 1).astype(self.dtype)
+        with static.program_guard(static.Program()):
+            x = paddle.static.data(name='x', shape=self.shape, dtype=self.dtype)
+            # other1 = 3.0
+            other2 = paddle.static.data(
+                name='other', shape=self.shape, dtype=self.dtype
+            )
+            other3 = paddle.static.data(
+                name='other3', shape=[self.shape[0], 1], dtype=self.dtype
+            )
+            # out1 = x.mul(other1)
+            out2 = x.mul(other2)
+            out3 = x.mul(other3)
+            exe = static.Executor(self.place)
+            outs = exe.run(
+                feed={'x': x_np, 'other': other2_np, 'other3': other3_np},
+                # fetch_list=[out1, out2, out3],
+                fetch_list=[out2, out3],
+            )
+            # np.testing.assert_allclose(
+            #     outs[0], np.multiply(x_np, other1), rtol=1e-05
+            # )
+            np.testing.assert_allclose(
+                outs[0], np.multiply(x_np, other2_np), rtol=1e-05
+            )
+            np.testing.assert_allclose(
+                outs[1], np.multiply(x_np, other3_np), rtol=1e-05
+            )
+
+    def test_dyn_api(self):
+        paddle.disable_static()
+        x_np = np.random.rand(*self.shape).astype(self.dtype)
+        other2_np = np.random.rand(*self.shape).astype(self.dtype)
+        other3_np = np.random.rand(self.shape[0], 1).astype(self.dtype)
+        x = paddle.to_tensor(x_np, place=self.place)
+        # other1 = 3.0
+        other2 = paddle.to_tensor(other2_np, place=self.place)
+        other3 = paddle.to_tensor(other3_np, place=self.place)
+
+        # out1 = x.mul(other1)
+        out2 = x.mul(other2)
+        out3 = x.mul(other3)
+
+        # np.testing.assert_allclose(
+        #     out1.numpy(), np.multiply(x_np, other1), rtol=1e-05
+        # )
+        np.testing.assert_allclose(
+            out2.numpy(), np.multiply(x_np, other2_np), rtol=1e-05
+        )
+        np.testing.assert_allclose(
+            out3.numpy(), np.multiply(x_np, other3_np), rtol=1e-05
+        )
+
+
+class TestMulInplaceApi(unittest.TestCase):
+    def setUp(self) -> None:
+        self.shape = [2, 3]
+        self.dtype = 'float32'
+
+    def test_dyn_api(self):
+        paddle.disable_static()
+        others = [
+            # 3.0,
+            paddle.to_tensor(np.random.rand(*self.shape).astype('float32')),
+            paddle.to_tensor(np.random.rand(*self.shape).astype('float32'))[
+                :, -1
+            ].unsqueeze(-1),
+        ]
+        for other in others:
+            x_np = np.random.rand(*self.shape).astype('float32')
+            x = paddle.to_tensor(x_np)
+            x.mul_(other)
+            np.testing.assert_allclose(
+                x.numpy(),
+                np.multiply(
+                    x_np,
+                    (
+                        other.numpy()
+                        if isinstance(other, paddle.Tensor)
+                        else other
+                    ),
+                ),
+                rtol=1e-05,
+            )
+
+
+class TestMulInplaceError(unittest.TestCase):
+    def test_errors(self):
+        paddle.disable_static()
+        # test dynamic computation graph: inputs must be broadcastable
+        x_data = np.random.rand(3, 4)
+        y_data = np.random.rand(2, 3, 4)
+        x = paddle.to_tensor(x_data)
+        y = paddle.to_tensor(y_data)
+
+        def multiply_shape_error():
+            with paddle.no_grad():
+                x.mul_(y)
+
+        self.assertRaises(ValueError, multiply_shape_error)
+        paddle.enable_static()
+
+
+if __name__ == '__main__':
+    unittest.main()