From 35707c5d69650b338bbc6ced081f716ac944208a Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Tue, 5 Aug 2025 16:38:01 +0000 Subject: [PATCH 01/24] [API-Compat] paddle.compat.split is added and tested --- python/paddle/__init__.py | 1 + python/paddle/tensor/compat.py | 8 ++++++++ python/paddle/tensor/manipulation.py | 30 ++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+) diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index df3f0f2509d16c..c969cdfc276e54 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -200,6 +200,7 @@ pdist, ) from .nn.initializer.lazy_init import LazyGuard +from .tensor import compat as compat from .tensor.attribute import ( imag, is_complex, diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index ad7ec15d1cfae0..837a31f6ba2a84 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -18,10 +18,18 @@ import paddle from paddle import _C_ops +from paddle.tensor import fill_constant +from ..base.data_feeder import ( + check_dtype, + check_type, + check_variable_and_dtype, +) from ..base.framework import Variable from ..framework import ( + LayerHelper, in_dynamic_mode, + in_pir_mode, ) if TYPE_CHECKING: diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 403f48d17c2334..4ecae74c7d3d17 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -69,6 +69,36 @@ __all__ = [] +def forbid_keywords(illegal_keys, correct_func_name): + """ + A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected + + illegal_keys: list[str] | str - Forbidden keyword names + correct_func_name: str - Recommended function name + """ + if isinstance(illegal_keys, str): + illegal_keys = [illegal_keys] + + def decorator(func): + def wrapper(*args, **kwargs): + found_keys = [key for key in illegal_keys if key in kwargs] + + if found_keys: + keys_str = ", ".join(f"'{key}'" for key in found_keys) + plural = "s" if len(found_keys) > 1 else "" + + raise TypeError( + f"{func.__name__}() received unexpected keyword argument{plural} {keys_str}. " + f"\nDid you mean to use {correct_func_name}() instead?" 
+ ) + + return func(*args, **kwargs) + + return wrapper + + return decorator + + def tensor_array_to_tensor( input: Tensor | list[Tensor], axis: int = 1, From 23c422df2d8b8a9b8d82241ff056a0719d2096a7 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 6 Aug 2025 09:33:46 +0000 Subject: [PATCH 02/24] [API-Compat] paddle.compat.split is rigorously tested --- python/paddle/__init__.py | 1 - python/paddle/tensor/compat.py | 8 ---- python/paddle/tensor/manipulation.py | 10 +---- python/paddle/utils/__init__.py | 1 + python/paddle/utils/compat_kwarg_check.py | 52 +++++++++++++++++++++++ 5 files changed, 54 insertions(+), 18 deletions(-) create mode 100644 python/paddle/utils/compat_kwarg_check.py diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index c969cdfc276e54..df3f0f2509d16c 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -200,7 +200,6 @@ pdist, ) from .nn.initializer.lazy_init import LazyGuard -from .tensor import compat as compat from .tensor.attribute import ( imag, is_complex, diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 837a31f6ba2a84..ad7ec15d1cfae0 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -18,18 +18,10 @@ import paddle from paddle import _C_ops -from paddle.tensor import fill_constant -from ..base.data_feeder import ( - check_dtype, - check_type, - check_variable_and_dtype, -) from ..base.framework import Variable from ..framework import ( - LayerHelper, in_dynamic_mode, - in_pir_mode, ) if TYPE_CHECKING: diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 4ecae74c7d3d17..8036f328697885 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -87,16 +87,8 @@ def wrapper(*args, **kwargs): keys_str = ", ".join(f"'{key}'" for key in found_keys) plural = "s" if len(found_keys) > 1 else "" - raise TypeError( - f"{func.__name__}() received unexpected keyword argument{plural} {keys_str}. " - f"\nDid you mean to use {correct_func_name}() instead?" - ) - - return func(*args, **kwargs) - return wrapper - - return decorator +__all__ = [] def tensor_array_to_tensor( diff --git a/python/paddle/utils/__init__.py b/python/paddle/utils/__init__.py index 3fbcf6af86b4df..25b2301d064c8f 100644 --- a/python/paddle/utils/__init__.py +++ b/python/paddle/utils/__init__.py @@ -14,6 +14,7 @@ from ..base.framework import require_version from . import ( # noqa: F401 + compat_kwarg_check, cpp_extension, decorator_utils, dlpack, diff --git a/python/paddle/utils/compat_kwarg_check.py b/python/paddle/utils/compat_kwarg_check.py new file mode 100644 index 00000000000000..0e8c6729e89bd9 --- /dev/null +++ b/python/paddle/utils/compat_kwarg_check.py @@ -0,0 +1,52 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
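A minimal usage sketch of the forbid_keywords helper added here, assuming it decorates a compat-style API so that Paddle-style keyword names point users back to the original function (the decorated function below is illustrative, mirroring how later patches in this series use the helper):

    from paddle.utils.compat_kwarg_check import forbid_keywords

    @forbid_keywords(['x', 'axis'], 'paddle.min')
    def compat_min(input, dim=None, keepdim=False):
        ...  # placeholder body; the real API lives in python/paddle/tensor/compat.py

    # compat_min(t, x=t) then raises:
    # TypeError: compat_min() received unexpected keyword argument 'x'.
    # Did you mean to use paddle.min() instead?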
+ +from __future__ import annotations + +from typing import Any, Callable, TypeVar + +F = TypeVar('F', bound=Callable[..., Any]) + + +def forbid_keywords( + illegal_keys: list[str] | str, correct_func_name: str +) -> Callable[[F], F]: + """ + A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected + + Args: + illegal_keys: list[str] | str - Forbidden keyword names + correct_func_name: str - Recommended function name + """ + if isinstance(illegal_keys, str): + illegal_keys = [illegal_keys] + + def decorator(func: F) -> F: + def wrapper(*args: Any, **kwargs: Any) -> Any: + found_keys = [key for key in illegal_keys if key in kwargs] + + if found_keys: + keys_str = ", ".join(f"'{key}'" for key in found_keys) + plural = "s" if len(found_keys) > 1 else "" + + raise TypeError( + f"{func.__name__}() received unexpected keyword argument{plural} {keys_str}. " + f"\nDid you mean to use {correct_func_name}() instead?" + ) + + return func(*args, **kwargs) + + return wrapper + + return decorator From 309b44ad30ac3549670667aeb60d66fa9eb01c22 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 6 Aug 2025 12:23:16 +0000 Subject: [PATCH 03/24] [API-Compat] Make the forbid_keywords decorator transparent --- python/paddle/utils/compat_kwarg_check.py | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/python/paddle/utils/compat_kwarg_check.py b/python/paddle/utils/compat_kwarg_check.py index 0e8c6729e89bd9..1da6a1584598c1 100644 --- a/python/paddle/utils/compat_kwarg_check.py +++ b/python/paddle/utils/compat_kwarg_check.py @@ -14,7 +14,9 @@ from __future__ import annotations -from typing import Any, Callable, TypeVar +import functools +import inspect +from typing import Any, Callable, TypeVar, cast F = TypeVar('F', bound=Callable[..., Any]) @@ -29,10 +31,12 @@ def forbid_keywords( illegal_keys: list[str] | str - Forbidden keyword names correct_func_name: str - Recommended function name """ - if isinstance(illegal_keys, str): - illegal_keys = [illegal_keys] + keys = [illegal_keys] if isinstance(illegal_keys, str) else illegal_keys def decorator(func: F) -> F: + orig_sig = inspect.signature(func) + + @functools.wraps(func) def wrapper(*args: Any, **kwargs: Any) -> Any: found_keys = [key for key in illegal_keys if key in kwargs] @@ -47,6 +51,14 @@ def wrapper(*args: Any, **kwargs: Any) -> Any: return func(*args, **kwargs) - return wrapper + # Important: function signatures / specs should be copied to avoid erroneous input/output extraction (particularly in static graph, like test_split_op.py) + wrapper.__signature__ = orig_sig + if hasattr(func, "__defaults__"): + wrapper.__defaults__ = func.__defaults__ + if hasattr(func, "__kwdefaults__"): + wrapper.__kwdefaults__ = func.__kwdefaults__ + wrapper.__wrapped__ = func + + return cast('F', wrapper) return decorator From 2a33744d1d99469dab203ca9d6ba2caf1f4184e7 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 6 Aug 2025 12:43:42 +0000 Subject: [PATCH 04/24] [API-Compat] Fixed decorator str input --- python/paddle/utils/compat_kwarg_check.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/paddle/utils/compat_kwarg_check.py b/python/paddle/utils/compat_kwarg_check.py index 1da6a1584598c1..37384a49ef7942 100644 --- a/python/paddle/utils/compat_kwarg_check.py +++ b/python/paddle/utils/compat_kwarg_check.py @@ -31,7 +31,9 @@ def forbid_keywords( illegal_keys: list[str] | str - Forbidden keyword names correct_func_name: str - Recommended function 
name """ - keys = [illegal_keys] if isinstance(illegal_keys, str) else illegal_keys + illegal_keys = ( + [illegal_keys] if isinstance(illegal_keys, str) else illegal_keys + ) def decorator(func: F) -> F: orig_sig = inspect.signature(func) From 11d9640ac26ce15360dabac0fca5d7871ca8f982 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Sat, 9 Aug 2025 06:03:28 +0000 Subject: [PATCH 05/24] [API-Compat] More unittest & static graph check & updated decorator --- python/paddle/tensor/manipulation.py | 22 -------- python/paddle/utils/__init__.py | 1 - python/paddle/utils/compat_kwarg_check.py | 66 ----------------------- 3 files changed, 89 deletions(-) delete mode 100644 python/paddle/utils/compat_kwarg_check.py diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 8036f328697885..403f48d17c2334 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -69,28 +69,6 @@ __all__ = [] -def forbid_keywords(illegal_keys, correct_func_name): - """ - A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected - - illegal_keys: list[str] | str - Forbidden keyword names - correct_func_name: str - Recommended function name - """ - if isinstance(illegal_keys, str): - illegal_keys = [illegal_keys] - - def decorator(func): - def wrapper(*args, **kwargs): - found_keys = [key for key in illegal_keys if key in kwargs] - - if found_keys: - keys_str = ", ".join(f"'{key}'" for key in found_keys) - plural = "s" if len(found_keys) > 1 else "" - - -__all__ = [] - - def tensor_array_to_tensor( input: Tensor | list[Tensor], axis: int = 1, diff --git a/python/paddle/utils/__init__.py b/python/paddle/utils/__init__.py index 25b2301d064c8f..3fbcf6af86b4df 100644 --- a/python/paddle/utils/__init__.py +++ b/python/paddle/utils/__init__.py @@ -14,7 +14,6 @@ from ..base.framework import require_version from . import ( # noqa: F401 - compat_kwarg_check, cpp_extension, decorator_utils, dlpack, diff --git a/python/paddle/utils/compat_kwarg_check.py b/python/paddle/utils/compat_kwarg_check.py deleted file mode 100644 index 37384a49ef7942..00000000000000 --- a/python/paddle/utils/compat_kwarg_check.py +++ /dev/null @@ -1,66 +0,0 @@ -# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
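This commit drops the standalone helper module; the remaining patches in the series rely on the equivalent ForbidKeywordsDecorator from paddle.utils.decorator_utils instead, applied roughly as follows (a sketch based on the usage visible in patches 07 and 09):

    from paddle.utils.decorator_utils import ForbidKeywordsDecorator

    @ForbidKeywordsDecorator(
        illegal_keys=['x', 'axis'],
        func_name='paddle.compat.min',
        correct_name='paddle.min',
    )
    def min(input, *args, **kwargs):
        ...  # compat implementation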
- -from __future__ import annotations - -import functools -import inspect -from typing import Any, Callable, TypeVar, cast - -F = TypeVar('F', bound=Callable[..., Any]) - - -def forbid_keywords( - illegal_keys: list[str] | str, correct_func_name: str -) -> Callable[[F], F]: - """ - A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected - - Args: - illegal_keys: list[str] | str - Forbidden keyword names - correct_func_name: str - Recommended function name - """ - illegal_keys = ( - [illegal_keys] if isinstance(illegal_keys, str) else illegal_keys - ) - - def decorator(func: F) -> F: - orig_sig = inspect.signature(func) - - @functools.wraps(func) - def wrapper(*args: Any, **kwargs: Any) -> Any: - found_keys = [key for key in illegal_keys if key in kwargs] - - if found_keys: - keys_str = ", ".join(f"'{key}'" for key in found_keys) - plural = "s" if len(found_keys) > 1 else "" - - raise TypeError( - f"{func.__name__}() received unexpected keyword argument{plural} {keys_str}. " - f"\nDid you mean to use {correct_func_name}() instead?" - ) - - return func(*args, **kwargs) - - # Important: function signatures / specs should be copied to avoid erroneous input/output extraction (particularly in static graph, like test_split_op.py) - wrapper.__signature__ = orig_sig - if hasattr(func, "__defaults__"): - wrapper.__defaults__ = func.__defaults__ - if hasattr(func, "__kwdefaults__"): - wrapper.__kwdefaults__ = func.__kwdefaults__ - wrapper.__wrapped__ = func - - return cast('F', wrapper) - - return decorator From 6a584709602492e1681ab8762bff9d63fff31b2c Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Fri, 8 Aug 2025 10:33:51 +0000 Subject: [PATCH 06/24] [API-Compat] Add paddle.compat.min/max and new PHI kernel (min/max_with_index) --- .../infer_symbolic_shape/unary_infer_sym.cc | 23 +- .../infer_symbolic_shape/unary_infer_sym.h | 2 + paddle/phi/infermeta/unary.cc | 84 +++++ paddle/phi/infermeta/unary.h | 8 + .../kernels/gpu/min_max_with_index_kernel.cu | 312 ++++++++++++++++++ paddle/phi/kernels/gpu/reduce_kernel.cu | 52 +++ .../min_max_with_index_grad_kernel.h.h | 42 +++ .../phi/kernels/min_max_with_index_kernel.h | 40 +++ paddle/phi/ops/yaml/backward.yaml | 20 ++ paddle/phi/ops/yaml/ops.yaml | 22 ++ python/paddle/compat.py | 4 +- python/paddle/tensor/compat.py | 151 ++++++++- 12 files changed, 752 insertions(+), 8 deletions(-) create mode 100644 paddle/phi/kernels/gpu/min_max_with_index_kernel.cu create mode 100644 paddle/phi/kernels/min_max_with_index_grad_kernel.h.h create mode 100644 paddle/phi/kernels/min_max_with_index_kernel.h diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc index 6750759633d0b8..16ee03501fa4ab 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc @@ -315,8 +315,9 @@ bool AnyOpInferSymbolicShape(pir::Operation *op, axis.size() == 0 /*reduce_all*/); } -bool ArgmaxOpInferSymbolicShape(pir::Operation *op, - pir::InferSymbolicShapeContext *infer_context) { +bool MinMaxOpInferSymbolicShape(pir::Operation *op, + pir::InferSymbolicShapeContext *infer_context, + bool output_val_and_ind = false) { bool flatten = GetBoolAttr(op, "flatten"); bool keepdims = GetBoolAttr(op, "keepdims"); @@ -357,13 +358,23 @@ bool ArgmaxOpInferSymbolicShape(pir::Operation *op, 
symbol::TensorShapeOrDataDimExprs(out_sym_shape)}; infer_context->SetShapeOrDataForValue(op->result(0), shape_data); + if (output_val_and_ind) + infer_context->SetShapeOrDataForValue(op->result(1), shape_data); return true; } -bool ArgminOpInferSymbolicShape(pir::Operation *op, - pir::InferSymbolicShapeContext *infer_context) { - return ArgmaxOpInferSymbolicShape(op, infer_context); -} +#define DEFINE_MINMAX_OP_INFER_FUNC(OpName, output_val_and_ind) \ + bool OpName##OpInferSymbolicShape( \ + pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) { \ + return MinMaxOpInferSymbolicShape(op, infer_context, output_val_and_ind); \ + } + +DEFINE_MINMAX_OP_INFER_FUNC(Argmin, false) +DEFINE_MINMAX_OP_INFER_FUNC(Argmax, false) +DEFINE_MINMAX_OP_INFER_FUNC(MinWithIndex, true) +DEFINE_MINMAX_OP_INFER_FUNC(MaxWithIndex, true) + +#undef DEFINE_MINMAX_OP_INFER_FUNC bool AsComplexOpInferSymbolicShape( pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) { diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.h b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.h index 9868d08d8a290d..8d21b51eb2719f 100755 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.h +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.h @@ -93,8 +93,10 @@ OP_DECLARE_INFER_SYMBOLIC_SHAPE(Lu) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Lu_) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Mode) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Max) +OP_DECLARE_INFER_SYMBOLIC_SHAPE(MaxWithIndex) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Maxout) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Min) +OP_DECLARE_INFER_SYMBOLIC_SHAPE(MinWithIndex) OP_DECLARE_INFER_SYMBOLIC_SHAPE(Mean) OP_DECLARE_INFER_SYMBOLIC_SHAPE(MeanAll) OP_DECLARE_INFER_SYMBOLIC_SHAPE(MatrixPower) diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index 405528589b824e..fe014446c88ce2 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -366,6 +366,90 @@ void ArgMinMaxInferMeta(const MetaTensor& x, } } +void MinMaxWithIndexInferMeta(const MetaTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + MetaTensor* val_out, + MetaTensor* ind_out, + MetaConfig config) { + DataType val_dtype = x.dtype(); + + if (!config.is_runtime && axis.FromTensor()) { + std::vector vec; + if (flatten) { + if (keepdims) { // NOLINT + vec = std::vector(x.dims().size(), -1); + } else { + vec = {}; + } + } else { + if (keepdims) { + vec = std::vector(x.dims().size(), -1); + } else { + vec = std::vector(x.dims().size() - 1, -1); + } + } + val_out->set_dims(common::make_ddim(vec)); + val_out->set_dtype(val_dtype); + ind_out->set_dims(common::make_ddim(vec)); + ind_out->set_dtype(DataType::INT64); + return; + } + auto int_axis = axis.to(); + const auto& x_dims = x.dims(); + + auto x_rank = x.dims().size(); + if (x_rank > 0) { + PADDLE_ENFORCE_GE(int_axis, + -x_rank, + common::errors::InvalidArgument( + "'axis'(%d) must be greater than or equal to" + " -Rank(X)(%d).", + int_axis, + -x_rank)); + PADDLE_ENFORCE_LT( + int_axis, + x_rank, + common::errors::InvalidArgument( + "'axis'(%d) must be less than Rank(X)(%d) of Input(X).", + int_axis, + x_rank)); + } else { + // 0-dim tensor + PADDLE_ENFORCE_EQ(int_axis == 0 || int_axis == -1, + true, + common::errors::InvalidArgument( + "'axis'(%d) must be 0 or -1 if input tensor is " + "0-dim.", + int_axis)); + } + + if (int_axis < 0) int_axis += x_rank; + + std::vector vec; + if (flatten) { + if 
(keepdims) { // NOLINT + vec = std::vector(x.dims().size(), 1); + } else { + vec = {}; + } + } else { + for (int64_t i = 0; i < int_axis; i++) + vec.emplace_back(x_dims[static_cast(i)]); + if (keepdims) { + vec.emplace_back(static_cast(1)); + } + for (int64_t i = int_axis + 1; i < x_rank; i++) + vec.emplace_back(x_dims[static_cast(i)]); + } + + val_out->set_dims(common::make_ddim(vec)); + val_out->set_dtype(val_dtype); + ind_out->set_dims(common::make_ddim(vec)); + ind_out->set_dtype(DataType::INT64); +} + void ArgsortInferMeta(const MetaTensor& input, int axis, bool descending, diff --git a/paddle/phi/infermeta/unary.h b/paddle/phi/infermeta/unary.h index 7334ee476c0ad9..ea6c95748c16c5 100644 --- a/paddle/phi/infermeta/unary.h +++ b/paddle/phi/infermeta/unary.h @@ -66,6 +66,14 @@ void ArgMinMaxInferMeta(const MetaTensor& x, MetaTensor* out, MetaConfig config = MetaConfig()); +void MinMaxWithIndexInferMeta(const MetaTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + MetaTensor* val_out, + MetaTensor* ind_out, + MetaConfig config = MetaConfig()); + void ArgsortInferMeta(const MetaTensor& input, int axis, bool descending, diff --git a/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu new file mode 100644 index 00000000000000..57699f2f97e83e --- /dev/null +++ b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu @@ -0,0 +1,312 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/kernels/min_max_with_index_kernel.h" + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" + +#if defined(__NVCC__) || defined(__HIPCC__) + +#ifdef __NVCC__ +#include "cub/cub.cuh" +#endif +#ifdef __HIPCC__ +#include +namespace cub = hipcub; +#endif +#include + +#include "paddle/common/ddim.h" +#include "paddle/phi/core/utils/data_type.h" +#include "paddle/phi/kernels/funcs/math_function.h" +namespace phi { + +namespace { // NOLINT +template +using KeyValuePair = cub::KeyValuePair; + +} // namespace + +#define FIXED_BLOCK_DIM_CASE_BASE(log2_block_dim, ...) \ + case (1 << (log2_block_dim)): { \ + constexpr auto kBlockDim = (1 << (log2_block_dim)); \ + __VA_ARGS__; \ + } break + +#define FIXED_BLOCK_DIM_CASE(...) 
\ + FIXED_BLOCK_DIM_CASE_BASE(10, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(9, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(8, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(7, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(6, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(5, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(4, ##__VA_ARGS__); \ + FIXED_BLOCK_DIM_CASE_BASE(3, ##__VA_ARGS__); + +template +__global__ void MinMaxWithIndexKernel(const int64_t height, // n * h + const int64_t width, // c + const int64_t post_size, // h + const Reducer reducer, + const T init, + const T* in, + T* val_out, + IndType* key_out) { + typedef cub::BlockReduce, BlockDim> BlockReduce; + __shared__ typename BlockReduce::TempStorage temp_storage; + + for (IndexType idx = blockIdx.x; idx < height; idx += gridDim.x) { + KeyValuePair kv_pair = {-1, init}; + IndexType h = idx / post_size; + IndexType w = idx % post_size; + for (IndexType k = threadIdx.x; k < width; k += blockDim.x) { + kv_pair = + reducer({k, in[h * width * post_size + k * post_size + w]}, kv_pair); + } + kv_pair = BlockReduce(temp_storage).Reduce(kv_pair, reducer); + if (threadIdx.x == 0) { + val_out[idx] = static_cast(kv_pair.value); + key_out[idx] = static_cast(kv_pair.key); + } + __syncthreads(); + } +} + +template +void ComputeMinMaxWithIndex(const phi::GPUContext& dev_ctx, + const DenseTensor& input, + DenseTensor* values, + DenseTensor* indices, + const int64_t pre, + const int64_t post, + const int64_t n) { + auto cu_stream = dev_ctx.stream(); + auto ComputeBlockSize = [](int64_t col) { + auto block_size = 8; + if (col > 512) + block_size = 1024; + else if (col > 256) + block_size = 512; + else if (col > 128) + block_size = 256; + else if (col > 64) + block_size = 128; + else if (col > 32) + block_size = 64; + else if (col > 16) + block_size = 32; + else if (col > 8) + block_size = 16; + return block_size; + }; + + int64_t max_grid_dimx = dev_ctx.GetCUDAMaxGridDimSize()[0]; + int64_t height = pre * post; + int64_t width = n; + int64_t grid_size = height < max_grid_dimx ? 
height : max_grid_dimx; + + const T* in_data = input.data(); + + T* val_data = dev_ctx.template Alloc(values); + IndType* ind_data = dev_ctx.template Alloc(indices); + + if (typeid(Reducer) == typeid(cub::ArgMax)) { + switch (ComputeBlockSize(width)) { + FIXED_BLOCK_DIM_CASE( + MinMaxWithIndexKernel + <<>>( + height, + width, + post, + Reducer(), + std::numeric_limits::lowest(), + in_data, + val_data, + ind_data)); + } + } else { + switch (ComputeBlockSize(width)) { + FIXED_BLOCK_DIM_CASE( + MinMaxWithIndexKernel + <<>>( + height, + width, + post, + Reducer(), + std::numeric_limits::max(), + in_data, + val_data, + ind_data)); + } + } +} + +template +struct VisitDataCudaMinMaxWithIndexFunctor { + const Context& dev_ctx; + const DenseTensor& x; + int64_t axis; + bool keepdims; + bool flatten; + DenseTensor* val_out; + DenseTensor* ind_out; + + explicit VisitDataCudaMinMaxWithIndexFunctor(const Context& dev_ctx, + const DenseTensor& x, + int64_t axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out) + : dev_ctx(dev_ctx), + x(x), + axis(axis), + keepdims(keepdims), + flatten(flatten), + val_out(val_out), + ind_out(ind_out) {} + + template + void apply() const { + phi::DDim x_dims; + int new_axis = axis; + if (flatten) { + x_dims = common::make_ddim({x.numel()}); + // if flatten, the axis just as 0 + new_axis = 0; + } else { + x_dims = x.dims(); + if (axis < 0) new_axis = axis + x.dims().size(); + } + if (x.numel() == 0) { + dev_ctx.template Alloc(val_out); + dev_ctx.template Alloc(ind_out); + return; + } + // For 0D Tensor + if (x.dims().size() == 0) { + dev_ctx.template Alloc(val_out); + dev_ctx.template Alloc(ind_out); + phi::funcs::set_constant(dev_ctx, ind_out, static_cast(0)); + phi::Copy(dev_ctx, x, dev_ctx.GetPlace(), false, val_out); + return; + } + + int64_t numel = x.numel(); + int64_t groups = numel / x_dims[new_axis]; + int64_t pre = 1; + int64_t post = 1; + int64_t n = x_dims[new_axis]; + + for (int i = 0; i < new_axis; i++) { + pre *= x_dims[i]; + } + + for (int i = new_axis + 1; i < x_dims.size(); i++) { + post *= x_dims[i]; + } + + if (numel > std::numeric_limits::max()) { + ComputeMinMaxWithIndex( + dev_ctx, x, val_out, ind_out, pre, post, n); + } else { + ComputeMinMaxWithIndex( + dev_ctx, x, val_out, ind_out, pre, post, n); + } + } +}; + +template +void MinMaxWithIndexOpCUDAKernel(const Context& dev_ctx, + const DenseTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out) { + PADDLE_ENFORCE_GE( + x.numel(), + 0, + common::errors::InvalidArgument( + "(min/max)_with_index input numel must > 0, bug got %d", x.numel())); + phi::VisitDataTypeTiny( + phi::DataType::INT64, + VisitDataCudaMinMaxWithIndexFunctor( + dev_ctx, x, axis.to(), keepdims, flatten, val_out, ind_out)); +} + +template +void MinWithIndexKernel(const Context& dev_ctx, + const DenseTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out) { + MinMaxWithIndexOpCUDAKernel( + dev_ctx, x, axis, keepdims, flatten, val_out, ind_out); +} + +template +void MaxWithIndexKernel(const Context& dev_ctx, + const DenseTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out) { + MinMaxWithIndexOpCUDAKernel( + dev_ctx, x, axis, keepdims, flatten, val_out, ind_out); +} + +#endif + +} // namespace phi + +PD_REGISTER_KERNEL(min_with_index, + GPU, + ALL_LAYOUT, + phi::MinWithIndexKernel, + phi::dtype::float16, + phi::dtype::bfloat16, + 
float, + double, + int32_t, + int64_t, + int16_t, + uint8_t) { + kernel->OutputAt(0).SetDataType(kernel->InputAt(0).dtype); + kernel->OutputAt(1).SetDataType(phi::DataType::INT64); +} + +PD_REGISTER_KERNEL(max_with_index, + GPU, + ALL_LAYOUT, + phi::MaxWithIndexKernel, + phi::dtype::float16, + phi::dtype::bfloat16, + float, + double, + int32_t, + int64_t, + int16_t, + uint8_t) { + kernel->OutputAt(0).SetDataType(kernel->InputAt(0).dtype); + kernel->OutputAt(1).SetDataType(phi::DataType::INT64); +} diff --git a/paddle/phi/kernels/gpu/reduce_kernel.cu b/paddle/phi/kernels/gpu/reduce_kernel.cu index 95132d09e2cc22..bc4de24540346a 100644 --- a/paddle/phi/kernels/gpu/reduce_kernel.cu +++ b/paddle/phi/kernels/gpu/reduce_kernel.cu @@ -159,6 +159,36 @@ void ReduceAMaxGradKernel(const Context& dev_ctx, dev_ctx, x, out, out_grad, dims, keep_dim, reduce_all, x_grad); } +template +void MinWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad) { + int64_t dim_val = dim.to(); + flatten = recompute_reduce_all(x, {dim_val}, flatten); + ReduceCudaAMaxAMinGrad( + dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); +} + +template +void MaxWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad) { + int64_t dim_val = dim.to(); + flatten = recompute_reduce_all(x, {dim_val}, flatten); + ReduceCudaAMaxAMinGrad( + dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); +} + template void ReduceMaxGradKernel(const Context& dev_ctx, const DenseTensor& x, @@ -284,6 +314,17 @@ PD_REGISTER_KERNEL(max_grad, phi::dtype::float16, phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(max_with_index_grad, + GPU, + ALL_LAYOUT, + phi::MaxWithIndexGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} + PD_REGISTER_KERNEL(mean_grad, GPU, ALL_LAYOUT, @@ -310,6 +351,17 @@ PD_REGISTER_KERNEL(min_grad, phi::dtype::float16, phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(min_with_index_grad, + GPU, + ALL_LAYOUT, + phi::MinWithIndexGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} + PD_REGISTER_KERNEL(sum_grad, GPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h b/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h new file mode 100644 index 00000000000000..30c4fd34bb281e --- /dev/null +++ b/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h @@ -0,0 +1,42 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
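The grad kernels registered above reuse ReduceCudaAMaxAMinGrad, i.e. the amin/amax backward rule that splits the incoming gradient evenly across all tied extrema. A rough NumPy model of that rule, for intuition only (not the actual kernel code):

    import numpy as np

    def amax_style_grad(x, out, out_grad, axis, keepdim):
        # Broadcast the reduced values/grads back to x's shape.
        if not keepdim:
            out = np.expand_dims(out, axis)
            out_grad = np.expand_dims(out_grad, axis)
        mask = (x == out).astype(x.dtype)  # 1 at every position that ties for the extremum
        return mask * out_grad / mask.sum(axis=axis, keepdims=True)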
+ +#pragma once + +#include "paddle/phi/common/scalar.h" +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void MaxWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad); + +template +void MinWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad); + +} // namespace phi diff --git a/paddle/phi/kernels/min_max_with_index_kernel.h b/paddle/phi/kernels/min_max_with_index_kernel.h new file mode 100644 index 00000000000000..2e5ad70feaaec4 --- /dev/null +++ b/paddle/phi/kernels/min_max_with_index_kernel.h @@ -0,0 +1,40 @@ +/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/common/scalar.h" +#include "paddle/phi/core/dense_tensor.h" + +namespace phi { + +template +void MinWithIndexKernel(const Context& dev_ctx, + const DenseTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out); + +template +void MaxWithIndexKernel(const Context& dev_ctx, + const DenseTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + DenseTensor* val_out, + DenseTensor* ind_out); + +} // namespace phi diff --git a/paddle/phi/ops/yaml/backward.yaml b/paddle/phi/ops/yaml/backward.yaml index 5364fa6ff73b9c..f75e509f6aaa22 100644 --- a/paddle/phi/ops/yaml/backward.yaml +++ b/paddle/phi/ops/yaml/backward.yaml @@ -2277,6 +2277,16 @@ kernel : func : max_pool3d_with_index_grad +- backward_op : max_with_index_grad + forward : max_with_index (Tensor x, Scalar axis, bool keepdims, bool flatten) -> Tensor(values), Tensor(indices) + args : (Tensor x, Tensor values, Tensor values_grad, Scalar axis, bool keepdims, bool flatten) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : max_with_index_grad + - backward_op : maxout_grad forward : maxout(Tensor x, int groups, int axis) -> Tensor(out) args : (Tensor x, Tensor out, Tensor out_grad, int groups, int axis) @@ -2340,6 +2350,16 @@ func : meshgrid_grad data_type : out_grad +- backward_op : min_with_index_grad + forward : min_with_index (Tensor x, Scalar axis, bool keepdims, bool flatten) -> Tensor(values), Tensor(indices) + args : (Tensor x, Tensor values, Tensor values_grad, Scalar axis, bool keepdims, bool flatten) + output : Tensor(x_grad) + infer_meta : + func : UnchangedInferMeta + param : [x] + kernel : + func : min_with_index_grad + - backward_op : mish_grad forward : mish (Tensor x, float lambda) -> Tensor(out) args : (Tensor x, Tensor out_grad, float lambda) diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml index b5f4d6371a82b1..9d84600531a792 100644 --- a/paddle/phi/ops/yaml/ops.yaml +++ b/paddle/phi/ops/yaml/ops.yaml @@ -3577,6 
+3577,17 @@ backward : max_pool3d_with_index_grad interfaces : paddle::dialect::InferSymbolicShapeInterface +- op : max_with_index + args : (Tensor x, Scalar(int64_t) axis, bool keepdims = false, bool flatten = false) + output : Tensor(values), Tensor(indices) + infer_meta : + func : MinMaxWithIndexInferMeta + kernel : + func : max_with_index + data_type : x + backward : max_with_index_grad + interfaces : paddle::dialect::InferSymbolicShapeInterface, paddle::dialect::LayoutTransformationInterface + - op : maxout args : (Tensor x, int groups, int axis = 1) output : Tensor(out) @@ -3686,6 +3697,17 @@ backward : meshgrid_grad interfaces : paddle::dialect::InferSymbolicShapeInterface +- op : min_with_index + args : (Tensor x, Scalar(int64_t) axis, bool keepdims = false, bool flatten = false) + output : Tensor(values), Tensor(indices) + infer_meta : + func : MinMaxWithIndexInferMeta + kernel : + func : min_with_index + data_type : x + backward : min_with_index_grad + interfaces : paddle::dialect::InferSymbolicShapeInterface, paddle::dialect::LayoutTransformationInterface + - op : mish args : (Tensor x, float lambda) output : Tensor diff --git a/python/paddle/compat.py b/python/paddle/compat.py index 2a37393e9053f8..023fe2efcbe325 100644 --- a/python/paddle/compat.py +++ b/python/paddle/compat.py @@ -14,8 +14,10 @@ from .tensor.compat import ( Unfold, + max, + min, sort, split, ) -__all__ = ['split', 'sort', 'Unfold'] +__all__ = ['split', 'sort', 'Unfold', 'min', 'max'] diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index ad7ec15d1cfae0..0c3c613a8b83e8 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -14,7 +14,7 @@ from __future__ import annotations -from typing import TYPE_CHECKING, NamedTuple +from typing import TYPE_CHECKING, Any, NamedTuple import paddle from paddle import _C_ops @@ -223,6 +223,10 @@ class SortRetType(NamedTuple): values: Tensor indices: Tensor +class MinMaxRetType(NamedTuple): + values: Tensor + indices: Tensor + def _check_out_status( out: Tensor | tuple[Tensor, Tensor] | list[Tensor], @@ -398,3 +402,148 @@ def to_list_if_necessary(x, size_check=False): dilations=to_list_if_necessary(self.dilations), name=self.name, ) +def _min_max_param_checker(func_name: str, *args: Any, **kwargs: Any): + def invalid_arguments_exception(error_prefix=""): + type_strs = [type(v).__name__ for v in args] + type_strs.extend([f"{k}={type(v).__name__}" for k, v in kwargs.items()]) + signature = ", ".join(type_strs) + + error_msg = ( + f"Invalid arguments for `paddle.compat.{func_name}`:\n{error_prefix}" + f"Got: (paddle.Tensor input, {signature}), but expect one of:\n" + f" - (input: paddle.Tensor) for reduce_{func_name} on all dims.\n" + f" - (input: paddle.Tensor, other: paddle.Tensor) -> see paddle.{func_name}imum\n" + f" - (input: paddle.Tensor, int dim (cannot be None), bool keepdim = False)\n" + ) + return TypeError(error_msg) + + def try_get_keys(key): + res = None + try: + res = kwargs[key] + except KeyError: + raise invalid_arguments_exception() from None + return res + found_key = None + + dim_or_other = None + keepdim = False + + num_args = len(args) + total_arg_num = num_args + len(kwargs) + if total_arg_num > 2: + raise invalid_arguments_exception() + elif total_arg_num == 2: + if num_args == 2: + dim_or_other, keepdim = args + if dim_or_other is None or isinstance( + dim_or_other, (Variable, paddle.pir.Value) + ): + raise invalid_arguments_exception() + elif num_args == 1: + dim_or_other = args[0] + if dim_or_other 
is None or isinstance( + dim_or_other, (Variable, paddle.pir.Value) + ): + raise invalid_arguments_exception() + keepdim = try_get_keys("keepdim") + else: + dim_or_other = try_get_keys("dim") + keepdim = try_get_keys("keepdim") + elif total_arg_num == 1: + if num_args: + dim_or_other = args[0] + if dim_or_other is None: + raise invalid_arguments_exception() + else: + if "dim" in kwargs: + dim_or_other = kwargs["dim"] + elif "other" in kwargs: + dim_or_other = kwargs["other"] + if not isinstance(dim_or_other, (Variable, paddle.pir.Value)): + raise invalid_arguments_exception() + if dim_or_other is None: + raise invalid_arguments_exception() + + if ( + dim_or_other is not None + and not isinstance(dim_or_other, (Variable, paddle.pir.Value)) + and type(dim_or_other) is not int + ): + raise invalid_arguments_exception( + f"The second input must be int or Tensor or implicit None in compat.min, but received {type(dim_or_other)}.\n" + ) + + return dim_or_other, keepdim + + +@forbid_keywords(['x', 'axis'], 'paddle.min') +def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: + if not isinstance(input, paddle.pir.Value) and not isinstance( + input, paddle.Tensor + ): + raise TypeError( + f"input should be a tensor, but got an instance with type '{type(input).__name__}'" + ) + + dim_or_other, keepdim = _min_max_param_checker("min", *args, **kwargs) + + if dim_or_other is None: + return _C_ops.min(input, None, False) + elif isinstance(dim_or_other, int): + if input.place.is_gpu_place(): + vals, inds = _C_ops.min_with_index( + input, dim_or_other, keepdim, False + ) + inds.stop_gradient = True + return MinMaxRetType(values=vals, indices=inds) + else: + # CPUPlace and other placements are implemented by composition + indices = _C_ops.argmin( + input, dim_or_other, True, False, paddle.int64 + ) + values = _C_ops.take_along_axis(input, indices, dim_or_other) + if keepdim: + return MinMaxRetType(values=values, indices=indices) + return MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) + else: + return _C_ops.minimum(input, dim_or_other) + + +@forbid_keywords(['x', 'axis'], 'paddle.max') +def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: + if not isinstance(input, paddle.pir.Value) and not isinstance( + input, paddle.Tensor + ): + raise TypeError( + f"input should be a tensor, but got an instance with type '{type(input).__name__}'" + ) + + dim_or_other, keepdim = _min_max_param_checker("max", *args, **kwargs) + + if dim_or_other is None: + return _C_ops.max(input, None, False) + elif isinstance(dim_or_other, int): + if input.place.is_gpu_place(): + vals, inds = _C_ops.max_with_index( + input, dim_or_other, keepdim, False + ) + inds.stop_gradient = True + return MinMaxRetType(values=vals, indices=inds) + else: + # CPUPlace and other placements are implemented by composition + indices = _C_ops.argmax( + input, dim_or_other, True, False, paddle.int64 + ) + values = _C_ops.take_along_axis(input, indices, dim_or_other) + if keepdim: + return MinMaxRetType(values=values, indices=indices) + return MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) + else: + return _C_ops.maximum(input, dim_or_other) From 6255ed9c6925ae7c2f6bcd015cb888058dad1bc3 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Sat, 9 Aug 2025 17:16:21 +0000 Subject: [PATCH 07/24] [API-Compat] Add compat.min/max EN doc Attempting to fix integral type gradient computation 
(rejection) --- paddle/phi/kernels/gpu/reduce_kernel.cu | 53 ++++++- python/paddle/tensor/compat.py | 177 +++++++++++++++++++++++- 2 files changed, 226 insertions(+), 4 deletions(-) diff --git a/paddle/phi/kernels/gpu/reduce_kernel.cu b/paddle/phi/kernels/gpu/reduce_kernel.cu index bc4de24540346a..089cb3601f0a5b 100644 --- a/paddle/phi/kernels/gpu/reduce_kernel.cu +++ b/paddle/phi/kernels/gpu/reduce_kernel.cu @@ -13,6 +13,7 @@ // limitations under the License. #include "paddle/phi/kernels/reduce_kernel.h" +#include #include "paddle/phi/kernels/gpu/reduce_amin_amax_common.h" #include "paddle/phi/kernels/reduce_amin_grad_kernel.h" @@ -159,7 +160,15 @@ void ReduceAMaxGradKernel(const Context& dev_ctx, dev_ctx, x, out, out_grad, dims, keep_dim, reduce_all, x_grad); } -template +template +using EnableIfInteger = + typename std::enable_if::value, int>::type; + +template +using EnableIfNonInteger = + typename std::enable_if::value, int>::type; + +template = 0> void MinWithIndexGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& values, @@ -174,7 +183,25 @@ void MinWithIndexGradKernel(const Context& dev_ctx, dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); } -template +template = 0> +void MinWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad) { + std::string dtype_name = phi::DataTypeToString(x.dtype()); + PADDLE_ENFORCE_EQ( + 0, + 1, + phi::errors::InvalidArgument( + "Integer type '%s' is not allowed to have stop_gradient=False.", + dtype_name.c_str())); +} + +template = 0> void MaxWithIndexGradKernel(const Context& dev_ctx, const DenseTensor& x, const DenseTensor& values, @@ -189,6 +216,24 @@ void MaxWithIndexGradKernel(const Context& dev_ctx, dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); } +template = 0> +void MaxWithIndexGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& values, + const DenseTensor& values_grad, + const Scalar& dim, + bool keepdims, + bool flatten, + DenseTensor* x_grad) { + std::string dtype_name = phi::DataTypeToString(x.dtype()); + PADDLE_ENFORCE_EQ( + 0, + 1, + phi::errors::InvalidArgument( + "Integer type '%s' is not allowed to have stop_gradient=False.", + dtype_name.c_str())); +} + template void ReduceMaxGradKernel(const Context& dev_ctx, const DenseTensor& x, @@ -320,7 +365,9 @@ PD_REGISTER_KERNEL(max_with_index_grad, phi::MaxWithIndexGradKernel, float, double, + uint8_t, int, + int16_t, int64_t, phi::dtype::float16, phi::dtype::bfloat16) {} @@ -357,7 +404,9 @@ PD_REGISTER_KERNEL(min_with_index_grad, phi::MinWithIndexGradKernel, float, double, + uint8_t, int, + int16_t, int64_t, phi::dtype::float16, phi::dtype::bfloat16) {} diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 0c3c613a8b83e8..c6cfdee988d34b 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -477,14 +477,108 @@ def try_get_keys(key): return dim_or_other, keepdim -@forbid_keywords(['x', 'axis'], 'paddle.min') +def _min_max_tensor_allow_grad(input: Tensor): + """Prevent integral input tensor type to have `stop_gradient=False`""" + in_dtype = input.dtype + if ( + in_dtype == paddle.int32 + or in_dtype == paddle.int64 + or in_dtype == paddle.uint8 + or in_dtype == paddle.int16 + ): + if not input.stop_gradient: + raise TypeError( + f"Tensors with integral type: '{in_dtype}' should stop 
gradient." + ) + + +@ForbidKeywordsDecorator( + illegal_keys=['x', 'axis'], + func_name="paddle.compat.min", + correct_name='paddle.min', +) def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: + """ + + Computes the minimum of tensor elements. There are mainly 3 cases (functionalities): + 1. paddle.compat.min(input: Tensor): reduce min over all dims, return a single value Tensor + 2. paddle.compat.min(input: Tensor, dim: int (cannot be None), keepdim=False): reduce min over the given dim, + returns a named tuple MinMaxRetType(values: Tensor, indices: Tensor) + 3. paddle.compat.min(input: Tensor, other: Tensor): see `paddle.minimum` + + Note: If there are multiple minimum elements, this API evenly distributes gradient between these equal values, + following torch.min. The gradient behavior of `values` for case 2 is the same as `paddle.amin`. + + Args: + input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64. + dim (int, optional): The dim along which the minimum is computed. + If this is not specified: see case 1, note that: `None` cannot be passed to this (TypeError will be thrown) + compute the minimum over all elements of `input` and return a Tensor with a single element, + otherwise must be in the range :math:`[-input.ndim, input.ndim)`. + If :math:`dim < 0`, the axis to reduce is :math:`input.ndim + dim`. + keepdim (bool, optional): Whether to reserve the reduced dimension in the + output Tensor. The result tensor will have one fewer dimension + than the `input` unless :attr:`keepdim` is true, default + value is False. Note that if `dim` does not appear in neither (*args) or (**kwargs), this parameter cannot be passed alone + other (Tensor, optional): the other tensor to perform `paddle.minimum` with. This Tensor should + have the same or broadcast-able shape as the `input`. Note that (`dim` & `keepdim`) and `other` are mutually exclusive + meaning that trying to composite both will result in TypeError + + Returns: + - For case 1: a single value Tensor (0-dim) + - For case 2: a named tuple MinMaxRetType(values: Tensor, indices: Tensor), `values` has the same data type as the `input`, + while indices is always an int64 Tensor, with exactly the same shape as `values`. + MinMaxRetType can be used (indexed, packed, unpacked) in the same way as a regular tuple + - For case 3: see `paddle.minimum` + + + Examples: + .. code-block:: python + + >>> import paddle + + >>> # data_x is a Tensor with shape [2, 4] + >>> # the axis is a int element + >>> x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], + ... [0.1, 0.2, 0.6, 0.7]], + ... dtype='float64', stop_gradient=False) + >>> # Case 1: reduce over all dims + >>> result1 = paddle.compat.min(x) + >>> result1 + Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=False, + 0.10000000) + + >>> # Case 2: reduce over specified dim + >>> x.clear_grad() + >>> result2 = paddle.compat.min(x, dim=1) + >>> result2 + MinMaxRetType(values=Tensor(shape=[2], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [0.20000000, 0.10000000]), indices=Tensor(shape=[2], dtype=int64, place=Place(gpu:0), stop_gradient=True, + [0, 0])) + >>> result2[0].backward() + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [[1., 0., 0., 0.], + [1., 0., 0., 0.]]) + + >>> # Case 3: equivalent to `paddle.minimum` + >>> x.clear_grad() + >>> y = paddle.to_tensor([[0.5, 0.4, 0.1, 0.2], + ... [0.3, 0.1, 0.6, 0.7]], + ... 
dtype='float64', stop_gradient=False) + >>> result3 = paddle.compat.min(x, y) + >>> result3 + Tensor(shape=[2, 4], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [[0.20000000, 0.30000000, 0.10000000, 0.20000000], + [0.10000000, 0.10000000, 0.60000000, 0.70000000]]) + """ if not isinstance(input, paddle.pir.Value) and not isinstance( input, paddle.Tensor ): raise TypeError( f"input should be a tensor, but got an instance with type '{type(input).__name__}'" ) + _min_max_tensor_allow_grad(input) dim_or_other, keepdim = _min_max_param_checker("min", *args, **kwargs) @@ -513,14 +607,93 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: return _C_ops.minimum(input, dim_or_other) -@forbid_keywords(['x', 'axis'], 'paddle.max') +@ForbidKeywordsDecorator( + illegal_keys=['x', 'axis'], + func_name="paddle.compat.max", + correct_name='paddle.max', +) def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: + """ + + Computes the maximum of tensor elements. There are mainly 3 cases (functionalities): + 1. paddle.compat.max(input: Tensor): reduce max over all dims, return a single value Tensor + 2. paddle.compat.max(input: Tensor, dim: int (cannot be None), keepdim=False): reduce max over the given dim, + returns a named tuple MinMaxRetType(values: Tensor, indices: Tensor) + 3. paddle.compat.max(input: Tensor, other: Tensor): see `paddle.maximum` + + Note: If there are multiple maximum elements, this API evenly distributes gradient between these equal values, + following torch.max. The gradient behavior of `values` for case 2 is the same as `paddle.amax`. + + Args: + input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64. + dim (int, optional): The dim along which the maximum is computed. + If this is not specified: see case 1, note that: `None` cannot be passed to this (TypeError will be thrown) + compute the maximum over all elements of `input` and return a Tensor with a single element, + otherwise must be in the range :math:`[-input.ndim, input.ndim)`. + If :math:`dim < 0`, the axis to reduce is :math:`input.ndim + dim`. + keepdim (bool, optional): Whether to reserve the reduced dimension in the + output Tensor. The result tensor will have one fewer dimension + than the `input` unless :attr:`keepdim` is true, default + value is False. Note that if `dim` does not appear in neither (*args) or (**kwargs), this parameter cannot be passed alone + other (Tensor, optional): the other tensor to perform `paddle.maximum` with. This Tensor should + have the same or broadcast-able shape as the `input`. Note that (`dim` & `keepdim`) and `other` are mutually exclusive + meaning that trying to composite both will result in TypeError + + Returns: + - For case 1: a single value Tensor (0-dim) + - For case 2: a named tuple MinMaxRetType(values: Tensor, indices: Tensor), `values` has the same data type as the `input`, + while indices is always an int64 Tensor, with exactly the same shape as `values`. + MinMaxRetType can be used (indexed, packed, unpacked) in the same way as a regular tuple + - For case 3: see `paddle.maximum` + + + Examples: + .. code-block:: python + + >>> import paddle + + >>> # data_x is a Tensor with shape [2, 4] + >>> # the axis is a int element + >>> x = paddle.to_tensor([[0.2, 0.3, 0.5, 0.9], + ... [0.1, 0.2, 0.6, 0.7]], + ... 
dtype='float64', stop_gradient=False) + >>> # Case 1: reduce over all dims + >>> result1 = paddle.compat.max(x) + >>> result1 + Tensor(shape=[], dtype=float64, place=Place(gpu:0), stop_gradient=False, + 0.90000000) + + >>> # Case 2: reduce over specified dim + >>> x.clear_grad() + >>> result2 = paddle.compat.max(x, dim=1) + >>> result2 + MinMaxRetType(values=Tensor(shape=[2], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [0.90000000, 0.70000000]), indices=Tensor(shape=[2], dtype=int64, place=Place(gpu:0), stop_gradient=True, + [3, 3])) + >>> result2[0].backward() + >>> x.grad + Tensor(shape=[2, 4], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [[0., 0., 0., 1.], + [0., 0., 0., 1.]]) + + >>> # Case 3: equivalent to `paddle.maximum` + >>> x.clear_grad() + >>> y = paddle.to_tensor([[0.5, 0.4, 0.1, 0.2], + ... [0.3, 0.1, 0.6, 0.7]], + ... dtype='float64', stop_gradient=False) + >>> result3 = paddle.compat.max(x, y) + >>> result3 + Tensor(shape=[2, 4], dtype=float64, place=Place(gpu:0), stop_gradient=False, + [[0.50000000, 0.40000000, 0.50000000, 0.90000000], + [0.30000000, 0.20000000, 0.60000000, 0.70000000]]) + """ if not isinstance(input, paddle.pir.Value) and not isinstance( input, paddle.Tensor ): raise TypeError( f"input should be a tensor, but got an instance with type '{type(input).__name__}'" ) + _min_max_tensor_allow_grad(input) dim_or_other, keepdim = _min_max_param_checker("max", *args, **kwargs) From 6fa8807ed77c48646afd050783e51cd4e27dee4d Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Sun, 10 Aug 2025 04:47:00 +0000 Subject: [PATCH 08/24] [WIP][API-Compat] Add dyna-graph unittests for min/max --- .../pir/dialect/op_generator/op_build_gen.py | 1 + test/legacy_test/test_compat_minmax.py | 255 ++++++++++++++++++ 2 files changed, 256 insertions(+) create mode 100644 test/legacy_test/test_compat_minmax.py diff --git a/paddle/fluid/pir/dialect/op_generator/op_build_gen.py b/paddle/fluid/pir/dialect/op_generator/op_build_gen.py index f8510480b2fca4..60840cc60ec5e9 100644 --- a/paddle/fluid/pir/dialect/op_generator/op_build_gen.py +++ b/paddle/fluid/pir/dialect/op_generator/op_build_gen.py @@ -135,6 +135,7 @@ 'KthvalueInferMeta', 'MaxPoolWithIndexInferMeta', 'MaxPoolV2InferMeta', + 'MinMaxWithIndexInferMeta', 'MultinomialInferMeta', 'OverlapAddInferMeta', 'PadInferMeta', diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py new file mode 100644 index 00000000000000..1db80522e7b7f5 --- /dev/null +++ b/test/legacy_test/test_compat_minmax.py @@ -0,0 +1,255 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
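For reference while reading the tests below: when a dim is given, compat.min / compat.max return (values, indices) along that dim, which behaves roughly like this NumPy sketch (an approximation; tie-breaking details aside):

    import numpy as np

    def compat_min_ref(x, dim, keepdim=False):
        indices = np.argmin(x, axis=dim)  # indices along dim
        values = np.take_along_axis(x, np.expand_dims(indices, dim), axis=dim)
        if keepdim:
            indices = np.expand_dims(indices, dim)
        else:
            values = np.squeeze(values, axis=dim)
        return values, indices.astype(np.int64)

    # compat_max_ref is the same sketch with np.argmax.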
+ +import unittest + +import numpy as np + +import paddle + + +class TestCompatMinMax(unittest.TestCase): + def setUp(self): + """Make sure we are in a dynamic graph env""" + paddle.disable_static() + + def test_case1_simple_reduce_all(self): + data = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]], dtype='float32') + min_val = paddle.compat.min(data) + max_val = paddle.compat.max(data) + + self.assertAlmostEqual(min_val.item(), 1.0) + self.assertAlmostEqual(max_val.item(), 4.0) + + data = paddle.to_tensor( + [[1.0, 1.0], [2.0, 3.0]], dtype='float32', stop_gradient=False + ) + min_val = paddle.compat.min(data) + min_val.backward() + + expected_grad = np.array([[0.5, 0.5], [0.0, 0.0]]) + np.testing.assert_allclose(data.grad.numpy(), expected_grad) + + def test_case2_reduce_dim(self): + """Test dim/keepdim""" + data = paddle.to_tensor( + [[[5, 8], [2, 1]], [[7, 3], [9, 6]]], dtype='float32' + ) + + min_result = paddle.compat.min(data, dim=1) + self.assertEqual(min_result.values.shape, [2, 2]) + np.testing.assert_array_equal( + min_result.values.numpy(), np.array([[2, 1], [7, 3]]) + ) + np.testing.assert_array_equal( + min_result.indices.numpy(), np.array([[1, 1], [0, 0]]) + ) + + max_result = paddle.compat.max(data, dim=2) + self.assertEqual(max_result.values.shape, [2, 2]) + np.testing.assert_array_equal( + max_result.values.numpy(), np.array([[8, 2], [7, 9]]) + ) + np.testing.assert_array_equal( + max_result.indices.numpy(), np.array([[1, 0], [0, 0]]) + ) + + min_result_keep = paddle.compat.min(data, dim=0, keepdim=True) + self.assertEqual(min_result_keep.values.shape, [1, 2, 2]) + np.testing.assert_array_equal( + min_result_keep.values.numpy(), np.array([[[5, 3], [2, 1]]]) + ) + + min_result_neg = paddle.compat.min(data, dim=-2) + np.testing.assert_array_equal( + min_result_neg.values.numpy(), min_result.values.numpy() + ) + + def test_case2_grad(self): + data = paddle.to_tensor( + [[[1.0, 2.0], [1.0, 3.0]], [[4.0, 1.0], [5.0, 1.0]]], + dtype='float32', + stop_gradient=False, + ) + y = data * 2 + + min_result = paddle.compat.min(y, dim=2) + min_result.values.backward() + + expected_grad = np.array( + [[[2.0, 0.0], [2.0, 0.0]], [[0.0, 2.0], [0.0, 2.0]]] + ) + np.testing.assert_allclose(data.grad.numpy(), expected_grad, atol=1e-6) + + def test_case3_elementwise(self): + """minimum/maximum""" + x = paddle.to_tensor([[1, 5], [4, 2]], dtype='float32') + y = paddle.to_tensor([[3, 2], [1, 6]], dtype='float32') + + min_result = paddle.compat.min(x, y) + np.testing.assert_array_equal( + min_result.numpy(), np.array([[1, 2], [1, 2]]) + ) + + max_result = paddle.compat.max(x, y) + np.testing.assert_array_equal( + max_result.numpy(), np.array([[3, 5], [4, 6]]) + ) + + z = paddle.to_tensor([3, 4], dtype='float32') + broadcast_min = paddle.compat.min(x, z) + np.testing.assert_array_equal( + broadcast_min.numpy(), np.array([[1, 4], [3, 2]]) + ) + + def test_case3_grad(self): + x = paddle.to_tensor( + [[1.0, 2.0], [3.0, 4.0]], dtype=paddle.float16, stop_gradient=False + ) + y = paddle.to_tensor( + [[0.5, 2.5], [2.0, 3.5]], dtype=paddle.float16, stop_gradient=False + ) + + min_val = paddle.compat.min(x, y) + min_val.backward() + + expected_x_grad = np.array([[0.0, 1.0], [0.0, 0.0]]) + np.testing.assert_allclose(x.grad.numpy(), expected_x_grad) + + expected_y_grad = np.array([[1.0, 0.0], [1.0, 1.0]]) + np.testing.assert_allclose(y.grad.numpy(), expected_y_grad) + + def test_edge_cases(self): + """Edge cases test""" + # uniform distributed gradient + uniform_data = paddle.ones([2, 3], dtype='float64') + 
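+        # all elements along dim 0 are equal, so the gradient below is split evenly (0.5 each)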
uniform_data.stop_gradient = False + min_val = paddle.compat.min(uniform_data, 0) + min_val.values.sum().backward() + + expected_grad = np.full((2, 3), 0.5) + np.testing.assert_allclose(uniform_data.grad.numpy(), expected_grad) + + # 0-dim tensor + dim0_tensor = paddle.to_tensor(2, dtype='float32') + max_val = paddle.compat.max(dim0_tensor) + np.testing.assert_allclose( + max_val.numpy(), np.array(2.0, dtype=np.float32) + ) + + # 1-dim tensor + dim1_tensor = paddle.to_tensor([1], dtype='uint8') + max_val = paddle.compat.max(dim1_tensor, dim=-1, keepdim=True) + np.testing.assert_array_equal( + max_val[0].numpy(), np.array([1], dtype=np.uint8) + ) + np.testing.assert_array_equal( + max_val[1].numpy(), np.array([0], dtype=np.int64) + ) + + def test_compare_with_index_ops_to_origin(self): + dtypes = ['float32', 'float64', 'bfloat16', 'float16', 'int32', 'int64'] + + for i, dtype in enumerate(dtypes): + data = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype=dtype) + min_vals_inds = paddle.compat.min(data, dim=0) + self.assertEqual(min_vals_inds.values.dtype, data.dtype) + self.assertEqual(min_vals_inds.indices.dtype, paddle.int64) + + origin_values = paddle.min(data, axis=0) + origin_indices = paddle.argmin(data, axis=0, dtype="int64") + if i < 4: # floating point + np.testing.assert_allclose( + min_vals_inds.values.numpy(), origin_values.numpy() + ) + else: + np.testing.assert_array_equal( + min_vals_inds.values.numpy(), origin_values.numpy() + ) + np.testing.assert_array_equal( + min_vals_inds[1].numpy(), origin_indices.numpy() + ) + + def test_error_handling(self): + """Test whether correct exception will be thrown. Skip error messages (some of them are long)""" + + err_msg1 = ( + "Tensors with integral type: 'paddle.int32' should stop gradient." + ) + + # empty tensor + empty_tensor = paddle.to_tensor([], dtype='float32') + with self.assertRaises(ValueError): + paddle.compat.min(empty_tensor) + + # mixed parameters case 1 + input_ts = paddle.to_tensor([1, 2, 3], dtype='float32') + other_ts = paddle.to_tensor([1]) + with self.assertRaises(TypeError): + paddle.compat.min(input_ts, other=other_ts, dim=0) + + # mixed parameters case 2 + with self.assertRaises(TypeError): + paddle.compat.min(input_ts, 0, other=other_ts) + + # trying to perform grad ops for integral types + with self.assertRaises(TypeError) as cm: + tensor = paddle.ones([2, 2], dtype=paddle.int32) + tensor.stop_gradient = False + tensors = paddle.compat.max(tensor, dim=0) + self.assertEqual(str(cm.exception), err_msg1) + + # explicit None case 1 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, dim=None) + + # explicit None case 2 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, None, keepdim=True) + + # keepdim specified without specifying dim + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, keepdim=True) + + # Wrong *args specification case 1 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, False) + + # Wrong *args specification case 2 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, other_ts, True) + + # Tensor input for dim case 1 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, dim=paddle.to_tensor([0])) + + # Tensor input for dim case 2 + with self.assertRaises(TypeError) as cm: + paddle.compat.min(input_ts, dim=paddle.to_tensor(0)) + + # Duplicate Arguments case 1 + with self.assertRaises(TypeError) as cm: + paddle.compat.max(input_ts, 0, dim=0) + + # Duplicate Arguments case 2 + 
with self.assertRaises(TypeError) as cm: + paddle.compat.max(input_ts, other_ts, other=0) + + # Duplicate Arguments case 3 + with self.assertRaises(TypeError) as cm: + paddle.compat.max(input_ts, dim=0, other=0, keepdim=True) + + +if __name__ == '__main__': + unittest.main() From adb4c253ac77956757ae77bad48dfa1f348e3761 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Sun, 10 Aug 2025 08:19:17 +0000 Subject: [PATCH 09/24] [WIP][API-Compat] Fixed CPU failure --- python/paddle/tensor/compat.py | 12 ++++-------- python/paddle/tensor/math.py | 12 ++++++++++++ test/legacy_test/test_compat_minmax.py | 18 ++++++++++++++++++ 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index c6cfdee988d34b..9df69ca7e24d28 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -593,10 +593,8 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: return MinMaxRetType(values=vals, indices=inds) else: # CPUPlace and other placements are implemented by composition - indices = _C_ops.argmin( - input, dim_or_other, True, False, paddle.int64 - ) - values = _C_ops.take_along_axis(input, indices, dim_or_other) + indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) + values = paddle.take_along_axis(input, indices, axis=dim_or_other) if keepdim: return MinMaxRetType(values=values, indices=indices) return MinMaxRetType( @@ -708,10 +706,8 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: return MinMaxRetType(values=vals, indices=inds) else: # CPUPlace and other placements are implemented by composition - indices = _C_ops.argmax( - input, dim_or_other, True, False, paddle.int64 - ) - values = _C_ops.take_along_axis(input, indices, dim_or_other) + indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) + values = paddle.take_along_axis(input, indices, axis=dim_or_other) if keepdim: return MinMaxRetType(values=values, indices=indices) return MinMaxRetType( diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index 1f84b1d6067e4f..da5eedfe992c26 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -110,6 +110,8 @@ from paddle import Tensor from paddle._typing import DTypeLike +from paddle.utils.decorator_utils import ForbidKeywordsDecorator + __all__ = [] _supported_int_dtype_ = [ @@ -3272,6 +3274,11 @@ def _check_input(x): return out +@ForbidKeywordsDecorator( + illegal_keys=["input", "dim", "other"], + func_name="paddle.max", + correct_name="paddle.compat.max", +) def max( x: Tensor, axis: int | Sequence[int] | None = None, @@ -3431,6 +3438,11 @@ def max( return out +@ForbidKeywordsDecorator( + illegal_keys=["input", "dim", "other"], + func_name="paddle.min", + correct_name="paddle.compat.min", +) def min( x: Tensor, axis: int | Sequence[int] | None = None, diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 1db80522e7b7f5..5adf80fecbf5d8 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -187,6 +187,14 @@ def test_error_handling(self): err_msg1 = ( "Tensors with integral type: 'paddle.int32' should stop gradient." ) + err_msg2 = ( + "paddle.min() received unexpected keyword arguments 'input', 'dim'. " + "\nDid you mean to use paddle.compat.min() instead?" + ) + err_msg3 = ( + "paddle.compat.max() received unexpected keyword argument 'axis'. " + "\nDid you mean to use paddle.max() instead?" 
+ ) # empty tensor empty_tensor = paddle.to_tensor([], dtype='float32') @@ -250,6 +258,16 @@ def test_error_handling(self): with self.assertRaises(TypeError) as cm: paddle.compat.max(input_ts, dim=0, other=0, keepdim=True) + # Wrong API used case 1 + with self.assertRaises(TypeError) as cm: + paddle.min(input=input_ts, dim=0) + self.assertEqual(str(cm.exception), err_msg2) + + # Wrong API used case 2 + with self.assertRaises(TypeError) as cm: + paddle.compat.max(input_ts, axis=0) + self.assertEqual(str(cm.exception), err_msg3) + if __name__ == '__main__': unittest.main() From fd6adf01e125cc59cea26b517b765959f95001ad Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Sun, 10 Aug 2025 16:35:43 +0000 Subject: [PATCH 10/24] [API-Compat] Correct min/max_with index gradient behavior --- .../gpu/min_max_with_index_grad_kernel.cu | 115 ++++++++++++++++++ .../kernels/gpu/min_max_with_index_kernel.cu | 12 +- paddle/phi/kernels/gpu/reduce_kernel.cu | 100 --------------- .../min_max_with_index_grad_kernel.h.h | 42 ------- .../phi/kernels/min_max_with_index_kernel.h | 8 +- paddle/phi/ops/yaml/backward.yaml | 8 +- paddle/phi/ops/yaml/ops.yaml | 4 +- python/paddle/tensor/compat.py | 23 +++- test/legacy_test/test_compat_minmax.py | 49 +++++++- 9 files changed, 195 insertions(+), 166 deletions(-) create mode 100644 paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu delete mode 100644 paddle/phi/kernels/min_max_with_index_grad_kernel.h.h diff --git a/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu b/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu new file mode 100644 index 00000000000000..f34d03bf07e506 --- /dev/null +++ b/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu @@ -0,0 +1,115 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/common/place.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/utils/data_type.h" +#include "paddle/phi/kernels/funcs/gather_scatter_functor.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +namespace phi { + +template +using EnableIfInteger = + typename std::enable_if::value, int>::type; + +template +using EnableIfNonInteger = + typename std::enable_if::value, int>::type; + +// Here if keepdim=True, this will fallback to a simplified version of +// take_along_axis. 
However, if keepdim=False (by default), indices will +// not have equal rank will the input values (and values_grad), therefore +// needs an unsqueeze operation by shallow copying indices and Resize +#define DEFINE_WITH_INDEX_GRAD_KERNEL(OpType) \ + template = 0> \ + void OpType##WithIndexGradKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& values, \ + const DenseTensor& indices, \ + const DenseTensor& values_grad, \ + const Scalar& dim, \ + bool keepdim, \ + DenseTensor* x_grad) { \ + x_grad->Resize(x.dims()); \ + dev_ctx.template Alloc(x_grad); \ + if (x_grad->numel() == 0) { \ + return; \ + } \ + int64_t dim_val = dim.to(); \ + if (dim_val < 0) { \ + dim_val += x.dims().size(); \ + } \ + DenseTensor shallow_copied_inds(indices); \ + if (!keepdim) { \ + auto indices_dim = x.dims(); \ + indices_dim[dim_val] = 1; \ + shallow_copied_inds.Resize(indices_dim); \ + } \ + phi::funcs::SetConstant functor; \ + functor(dev_ctx, x_grad, static_cast(0)); \ + phi::funcs::gpu_scatter_add_kernel( \ + *x_grad, dim_val, shallow_copied_inds, values_grad, true, dev_ctx); \ + } \ + template = 0> \ + void OpType##WithIndexGradKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& values, \ + const DenseTensor& indices, \ + const DenseTensor& values_grad, \ + const Scalar& dim, \ + bool keepdim, \ + DenseTensor* x_grad) { \ + std::string dtype_name = phi::DataTypeToString(values.dtype()); \ + PADDLE_ENFORCE_EQ( \ + 0, \ + 1, \ + phi::errors::InvalidArgument( \ + "Integer type '%s' is not allowed to have stop_gradient=False.", \ + dtype_name.c_str())); \ + } + +DEFINE_WITH_INDEX_GRAD_KERNEL(Max) +DEFINE_WITH_INDEX_GRAD_KERNEL(Min) + +#undef DEFINE_WITH_INDEX_GRAD_KERNEL + +} // namespace phi + +PD_REGISTER_KERNEL(max_with_index_grad, + GPU, + ALL_LAYOUT, + phi::MaxWithIndexGradKernel, + float, + double, + uint8_t, + int, + int16_t, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} + +PD_REGISTER_KERNEL(min_with_index_grad, + GPU, + ALL_LAYOUT, + phi::MinWithIndexGradKernel, + float, + double, + uint8_t, + int, + int16_t, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu index 57699f2f97e83e..2509c34fb0c8fd 100644 --- a/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu +++ b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu @@ -254,25 +254,25 @@ void MinMaxWithIndexOpCUDAKernel(const Context& dev_ctx, template void MinWithIndexKernel(const Context& dev_ctx, const DenseTensor& x, - const Scalar& axis, - bool keepdims, + const Scalar& dim, + bool keepdim, bool flatten, DenseTensor* val_out, DenseTensor* ind_out) { MinMaxWithIndexOpCUDAKernel( - dev_ctx, x, axis, keepdims, flatten, val_out, ind_out); + dev_ctx, x, dim, keepdim, flatten, val_out, ind_out); } template void MaxWithIndexKernel(const Context& dev_ctx, const DenseTensor& x, - const Scalar& axis, - bool keepdims, + const Scalar& dim, + bool keepdim, bool flatten, DenseTensor* val_out, DenseTensor* ind_out) { MinMaxWithIndexOpCUDAKernel( - dev_ctx, x, axis, keepdims, flatten, val_out, ind_out); + dev_ctx, x, dim, keepdim, flatten, val_out, ind_out); } #endif diff --git a/paddle/phi/kernels/gpu/reduce_kernel.cu b/paddle/phi/kernels/gpu/reduce_kernel.cu index 089cb3601f0a5b..3f55297474015c 100644 --- a/paddle/phi/kernels/gpu/reduce_kernel.cu +++ b/paddle/phi/kernels/gpu/reduce_kernel.cu @@ -160,80 +160,6 @@ void ReduceAMaxGradKernel(const Context& dev_ctx, 
dev_ctx, x, out, out_grad, dims, keep_dim, reduce_all, x_grad); } -template -using EnableIfInteger = - typename std::enable_if::value, int>::type; - -template -using EnableIfNonInteger = - typename std::enable_if::value, int>::type; - -template = 0> -void MinWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad) { - int64_t dim_val = dim.to(); - flatten = recompute_reduce_all(x, {dim_val}, flatten); - ReduceCudaAMaxAMinGrad( - dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); -} - -template = 0> -void MinWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad) { - std::string dtype_name = phi::DataTypeToString(x.dtype()); - PADDLE_ENFORCE_EQ( - 0, - 1, - phi::errors::InvalidArgument( - "Integer type '%s' is not allowed to have stop_gradient=False.", - dtype_name.c_str())); -} - -template = 0> -void MaxWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad) { - int64_t dim_val = dim.to(); - flatten = recompute_reduce_all(x, {dim_val}, flatten); - ReduceCudaAMaxAMinGrad( - dev_ctx, x, values, values_grad, {dim_val}, keepdims, flatten, x_grad); -} - -template = 0> -void MaxWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad) { - std::string dtype_name = phi::DataTypeToString(x.dtype()); - PADDLE_ENFORCE_EQ( - 0, - 1, - phi::errors::InvalidArgument( - "Integer type '%s' is not allowed to have stop_gradient=False.", - dtype_name.c_str())); -} - template void ReduceMaxGradKernel(const Context& dev_ctx, const DenseTensor& x, @@ -359,19 +285,6 @@ PD_REGISTER_KERNEL(max_grad, phi::dtype::float16, phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(max_with_index_grad, - GPU, - ALL_LAYOUT, - phi::MaxWithIndexGradKernel, - float, - double, - uint8_t, - int, - int16_t, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16) {} - PD_REGISTER_KERNEL(mean_grad, GPU, ALL_LAYOUT, @@ -398,19 +311,6 @@ PD_REGISTER_KERNEL(min_grad, phi::dtype::float16, phi::dtype::bfloat16) {} -PD_REGISTER_KERNEL(min_with_index_grad, - GPU, - ALL_LAYOUT, - phi::MinWithIndexGradKernel, - float, - double, - uint8_t, - int, - int16_t, - int64_t, - phi::dtype::float16, - phi::dtype::bfloat16) {} - PD_REGISTER_KERNEL(sum_grad, GPU, ALL_LAYOUT, diff --git a/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h b/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h deleted file mode 100644 index 30c4fd34bb281e..00000000000000 --- a/paddle/phi/kernels/min_max_with_index_grad_kernel.h.h +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#pragma once - -#include "paddle/phi/common/scalar.h" -#include "paddle/phi/core/dense_tensor.h" - -namespace phi { - -template -void MaxWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad); - -template -void MinWithIndexGradKernel(const Context& dev_ctx, - const DenseTensor& x, - const DenseTensor& values, - const DenseTensor& values_grad, - const Scalar& dim, - bool keepdims, - bool flatten, - DenseTensor* x_grad); - -} // namespace phi diff --git a/paddle/phi/kernels/min_max_with_index_kernel.h b/paddle/phi/kernels/min_max_with_index_kernel.h index 2e5ad70feaaec4..eca50fc3a752e8 100644 --- a/paddle/phi/kernels/min_max_with_index_kernel.h +++ b/paddle/phi/kernels/min_max_with_index_kernel.h @@ -22,8 +22,8 @@ namespace phi { template void MinWithIndexKernel(const Context& dev_ctx, const DenseTensor& x, - const Scalar& axis, - bool keepdims, + const Scalar& dim, + bool keepdim, bool flatten, DenseTensor* val_out, DenseTensor* ind_out); @@ -31,8 +31,8 @@ void MinWithIndexKernel(const Context& dev_ctx, template void MaxWithIndexKernel(const Context& dev_ctx, const DenseTensor& x, - const Scalar& axis, - bool keepdims, + const Scalar& dim, + bool keepdim, bool flatten, DenseTensor* val_out, DenseTensor* ind_out); diff --git a/paddle/phi/ops/yaml/backward.yaml b/paddle/phi/ops/yaml/backward.yaml index f75e509f6aaa22..154b99e557fabf 100644 --- a/paddle/phi/ops/yaml/backward.yaml +++ b/paddle/phi/ops/yaml/backward.yaml @@ -2278,8 +2278,8 @@ func : max_pool3d_with_index_grad - backward_op : max_with_index_grad - forward : max_with_index (Tensor x, Scalar axis, bool keepdims, bool flatten) -> Tensor(values), Tensor(indices) - args : (Tensor x, Tensor values, Tensor values_grad, Scalar axis, bool keepdims, bool flatten) + forward : max_with_index (Tensor x, Scalar dim, bool keepdim, bool flatten) -> Tensor(values), Tensor(indices) + args : (Tensor x, Tensor values, Tensor indices, Tensor values_grad, Scalar dim, bool keepdim) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta @@ -2351,8 +2351,8 @@ data_type : out_grad - backward_op : min_with_index_grad - forward : min_with_index (Tensor x, Scalar axis, bool keepdims, bool flatten) -> Tensor(values), Tensor(indices) - args : (Tensor x, Tensor values, Tensor values_grad, Scalar axis, bool keepdims, bool flatten) + forward : min_with_index (Tensor x, Scalar dim, bool keepdim, bool flatten) -> Tensor(values), Tensor(indices) + args : (Tensor x, Tensor values, Tensor indices, Tensor values_grad, Scalar dim, bool keepdim) output : Tensor(x_grad) infer_meta : func : UnchangedInferMeta diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml index 9d84600531a792..78dcca6d579589 100644 --- a/paddle/phi/ops/yaml/ops.yaml +++ b/paddle/phi/ops/yaml/ops.yaml @@ -3578,7 +3578,7 @@ interfaces : paddle::dialect::InferSymbolicShapeInterface - op : max_with_index - args : (Tensor x, Scalar(int64_t) axis, bool keepdims = false, bool flatten = false) + args : (Tensor 
x, Scalar(int64_t) dim, bool keepdim = false, bool flatten = false) output : Tensor(values), Tensor(indices) infer_meta : func : MinMaxWithIndexInferMeta @@ -3698,7 +3698,7 @@ interfaces : paddle::dialect::InferSymbolicShapeInterface - op : min_with_index - args : (Tensor x, Scalar(int64_t) axis, bool keepdims = false, bool flatten = false) + args : (Tensor x, Scalar(int64_t) dim, bool keepdim = false, bool flatten = false) output : Tensor(values), Tensor(indices) infer_meta : func : MinMaxWithIndexInferMeta diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 9df69ca7e24d28..2ff268d916a2d6 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -492,6 +492,21 @@ def _min_max_tensor_allow_grad(input: Tensor): ) +def _min_max_allow_cpu_composite(input: Tensor): + """paddle.min/argmin(max/argmax), paddle.take_along_axis reject the following types""" + in_dtype = input.dtype + if ( + in_dtype == paddle.float16 + or in_dtype == paddle.bfloat16 + or in_dtype == paddle.int16 + ): + if not input.place.is_gpu_place(): + raise TypeError( + f"Non-CUDA GPU placed Tensor does not have '{in_dtype}' op registered.\n" + "Paddle support following DataTypes: int32, int64, float64, float32, uint8" + ) + + @ForbidKeywordsDecorator( illegal_keys=['x', 'axis'], func_name="paddle.compat.min", @@ -510,7 +525,8 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: following torch.min. The gradient behavior of `values` for case 2 is the same as `paddle.amin`. Args: - input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64. + input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64 on GPU. + uint8, int32, int64, float32, float64 are allowed on CPU. dim (int, optional): The dim along which the minimum is computed. If this is not specified: see case 1, note that: `None` cannot be passed to this (TypeError will be thrown) compute the minimum over all elements of `input` and return a Tensor with a single element, @@ -592,6 +608,7 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: inds.stop_gradient = True return MinMaxRetType(values=vals, indices=inds) else: + _min_max_allow_cpu_composite(input) # CPUPlace and other placements are implemented by composition indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) values = paddle.take_along_axis(input, indices, axis=dim_or_other) @@ -623,7 +640,8 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: following torch.max. The gradient behavior of `values` for case 2 is the same as `paddle.amax`. Args: - input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64. + input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64 on GPU. + uint8, int32, int64, float32, float64 are allowed on CPU. dim (int, optional): The dim along which the maximum is computed. 
If this is not specified: see case 1, note that: `None` cannot be passed to this (TypeError will be thrown) compute the maximum over all elements of `input` and return a Tensor with a single element, @@ -705,6 +723,7 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: inds.stop_gradient = True return MinMaxRetType(values=vals, indices=inds) else: + _min_max_allow_cpu_composite(input) # CPUPlace and other placements are implemented by composition indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) values = paddle.take_along_axis(input, indices, axis=dim_or_other) diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 5adf80fecbf5d8..4bf3f7ebed52f6 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -92,6 +92,15 @@ def test_case2_grad(self): ) np.testing.assert_allclose(data.grad.numpy(), expected_grad, atol=1e-6) + data.clear_grad() + y = data * data + min_result = paddle.compat.min(y, dim=1) + min_result[0].backward() + expected_grad = np.array( + [[[2.0, 4.0], [0.0, 0.0]], [[8.0, 2.0], [0.0, 0.0]]] + ) + np.testing.assert_allclose(data.grad.numpy(), expected_grad, atol=1e-6) + def test_case3_elementwise(self): """minimum/maximum""" x = paddle.to_tensor([[1, 5], [4, 2]], dtype='float32') @@ -115,10 +124,10 @@ def test_case3_elementwise(self): def test_case3_grad(self): x = paddle.to_tensor( - [[1.0, 2.0], [3.0, 4.0]], dtype=paddle.float16, stop_gradient=False + [[1.0, 2.0], [3.0, 4.0]], dtype=paddle.float32, stop_gradient=False ) y = paddle.to_tensor( - [[0.5, 2.5], [2.0, 3.5]], dtype=paddle.float16, stop_gradient=False + [[0.5, 2.5], [2.0, 3.5]], dtype=paddle.float32, stop_gradient=False ) min_val = paddle.compat.min(x, y) @@ -135,10 +144,17 @@ def test_edge_cases(self): # uniform distributed gradient uniform_data = paddle.ones([2, 3], dtype='float64') uniform_data.stop_gradient = False + min_val = paddle.compat.min(uniform_data) + min_val.sum().backward() + # uniformly distributed (amin) + expected_grad = np.full((2, 3), 1.0 / 6.0) + np.testing.assert_allclose(uniform_data.grad.numpy(), expected_grad) + + uniform_data.clear_grad() min_val = paddle.compat.min(uniform_data, 0) min_val.values.sum().backward() - - expected_grad = np.full((2, 3), 0.5) + # take_along_axis like gradient behavior + expected_grad = np.array([[1.0, 1.0, 1.0], [0.0, 0.0, 0.0]]) np.testing.assert_allclose(uniform_data.grad.numpy(), expected_grad) # 0-dim tensor @@ -159,16 +175,26 @@ def test_edge_cases(self): ) def test_compare_with_index_ops_to_origin(self): - dtypes = ['float32', 'float64', 'bfloat16', 'float16', 'int32', 'int64'] + dtypes = ['float32', 'float64', 'int32', 'int64', 'uint8'] + cpu_reject_types = {'int16', 'bfloat16', 'float16'} for i, dtype in enumerate(dtypes): data = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype=dtype) + # `bfloat16` and `float16` are rejected on CPU + if not data.place.is_gpu_place() and dtype in cpu_reject_types: + continue min_vals_inds = paddle.compat.min(data, dim=0) self.assertEqual(min_vals_inds.values.dtype, data.dtype) self.assertEqual(min_vals_inds.indices.dtype, paddle.int64) - origin_values = paddle.min(data, axis=0) origin_indices = paddle.argmin(data, axis=0, dtype="int64") + if dtype != 'uint8': + origin_values = paddle.min(data, axis=0) + else: + origin_values = paddle.take_along_axis( + data, origin_indices.unsqueeze(0), axis=0 + ) + origin_values.squeeze_(axis=0) if i < 4: # floating point np.testing.assert_allclose( 
min_vals_inds.values.numpy(), origin_values.numpy() @@ -195,6 +221,10 @@ def test_error_handling(self): "paddle.compat.max() received unexpected keyword argument 'axis'. " "\nDid you mean to use paddle.max() instead?" ) + err_msg4 = ( + "Non-CUDA GPU placed Tensor does not have 'paddle.float16' op registered.\n" + "Paddle support following DataTypes: int32, int64, float64, float32, uint8" + ) # empty tensor empty_tensor = paddle.to_tensor([], dtype='float32') @@ -268,6 +298,13 @@ def test_error_handling(self): paddle.compat.max(input_ts, axis=0) self.assertEqual(str(cm.exception), err_msg3) + # Rejected on CPU types + with self.assertRaises(TypeError) as cm: + tensor = paddle.to_tensor([1, 2, 3], dtype="float16") + cpu_tensor = tensor.to("cpu") + paddle.compat.max(cpu_tensor, dim=0) + self.assertEqual(str(cm.exception), err_msg4) + if __name__ == '__main__': unittest.main() From 30815564cb95b3f57acf7b4a948d723a0d6e1879 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Mon, 11 Aug 2025 02:01:36 +0000 Subject: [PATCH 11/24] [API-Compat] XPU fix (attempt) --- python/paddle/tensor/compat.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 2ff268d916a2d6..150e10dc27485a 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -599,7 +599,11 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("min", *args, **kwargs) if dim_or_other is None: - return _C_ops.min(input, None, False) + if input.numel() == 0: + raise ValueError( + "Reduce max cannot apply on empty tensor (numel == 0)" + ) + return paddle.amin(input) elif isinstance(dim_or_other, int): if input.place.is_gpu_place(): vals, inds = _C_ops.min_with_index( @@ -714,7 +718,11 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("max", *args, **kwargs) if dim_or_other is None: - return _C_ops.max(input, None, False) + if input.numel() == 0: + raise ValueError( + "Reduce max cannot apply on empty tensor (numel == 0)" + ) + return paddle.amax(input) elif isinstance(dim_or_other, int): if input.place.is_gpu_place(): vals, inds = _C_ops.max_with_index( From cd8d6aec949fa67aad4671685bd2b9ca90489e00 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Mon, 11 Aug 2025 02:35:15 +0000 Subject: [PATCH 12/24] [API-Compat] Updated ForbidKeywordsDecorator --- python/paddle/utils/decorator_utils.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index 8f0c55e38caf5c..a9bee20a6efae8 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -247,32 +247,6 @@ def wrapper(*args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT: return decorator -# *size => shape decorator -class SizeArgsDecorator(DecoratorBase): - """ - Usage Example: - - paddle.ones(1, dtype=paddle.float32) - paddle.ones(1, 2, 3, dtype=paddle.float32) - paddle.ones([1, 2, 3], dtype=paddle.float32) - paddle.ones(size=[1, 2, 3], dtype=paddle.float32) - - paddle.ones([1, 2, 3], paddle.float32) - paddle.ones(shape=[1, 2, 3], dtype=paddle.float32) - """ - - def process( - self, args: tuple[Any, ...], kwargs: dict[str, Any] - ) -> tuple[tuple[Any, ...], dict[str, Any]]: - if 'size' in kwargs: - kwargs['shape'] = kwargs.pop('size') - elif len(args) >= 1 and isinstance(args[0], int): 
- kwargs['shape'] = list(args) - args = () - - return args, kwargs - - class VariableArgsDecorator(DecoratorBase): def __init__(self, var: str) -> None: super().__init__() From 085801eae05704451fb109c19e59b4c22c4df0d7 Mon Sep 17 00:00:00 2001 From: zhwesky2010 <1183042833@qq.com> Date: Mon, 11 Aug 2025 18:08:43 +0800 Subject: [PATCH 13/24] some create api support more usage (#74494) --- python/paddle/utils/decorator_utils.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index a9bee20a6efae8..55adce7e8961c4 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -127,6 +127,21 @@ def __init__( self.default_params = default_params warnings.simplefilter("always", category=Warning) + +# *size => shape decorator +class SizeArgsDecorator(DecoratorBase): + """ + Usage Example: + + paddle.ones(1, dtype=paddle.float32) + paddle.ones(1, 2, 3, dtype=paddle.float32) + paddle.ones([1, 2, 3], dtype=paddle.float32) + paddle.ones(size=[1, 2, 3], dtype=paddle.float32) + + paddle.ones([1, 2, 3], paddle.float32) + paddle.ones(shape=[1, 2, 3], dtype=paddle.float32) + """ + def process( self, args: tuple[Any, ...], kwargs: dict[str, Any] ) -> tuple[tuple[Any, ...], dict[str, Any]]: From 2864eb09e901d66871c0c8c6fc581484a8bf1c58 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Mon, 11 Aug 2025 12:29:41 +0000 Subject: [PATCH 14/24] [API-Compat] Static Graph and CPU end debug --- .../infer_symbolic_shape/unary_infer_sym.cc | 42 ++- .../kernels/cpu/min_max_with_index_kernel.cc | 96 ++++++ python/paddle/tensor/compat.py | 60 ++-- test/legacy_test/test_compat_minmax.py | 290 +++++++++++------- 4 files changed, 347 insertions(+), 141 deletions(-) create mode 100644 paddle/phi/kernels/cpu/min_max_with_index_kernel.cc diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc index 16ee03501fa4ab..9bf285da4d77a9 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc @@ -319,16 +319,33 @@ bool MinMaxOpInferSymbolicShape(pir::Operation *op, pir::InferSymbolicShapeContext *infer_context, bool output_val_and_ind = false) { bool flatten = GetBoolAttr(op, "flatten"); - bool keepdims = GetBoolAttr(op, "keepdims"); + bool keepdims = false; + int axis = 0; + if (output_val_and_ind) { + keepdims = GetBoolAttr(op, "keepdim"); + + PADDLE_ENFORCE_NE( + op->attributes().find("dim"), + op->attributes().end(), + common::errors::InvalidArgument( + "'dim' Attribute is expected for Min/MaxWithIndexOp. 
")); + axis = op->attributes() + .at("dim") + .dyn_cast() + .data() + .to(); + } else { + keepdims = GetBoolAttr(op, "keepdims"); + const auto &axis_shape_or_data = + infer_context->GetShapeOrDataForValue(op->operand_source(1)); + axis = static_cast( + axis_shape_or_data.data().value().at(0).Get()); + } const auto &input_sym_shape = infer_context->GetShapeOrDataForValue(op->operand_source(0)).shape(); - int rank = input_sym_shape.size(); - const auto &axis_shape_or_data = - infer_context->GetShapeOrDataForValue(op->operand_source(1)); - int axis = - static_cast(axis_shape_or_data.data().value().at(0).Get()); + int rank = input_sym_shape.size(); if (axis < 0) axis += rank; const auto &out_sym_shape = [&] { @@ -369,13 +386,20 @@ bool MinMaxOpInferSymbolicShape(pir::Operation *op, return MinMaxOpInferSymbolicShape(op, infer_context, output_val_and_ind); \ } -DEFINE_MINMAX_OP_INFER_FUNC(Argmin, false) DEFINE_MINMAX_OP_INFER_FUNC(Argmax, false) -DEFINE_MINMAX_OP_INFER_FUNC(MinWithIndex, true) DEFINE_MINMAX_OP_INFER_FUNC(MaxWithIndex, true) - #undef DEFINE_MINMAX_OP_INFER_FUNC +bool ArgminOpInferSymbolicShape(pir::Operation *op, + pir::InferSymbolicShapeContext *infer_context) { + return ArgmaxOpInferSymbolicShape(op, infer_context); +} + +bool MinWithIndexOpInferSymbolicShape( + pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) { + return MaxWithIndexOpInferSymbolicShape(op, infer_context); +} + bool AsComplexOpInferSymbolicShape( pir::Operation *op, pir::InferSymbolicShapeContext *infer_context) { pir::Value operand_source = op->operand_source(0); diff --git a/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc b/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc new file mode 100644 index 00000000000000..f373553389e422 --- /dev/null +++ b/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc @@ -0,0 +1,96 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/min_max_with_index_kernel.h" + +#include "paddle/common/ddim.h" +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/core/utils/data_type.h" +#include "paddle/phi/kernels/funcs/eigen/common.h" +#include "paddle/phi/kernels/funcs/math_function.h" + +#define DEFINE_WITH_INDEX_KERNEL(OpType, name) \ + template \ + void OpType##WithIndexKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const Scalar& dim, \ + bool keepdim, \ + bool flatten, \ + DenseTensor* val_out, \ + DenseTensor* ind_out) { \ + PADDLE_ENFORCE_EQ(0, \ + 1, \ + phi::errors::Unimplemented( \ + "In static graph mode, %s PHI kernel is not " \ + "currently available on non-GPU devices.", \ + #name)); \ + } \ + template \ + void OpType##WithIndexGradKernel(const Context& dev_ctx, \ + const DenseTensor& x, \ + const DenseTensor& values, \ + const DenseTensor& indices, \ + const DenseTensor& values_grad, \ + const Scalar& dim, \ + bool keepdim, \ + DenseTensor* x_grad) { \ + PADDLE_ENFORCE_EQ(0, \ + 1, \ + phi::errors::Unimplemented( \ + "In static graph mode, %s PHI kernel is not " \ + "currently available on non-GPU devices.", \ + #name)); \ + } + +namespace phi { + +DEFINE_WITH_INDEX_KERNEL(Min, min_with_index) +DEFINE_WITH_INDEX_KERNEL(Max, max_with_index) +#undef DEFINE_WITH_INDEX_KERNEL + +} // namespace phi + +#define REGISTER_CPU_KERNELS(OpType, OpName) \ + PD_REGISTER_KERNEL(OpName, \ + CPU, \ + ALL_LAYOUT, \ + phi::OpType##WithIndexKernel, \ + phi::dtype::float16, \ + phi::dtype::bfloat16, \ + float, \ + double, \ + int32_t, \ + int64_t, \ + int16_t, \ + uint8_t) { \ + kernel->OutputAt(0).SetDataType(kernel->InputAt(0).dtype); \ + kernel->OutputAt(1).SetDataType(phi::DataType::INT64); \ + } \ + PD_REGISTER_KERNEL(OpName##_grad, \ + CPU, \ + ALL_LAYOUT, \ + phi::OpType##WithIndexGradKernel, \ + float, \ + double, \ + uint8_t, \ + int, \ + int16_t, \ + int64_t, \ + phi::dtype::float16, \ + phi::dtype::bfloat16) {} + +REGISTER_CPU_KERNELS(Min, min_with_index) +REGISTER_CPU_KERNELS(Max, max_with_index) +#undef REGISTER_CPU_KERNELS diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 150e10dc27485a..fb7e21f5c5ebfe 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -471,7 +471,7 @@ def try_get_keys(key): and type(dim_or_other) is not int ): raise invalid_arguments_exception( - f"The second input must be int or Tensor or implicit None in compat.min, but received {type(dim_or_other)}.\n" + f"The second input must be int or Tensor or implicit None in compat.{func_name}, but received {type(dim_or_other)}.\n" ) return dim_or_other, keepdim @@ -500,11 +500,10 @@ def _min_max_allow_cpu_composite(input: Tensor): or in_dtype == paddle.bfloat16 or in_dtype == paddle.int16 ): - if not input.place.is_gpu_place(): - raise TypeError( - f"Non-CUDA GPU placed Tensor does not have '{in_dtype}' op registered.\n" - "Paddle support following DataTypes: int32, int64, float64, float32, uint8" - ) + raise TypeError( + f"Non-CUDA GPU placed Tensor does not have '{in_dtype}' op registered.\n" + "Paddle support following DataTypes: int32, int64, float64, float32, uint8" + ) @ForbidKeywordsDecorator( @@ -521,8 +520,12 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: returns a named tuple MinMaxRetType(values: Tensor, indices: Tensor) 3. 
paddle.compat.min(input: Tensor, other: Tensor): see `paddle.minimum` - Note: If there are multiple minimum elements, this API evenly distributes gradient between these equal values, - following torch.min. The gradient behavior of `values` for case 2 is the same as `paddle.amin`. + Special warning: the gradient behavior is NOT well-documented by PyTorch, the actual behavior should be: + 1. Case 1: the same as `amin` + 2. Case 2: NOT evenly distributing the gradient for equal minimum elements! PyTorch actually only propagates to the elements with indices, + for example: Tensor([1, 1, 1]) -> min(..., dim=0) -> values=Tensor(0, ...), indices=Tensor(0), the gradient for input tensor won't be + Tensor([1/3, 1/3, 1/3]) as stated in their documentation, but will be Tensor([1, 0, 0]). This API implements a similar backward kernel. + 3. Case 3: the same as `minimum` Args: input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64 on GPU. @@ -532,6 +535,8 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: compute the minimum over all elements of `input` and return a Tensor with a single element, otherwise must be in the range :math:`[-input.ndim, input.ndim)`. If :math:`dim < 0`, the axis to reduce is :math:`input.ndim + dim`. + Warning: if `dim` is specified, execute static graph will throw exceptions + when not on a GPU device, since max_with_index is not implemented for non-GPU devices keepdim (bool, optional): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the `input` unless :attr:`keepdim` is true, default @@ -605,13 +610,7 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: ) return paddle.amin(input) elif isinstance(dim_or_other, int): - if input.place.is_gpu_place(): - vals, inds = _C_ops.min_with_index( - input, dim_or_other, keepdim, False - ) - inds.stop_gradient = True - return MinMaxRetType(values=vals, indices=inds) - else: + if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) # CPUPlace and other placements are implemented by composition indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) @@ -622,6 +621,12 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: values=values.squeeze_(axis=dim_or_other), indices=indices.squeeze_(axis=dim_or_other), ) + else: + vals, inds = _C_ops.min_with_index( + input, dim_or_other, keepdim, False + ) + inds.stop_gradient = True + return MinMaxRetType(values=vals, indices=inds) else: return _C_ops.minimum(input, dim_or_other) @@ -640,8 +645,12 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: returns a named tuple MinMaxRetType(values: Tensor, indices: Tensor) 3. paddle.compat.max(input: Tensor, other: Tensor): see `paddle.maximum` - Note: If there are multiple maximum elements, this API evenly distributes gradient between these equal values, - following torch.max. The gradient behavior of `values` for case 2 is the same as `paddle.amax`. + Special warning: the gradient behavior is NOT well-documented by PyTorch, the actual behavior should be: + 1. Case 1: the same as `amax` + 2. Case 2: NOT evenly distributing the gradient for equal maximum elements! 
PyTorch actually only propagates to the elements with indices, + for example: Tensor([1, 1, 1]) -> max(..., dim=0) -> values=Tensor(0, ...), indices=Tensor(0), the gradient for input tensor won't be + Tensor([1/3, 1/3, 1/3]) as stated in their documentation, but will be Tensor([1, 0, 0]). This API implements a similar backward kernel. + 3. Case 3: the same as `maximum` Args: input (Tensor): A tensor, the data type is bfloat16, float16, float32, float64, int32, int64 on GPU. @@ -651,6 +660,8 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: compute the maximum over all elements of `input` and return a Tensor with a single element, otherwise must be in the range :math:`[-input.ndim, input.ndim)`. If :math:`dim < 0`, the axis to reduce is :math:`input.ndim + dim`. + Warning: if `dim` is specified, execute static graph will throw exceptions + when not on a GPU device, since max_with_index is not implemented for non-GPU devices keepdim (bool, optional): Whether to reserve the reduced dimension in the output Tensor. The result tensor will have one fewer dimension than the `input` unless :attr:`keepdim` is true, default @@ -724,15 +735,8 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: ) return paddle.amax(input) elif isinstance(dim_or_other, int): - if input.place.is_gpu_place(): - vals, inds = _C_ops.max_with_index( - input, dim_or_other, keepdim, False - ) - inds.stop_gradient = True - return MinMaxRetType(values=vals, indices=inds) - else: + if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) - # CPUPlace and other placements are implemented by composition indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) values = paddle.take_along_axis(input, indices, axis=dim_or_other) if keepdim: @@ -741,5 +745,11 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: values=values.squeeze_(axis=dim_or_other), indices=indices.squeeze_(axis=dim_or_other), ) + else: + vals, inds = _C_ops.max_with_index( + input, dim_or_other, keepdim, False + ) + inds.stop_gradient = True + return MinMaxRetType(values=vals, indices=inds) else: return _C_ops.maximum(input, dim_or_other) diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 4bf3f7ebed52f6..00245894df0480 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -19,26 +19,44 @@ import paddle -class TestCompatMinMax(unittest.TestCase): - def setUp(self): - """Make sure we are in a dynamic graph env""" +class TestCompatMinMaxBase(unittest.TestCase): + """The default base class is for testing min-related ops""" + + def __init__( + self, + *args, + test_op=paddle.compat.min, + origin_op=paddle.min, + index_op=paddle.argmin, + test_op_name="paddle.compat.min", + origin_op_name="paddle.min", + **kwargs, + ): + super().__init__(*args, **kwargs) paddle.disable_static() + self.test_op = test_op + self.origin_op = origin_op + self.index_op = index_op + self.test_op_name = test_op_name + self.origin_op_name = origin_op_name def test_case1_simple_reduce_all(self): data = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]], dtype='float32') - min_val = paddle.compat.min(data) - max_val = paddle.compat.max(data) + val = self.test_op(data) - self.assertAlmostEqual(min_val.item(), 1.0) - self.assertAlmostEqual(max_val.item(), 4.0) + if self.test_op_name.endswith("min"): + self.assertAlmostEqual(val.item(), 1.0) + expected_grad = np.array([[0.5, 0.5], [0.0, 
0.0]]) + else: + self.assertAlmostEqual(val.item(), 4.0) + expected_grad = np.array([[0.0, 0.0], [0.0, 1.0]]) data = paddle.to_tensor( [[1.0, 1.0], [2.0, 3.0]], dtype='float32', stop_gradient=False ) - min_val = paddle.compat.min(data) - min_val.backward() + val = self.test_op(data) + val.backward() - expected_grad = np.array([[0.5, 0.5], [0.0, 0.0]]) np.testing.assert_allclose(data.grad.numpy(), expected_grad) def test_case2_reduce_dim(self): @@ -46,34 +64,36 @@ def test_case2_reduce_dim(self): data = paddle.to_tensor( [[[5, 8], [2, 1]], [[7, 3], [9, 6]]], dtype='float32' ) + if self.test_op_name.endswith("min"): + in_dim = 1 + result = self.test_op(data, dim=in_dim) + expected_res = np.array([[[5, 3], [2, 1]]]) + self.assertEqual(result.values.shape, [2, 2]) + np.testing.assert_array_equal( + result.values.numpy(), np.array([[2, 1], [7, 3]]) + ) + np.testing.assert_array_equal( + result.indices.numpy(), np.array([[1, 1], [0, 0]]) + ) + else: + in_dim = 2 + result = self.test_op(data, dim=in_dim) + expected_res = np.array([[[7, 8], [9, 6]]]) + self.assertEqual(result.values.shape, [2, 2]) + np.testing.assert_array_equal( + result.values.numpy(), np.array([[8, 2], [7, 9]]) + ) + np.testing.assert_array_equal( + result.indices.numpy(), np.array([[1, 0], [0, 0]]) + ) - min_result = paddle.compat.min(data, dim=1) - self.assertEqual(min_result.values.shape, [2, 2]) - np.testing.assert_array_equal( - min_result.values.numpy(), np.array([[2, 1], [7, 3]]) - ) - np.testing.assert_array_equal( - min_result.indices.numpy(), np.array([[1, 1], [0, 0]]) - ) - - max_result = paddle.compat.max(data, dim=2) - self.assertEqual(max_result.values.shape, [2, 2]) - np.testing.assert_array_equal( - max_result.values.numpy(), np.array([[8, 2], [7, 9]]) - ) - np.testing.assert_array_equal( - max_result.indices.numpy(), np.array([[1, 0], [0, 0]]) - ) - - min_result_keep = paddle.compat.min(data, dim=0, keepdim=True) - self.assertEqual(min_result_keep.values.shape, [1, 2, 2]) - np.testing.assert_array_equal( - min_result_keep.values.numpy(), np.array([[[5, 3], [2, 1]]]) - ) + result_keep = self.test_op(data, dim=0, keepdim=True) + self.assertEqual(result_keep.values.shape, [1, 2, 2]) + np.testing.assert_array_equal(result_keep.values.numpy(), expected_res) - min_result_neg = paddle.compat.min(data, dim=-2) + result_neg = self.test_op(data, dim=in_dim - 3) np.testing.assert_array_equal( - min_result_neg.values.numpy(), min_result.values.numpy() + result_neg.values.numpy(), result.values.numpy() ) def test_case2_grad(self): @@ -84,43 +104,52 @@ def test_case2_grad(self): ) y = data * 2 - min_result = paddle.compat.min(y, dim=2) - min_result.values.backward() + result = self.test_op(y, dim=2) + result.values.backward() - expected_grad = np.array( - [[[2.0, 0.0], [2.0, 0.0]], [[0.0, 2.0], [0.0, 2.0]]] - ) + if self.test_op_name.endswith("min"): + expected_grad = np.array( + [[[2.0, 0.0], [2.0, 0.0]], [[0.0, 2.0], [0.0, 2.0]]] + ) + expected_grad2 = np.array( + [[[2.0, 4.0], [0.0, 0.0]], [[8.0, 2.0], [0.0, 0.0]]] + ) + else: + expected_grad = np.array( + [[[0.0, 2.0], [0.0, 2.0]], [[2.0, 0.0], [2.0, 0.0]]] + ) + expected_grad2 = np.array( + [[[2.0, 0.0], [0.0, 6.0]], [[0.0, 2.0], [10.0, 0.0]]] + ) np.testing.assert_allclose(data.grad.numpy(), expected_grad, atol=1e-6) data.clear_grad() y = data * data - min_result = paddle.compat.min(y, dim=1) - min_result[0].backward() - expected_grad = np.array( - [[[2.0, 4.0], [0.0, 0.0]], [[8.0, 2.0], [0.0, 0.0]]] - ) - np.testing.assert_allclose(data.grad.numpy(), expected_grad, 
atol=1e-6) + result = self.test_op(y, dim=1) + result[0].backward() + np.testing.assert_allclose(data.grad.numpy(), expected_grad2, atol=1e-6) def test_case3_elementwise(self): - """minimum/maximum""" x = paddle.to_tensor([[1, 5], [4, 2]], dtype='float32') y = paddle.to_tensor([[3, 2], [1, 6]], dtype='float32') - - min_result = paddle.compat.min(x, y) - np.testing.assert_array_equal( - min_result.numpy(), np.array([[1, 2], [1, 2]]) - ) - - max_result = paddle.compat.max(x, y) - np.testing.assert_array_equal( - max_result.numpy(), np.array([[3, 5], [4, 6]]) - ) - z = paddle.to_tensor([3, 4], dtype='float32') - broadcast_min = paddle.compat.min(x, z) - np.testing.assert_array_equal( - broadcast_min.numpy(), np.array([[1, 4], [3, 2]]) - ) + broadcast_res = self.test_op(x, z) + + result = self.test_op(x, y) + if self.test_op_name.endswith("min"): + np.testing.assert_array_equal( + result.numpy(), np.array([[1, 2], [1, 2]]) + ) + np.testing.assert_array_equal( + broadcast_res.numpy(), np.array([[1, 4], [3, 2]]) + ) + else: + np.testing.assert_array_equal( + result.numpy(), np.array([[3, 5], [4, 6]]) + ) + np.testing.assert_array_equal( + broadcast_res.numpy(), np.array([[3, 5], [4, 4]]) + ) def test_case3_grad(self): x = paddle.to_tensor( @@ -130,13 +159,16 @@ def test_case3_grad(self): [[0.5, 2.5], [2.0, 3.5]], dtype=paddle.float32, stop_gradient=False ) - min_val = paddle.compat.min(x, y) - min_val.backward() + val = self.test_op(x, y) + val.backward() expected_x_grad = np.array([[0.0, 1.0], [0.0, 0.0]]) - np.testing.assert_allclose(x.grad.numpy(), expected_x_grad) - expected_y_grad = np.array([[1.0, 0.0], [1.0, 1.0]]) + if self.test_op_name.endswith("max"): + expected_x_grad = 1 - expected_x_grad + expected_y_grad = 1 - expected_y_grad + + np.testing.assert_allclose(x.grad.numpy(), expected_x_grad) np.testing.assert_allclose(y.grad.numpy(), expected_y_grad) def test_edge_cases(self): @@ -144,34 +176,32 @@ def test_edge_cases(self): # uniform distributed gradient uniform_data = paddle.ones([2, 3], dtype='float64') uniform_data.stop_gradient = False - min_val = paddle.compat.min(uniform_data) - min_val.sum().backward() - # uniformly distributed (amin) + val = self.test_op(uniform_data) + val.sum().backward() + # uniformly distributed expected_grad = np.full((2, 3), 1.0 / 6.0) np.testing.assert_allclose(uniform_data.grad.numpy(), expected_grad) uniform_data.clear_grad() - min_val = paddle.compat.min(uniform_data, 0) - min_val.values.sum().backward() + val = self.test_op(uniform_data, 0) + val.values.sum().backward() # take_along_axis like gradient behavior expected_grad = np.array([[1.0, 1.0, 1.0], [0.0, 0.0, 0.0]]) np.testing.assert_allclose(uniform_data.grad.numpy(), expected_grad) # 0-dim tensor dim0_tensor = paddle.to_tensor(2, dtype='float32') - max_val = paddle.compat.max(dim0_tensor) - np.testing.assert_allclose( - max_val.numpy(), np.array(2.0, dtype=np.float32) - ) + val = self.test_op(dim0_tensor) + np.testing.assert_allclose(val.numpy(), np.array(2.0, dtype=np.float32)) # 1-dim tensor dim1_tensor = paddle.to_tensor([1], dtype='uint8') - max_val = paddle.compat.max(dim1_tensor, dim=-1, keepdim=True) + val = self.test_op(dim1_tensor, dim=-1, keepdim=True) np.testing.assert_array_equal( - max_val[0].numpy(), np.array([1], dtype=np.uint8) + val[0].numpy(), np.array([1], dtype=np.uint8) ) np.testing.assert_array_equal( - max_val[1].numpy(), np.array([0], dtype=np.int64) + val[1].numpy(), np.array([0], dtype=np.int64) ) def test_compare_with_index_ops_to_origin(self): @@ -183,13 +213,13 @@ 
def test_compare_with_index_ops_to_origin(self): # `bfloat16` and `float16` are rejected on CPU if not data.place.is_gpu_place() and dtype in cpu_reject_types: continue - min_vals_inds = paddle.compat.min(data, dim=0) - self.assertEqual(min_vals_inds.values.dtype, data.dtype) - self.assertEqual(min_vals_inds.indices.dtype, paddle.int64) + vals_inds = self.test_op(data, dim=0) + self.assertEqual(vals_inds.values.dtype, data.dtype) + self.assertEqual(vals_inds.indices.dtype, paddle.int64) - origin_indices = paddle.argmin(data, axis=0, dtype="int64") + origin_indices = self.index_op(data, axis=0, dtype="int64") if dtype != 'uint8': - origin_values = paddle.min(data, axis=0) + origin_values = self.origin_op(data, axis=0) else: origin_values = paddle.take_along_axis( data, origin_indices.unsqueeze(0), axis=0 @@ -197,14 +227,14 @@ def test_compare_with_index_ops_to_origin(self): origin_values.squeeze_(axis=0) if i < 4: # floating point np.testing.assert_allclose( - min_vals_inds.values.numpy(), origin_values.numpy() + vals_inds.values.numpy(), origin_values.numpy() ) else: np.testing.assert_array_equal( - min_vals_inds.values.numpy(), origin_values.numpy() + vals_inds.values.numpy(), origin_values.numpy() ) np.testing.assert_array_equal( - min_vals_inds[1].numpy(), origin_indices.numpy() + vals_inds[1].numpy(), origin_indices.numpy() ) def test_error_handling(self): @@ -214,12 +244,12 @@ def test_error_handling(self): "Tensors with integral type: 'paddle.int32' should stop gradient." ) err_msg2 = ( - "paddle.min() received unexpected keyword arguments 'input', 'dim'. " - "\nDid you mean to use paddle.compat.min() instead?" + f"{self.origin_op_name}() received unexpected keyword arguments 'input', 'dim'. " + f"\nDid you mean to use {self.test_op_name}() instead?" ) err_msg3 = ( - "paddle.compat.max() received unexpected keyword argument 'axis'. " - "\nDid you mean to use paddle.max() instead?" + f"{self.test_op_name}() received unexpected keyword argument 'axis'. " + f"\nDid you mean to use {self.origin_op_name}() instead?" 
) err_msg4 = ( "Non-CUDA GPU placed Tensor does not have 'paddle.float16' op registered.\n" @@ -229,82 +259,128 @@ def test_error_handling(self): # empty tensor empty_tensor = paddle.to_tensor([], dtype='float32') with self.assertRaises(ValueError): - paddle.compat.min(empty_tensor) + self.test_op(empty_tensor) # mixed parameters case 1 input_ts = paddle.to_tensor([1, 2, 3], dtype='float32') other_ts = paddle.to_tensor([1]) with self.assertRaises(TypeError): - paddle.compat.min(input_ts, other=other_ts, dim=0) + self.test_op(input_ts, other=other_ts, dim=0) # mixed parameters case 2 with self.assertRaises(TypeError): - paddle.compat.min(input_ts, 0, other=other_ts) + self.test_op(input_ts, 0, other=other_ts) # trying to perform grad ops for integral types with self.assertRaises(TypeError) as cm: tensor = paddle.ones([2, 2], dtype=paddle.int32) tensor.stop_gradient = False - tensors = paddle.compat.max(tensor, dim=0) + tensors = self.test_op(tensor, dim=0) self.assertEqual(str(cm.exception), err_msg1) # explicit None case 1 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, dim=None) + self.test_op(input_ts, dim=None) # explicit None case 2 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, None, keepdim=True) + self.test_op(input_ts, None, keepdim=True) # keepdim specified without specifying dim with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, keepdim=True) + self.test_op(input_ts, keepdim=True) # Wrong *args specification case 1 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, False) + self.test_op(input_ts, False) # Wrong *args specification case 2 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, other_ts, True) + self.test_op(input_ts, other_ts, True) # Tensor input for dim case 1 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, dim=paddle.to_tensor([0])) + self.test_op(input_ts, dim=paddle.to_tensor([0])) # Tensor input for dim case 2 with self.assertRaises(TypeError) as cm: - paddle.compat.min(input_ts, dim=paddle.to_tensor(0)) + self.test_op(input_ts, dim=paddle.to_tensor(0)) # Duplicate Arguments case 1 with self.assertRaises(TypeError) as cm: - paddle.compat.max(input_ts, 0, dim=0) + self.test_op(input_ts, 0, dim=0) # Duplicate Arguments case 2 with self.assertRaises(TypeError) as cm: - paddle.compat.max(input_ts, other_ts, other=0) + self.test_op(input_ts, other_ts, other=0) # Duplicate Arguments case 3 with self.assertRaises(TypeError) as cm: - paddle.compat.max(input_ts, dim=0, other=0, keepdim=True) + self.test_op(input_ts, dim=0, other=0, keepdim=True) # Wrong API used case 1 with self.assertRaises(TypeError) as cm: - paddle.min(input=input_ts, dim=0) + self.origin_op(input=input_ts, dim=0) self.assertEqual(str(cm.exception), err_msg2) # Wrong API used case 2 with self.assertRaises(TypeError) as cm: - paddle.compat.max(input_ts, axis=0) + self.test_op(input_ts, axis=0) self.assertEqual(str(cm.exception), err_msg3) # Rejected on CPU types with self.assertRaises(TypeError) as cm: tensor = paddle.to_tensor([1, 2, 3], dtype="float16") cpu_tensor = tensor.to("cpu") - paddle.compat.max(cpu_tensor, dim=0) + self.test_op(cpu_tensor, dim=0) self.assertEqual(str(cm.exception), err_msg4) + def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): + if not paddle.is_compiled_with_cuda(): + return + numel = 1 + for v in input_shape: + numel *= v + paddle.enable_static() + with paddle.static.program_guard(paddle.static.Program()): + input_tensor = 
paddle.arange(numel, dtype=paddle.float32).reshape( + input_shape + ) + + y = input_tensor**2 + input_tensor + values, indices = self.test_op(y, dim=axis, keepdim=keepdim) + values += 1 + + gt_values = self.origin_op(y, axis=axis, keepdim=keepdim) + 1 + gt_indices = self.index_op(y, axis=axis, keepdim=keepdim) + + place = paddle.CUDAPlace(0) + exe = paddle.static.Executor(place) + values_np, indices_np, gt_values_np, gt_indices_np = exe.run( + fetch_list=[values, indices, gt_values, gt_indices] + ) + np.testing.assert_allclose(values_np, gt_values_np) + np.testing.assert_equal(indices_np, gt_indices_np) + paddle.disable_static() + + def test_static_graph(self): + self._compare_with_origin_static([3, 10, 2], axis=1) + self._compare_with_origin_static([3, 10, 2], axis=0, keepdim=True) + self._compare_with_origin_static([17], axis=0) + + +class TestCompatMax(TestCompatMinMaxBase): + def __init__(self, *args, **kwargs): + super().__init__( + *args, + test_op=paddle.compat.max, + origin_op=paddle.max, + index_op=paddle.argmax, + test_op_name="paddle.compat.max", + origin_op_name="paddle.max", + **kwargs, + ) + if __name__ == '__main__': unittest.main() From 693ff5289ed9f12a26e8eefcdb40c96bb76dec73 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Mon, 11 Aug 2025 13:22:16 +0000 Subject: [PATCH 15/24] [API-Compat] Resolved conflicts in decorator_utils.py --- python/paddle/utils/decorator_utils.py | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index 55adce7e8961c4..5651962ea14341 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -362,3 +362,28 @@ def wrapper(*args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT: return wrapper return decorator +class ForbidKeywordsDecorator(DecoratorBase): + """A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected""" + + def __init__( + self, illegal_keys: list[str], func_name: str, correct_name: str + ) -> None: + super().__init__() + self.illegal_keys = illegal_keys + self.func_name = func_name + self.correct_name = correct_name + + def process( + self, args: tuple[Any, ...], kwargs: dict[str, Any] + ) -> tuple[tuple[Any, ...], dict[str, Any]]: + found_keys = [key for key in self.illegal_keys if key in kwargs] + + if found_keys: + keys_str = ", ".join(f"'{key}'" for key in found_keys) + plural = "s" if len(found_keys) > 1 else "" + + raise TypeError( + f"{self.func_name}() received unexpected keyword argument{plural} {keys_str}. " + f"\nDid you mean to use {self.correct_name}() instead?" 
+ ) + return args, kwargs From f3d7353a77b02eb17fdde0ebac2fdbd914041f7f Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 13 Aug 2025 06:59:39 +0000 Subject: [PATCH 16/24] [API-Compat] Added static graph min/max_with_index op check, simplified implementation --- python/paddle/tensor/compat.py | 52 ++-- python/paddle/tensor/math.py | 4 +- python/paddle/utils/decorator_utils.py | 3 +- test/legacy_test/test_compat_minmax.py | 36 ++- test/legacy_test/test_minmax_with_index_op.py | 281 ++++++++++++++++++ 5 files changed, 331 insertions(+), 45 deletions(-) create mode 100644 test/legacy_test/test_minmax_with_index_op.py diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index fb7e21f5c5ebfe..7b5e035e551d40 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -424,7 +424,6 @@ def try_get_keys(key): except KeyError: raise invalid_arguments_exception() from None return res - found_key = None dim_or_other = None keepdim = False @@ -436,25 +435,19 @@ def try_get_keys(key): elif total_arg_num == 2: if num_args == 2: dim_or_other, keepdim = args - if dim_or_other is None or isinstance( - dim_or_other, (Variable, paddle.pir.Value) - ): - raise invalid_arguments_exception() elif num_args == 1: dim_or_other = args[0] - if dim_or_other is None or isinstance( - dim_or_other, (Variable, paddle.pir.Value) - ): - raise invalid_arguments_exception() keepdim = try_get_keys("keepdim") else: dim_or_other = try_get_keys("dim") keepdim = try_get_keys("keepdim") + if dim_or_other is None or isinstance( + dim_or_other, (Variable, paddle.pir.Value) + ): + raise invalid_arguments_exception() elif total_arg_num == 1: if num_args: dim_or_other = args[0] - if dim_or_other is None: - raise invalid_arguments_exception() else: if "dim" in kwargs: dim_or_other = kwargs["dim"] @@ -462,8 +455,8 @@ def try_get_keys(key): dim_or_other = kwargs["other"] if not isinstance(dim_or_other, (Variable, paddle.pir.Value)): raise invalid_arguments_exception() - if dim_or_other is None: - raise invalid_arguments_exception() + if dim_or_other is None: + raise invalid_arguments_exception() if ( dim_or_other is not None @@ -507,9 +500,9 @@ def _min_max_allow_cpu_composite(input: Tensor): @ForbidKeywordsDecorator( - illegal_keys=['x', 'axis'], + illegal_keys={"x", "axis"}, func_name="paddle.compat.min", - correct_name='paddle.min', + correct_name="paddle.min", ) def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: """ @@ -521,7 +514,7 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: 3. paddle.compat.min(input: Tensor, other: Tensor): see `paddle.minimum` Special warning: the gradient behavior is NOT well-documented by PyTorch, the actual behavior should be: - 1. Case 1: the same as `amin` + 1. Case 1: the same as `min` 2. Case 2: NOT evenly distributing the gradient for equal minimum elements! PyTorch actually only propagates to the elements with indices, for example: Tensor([1, 1, 1]) -> min(..., dim=0) -> values=Tensor(0, ...), indices=Tensor(0), the gradient for input tensor won't be Tensor([1/3, 1/3, 1/3]) as stated in their documentation, but will be Tensor([1, 0, 0]). This API implements a similar backward kernel. 
@@ -593,9 +586,7 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: [[0.20000000, 0.30000000, 0.10000000, 0.20000000], [0.10000000, 0.10000000, 0.60000000, 0.70000000]]) """ - if not isinstance(input, paddle.pir.Value) and not isinstance( - input, paddle.Tensor - ): + if not isinstance(input, (paddle.pir.Value, paddle.Tensor)): raise TypeError( f"input should be a tensor, but got an instance with type '{type(input).__name__}'" ) @@ -604,11 +595,8 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("min", *args, **kwargs) if dim_or_other is None: - if input.numel() == 0: - raise ValueError( - "Reduce max cannot apply on empty tensor (numel == 0)" - ) - return paddle.amin(input) + # paddle.min and paddle.amin actually shares the same grad op (ReduceAminKernel) + return paddle.min(input) elif isinstance(dim_or_other, int): if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) @@ -632,9 +620,9 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: @ForbidKeywordsDecorator( - illegal_keys=['x', 'axis'], + illegal_keys={"x", "axis"}, func_name="paddle.compat.max", - correct_name='paddle.max', + correct_name="paddle.max", ) def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: """ @@ -646,7 +634,7 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: 3. paddle.compat.max(input: Tensor, other: Tensor): see `paddle.maximum` Special warning: the gradient behavior is NOT well-documented by PyTorch, the actual behavior should be: - 1. Case 1: the same as `amax` + 1. Case 1: the same as `max` 2. Case 2: NOT evenly distributing the gradient for equal maximum elements! PyTorch actually only propagates to the elements with indices, for example: Tensor([1, 1, 1]) -> max(..., dim=0) -> values=Tensor(0, ...), indices=Tensor(0), the gradient for input tensor won't be Tensor([1/3, 1/3, 1/3]) as stated in their documentation, but will be Tensor([1, 0, 0]). This API implements a similar backward kernel. 
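
The gradient note in the docstring above is easiest to see with a tiny example. The sketch below is illustrative only (it is not part of this patch) and assumes the paddle.compat.min API introduced in this series; the expected gradient follows the docstring, i.e. only the selected index receives gradient rather than an even split:

    import paddle

    x = paddle.to_tensor([1.0, 1.0, 1.0], stop_gradient=False)
    # Case 2 reduction: returns a (values, indices) named tuple;
    # indices has stop_gradient=True, so only `values` participates in backward
    values, indices = paddle.compat.min(x, dim=0)
    values.backward()
    print(indices.item())    # 0
    # Only the element picked by `indices` receives gradient:
    # x.grad is expected to be [1., 0., 0.], not [1/3, 1/3, 1/3]
    print(x.grad.numpy())
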
@@ -718,9 +706,7 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: [[0.50000000, 0.40000000, 0.50000000, 0.90000000], [0.30000000, 0.20000000, 0.60000000, 0.70000000]]) """ - if not isinstance(input, paddle.pir.Value) and not isinstance( - input, paddle.Tensor - ): + if not isinstance(input, (paddle.pir.Value, paddle.Tensor)): raise TypeError( f"input should be a tensor, but got an instance with type '{type(input).__name__}'" ) @@ -729,11 +715,7 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("max", *args, **kwargs) if dim_or_other is None: - if input.numel() == 0: - raise ValueError( - "Reduce max cannot apply on empty tensor (numel == 0)" - ) - return paddle.amax(input) + return paddle.max(input) elif isinstance(dim_or_other, int): if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py index da5eedfe992c26..62ff59ac412546 100644 --- a/python/paddle/tensor/math.py +++ b/python/paddle/tensor/math.py @@ -3275,7 +3275,7 @@ def _check_input(x): @ForbidKeywordsDecorator( - illegal_keys=["input", "dim", "other"], + illegal_keys={"input", "dim", "other"}, func_name="paddle.max", correct_name="paddle.compat.max", ) @@ -3439,7 +3439,7 @@ def max( @ForbidKeywordsDecorator( - illegal_keys=["input", "dim", "other"], + illegal_keys={"input", "dim", "other"}, func_name="paddle.min", correct_name="paddle.compat.min", ) diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index 5651962ea14341..8c3381463f28a7 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -366,7 +366,7 @@ class ForbidKeywordsDecorator(DecoratorBase): """A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected""" def __init__( - self, illegal_keys: list[str], func_name: str, correct_name: str + self, illegal_keys: set[str], func_name: str, correct_name: str ) -> None: super().__init__() self.illegal_keys = illegal_keys @@ -379,6 +379,7 @@ def process( found_keys = [key for key in self.illegal_keys if key in kwargs] if found_keys: + found_keys.sort() keys_str = ", ".join(f"'{key}'" for key in found_keys) plural = "s" if len(found_keys) > 1 else "" diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 00245894df0480..46d4bb8f257a0a 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -17,6 +17,7 @@ import numpy as np import paddle +from paddle.base import core class TestCompatMinMaxBase(unittest.TestCase): @@ -90,6 +91,8 @@ def test_case2_reduce_dim(self): result_keep = self.test_op(data, dim=0, keepdim=True) self.assertEqual(result_keep.values.shape, [1, 2, 2]) np.testing.assert_array_equal(result_keep.values.numpy(), expected_res) + result_keep = self.test_op(data, 0, keepdim=True) + np.testing.assert_array_equal(result_keep.values.numpy(), expected_res) result_neg = self.test_op(data, dim=in_dim - 3) np.testing.assert_array_equal( @@ -206,13 +209,10 @@ def test_edge_cases(self): def test_compare_with_index_ops_to_origin(self): dtypes = ['float32', 'float64', 'int32', 'int64', 'uint8'] - cpu_reject_types = {'int16', 'bfloat16', 'float16'} for i, dtype in enumerate(dtypes): data = paddle.to_tensor([[1, 2, 3], [4, 5, 6]], dtype=dtype) - # `bfloat16` and `float16` are rejected on CPU - if 
not data.place.is_gpu_place() and dtype in cpu_reject_types: - continue + # `bfloat16`, `uint8` and `float16` are rejected for min/argmin vals_inds = self.test_op(data, dim=0) self.assertEqual(vals_inds.values.dtype, data.dtype) self.assertEqual(vals_inds.indices.dtype, paddle.int64) @@ -244,7 +244,7 @@ def test_error_handling(self): "Tensors with integral type: 'paddle.int32' should stop gradient." ) err_msg2 = ( - f"{self.origin_op_name}() received unexpected keyword arguments 'input', 'dim'. " + f"{self.origin_op_name}() received unexpected keyword arguments 'dim', 'input'. " f"\nDid you mean to use {self.test_op_name}() instead?" ) err_msg3 = ( @@ -255,6 +255,9 @@ def test_error_handling(self): "Non-CUDA GPU placed Tensor does not have 'paddle.float16' op registered.\n" "Paddle support following DataTypes: int32, int64, float64, float32, uint8" ) + err_msg5 = ( + "input should be a tensor, but got an instance with type 'list'" + ) # empty tensor empty_tensor = paddle.to_tensor([], dtype='float32') @@ -306,6 +309,14 @@ def test_error_handling(self): with self.assertRaises(TypeError) as cm: self.test_op(input_ts, dim=paddle.to_tensor(0)) + # Tensor input for dim case 3 + with self.assertRaises(TypeError) as cm: + self.test_op(input_ts, paddle.to_tensor([0]), keepdim=True) + + # Tensor input for dim case 4 + with self.assertRaises(TypeError) as cm: + self.test_op(input_ts, paddle.to_tensor([0]), True) + # Duplicate Arguments case 1 with self.assertRaises(TypeError) as cm: self.test_op(input_ts, 0, dim=0) @@ -335,9 +346,16 @@ def test_error_handling(self): self.test_op(cpu_tensor, dim=0) self.assertEqual(str(cm.exception), err_msg4) + # Wrong input type + with self.assertRaises(TypeError) as cm: + self.test_op([1, 2]) + self.assertEqual(str(cm.exception), err_msg5) + + # Wrong second parameter type + with self.assertRaises(TypeError): + self.test_op(input_ts, "first_dim") + def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): - if not paddle.is_compiled_with_cuda(): - return numel = 1 for v in input_shape: numel *= v @@ -363,6 +381,10 @@ def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): np.testing.assert_equal(indices_np, gt_indices_np) paddle.disable_static() + @unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", + ) def test_static_graph(self): self._compare_with_origin_static([3, 10, 2], axis=1) self._compare_with_origin_static([3, 10, 2], axis=0, keepdim=True) diff --git a/test/legacy_test/test_minmax_with_index_op.py b/test/legacy_test/test_minmax_with_index_op.py new file mode 100644 index 00000000000000..cf1ff6f6bd5dc9 --- /dev/null +++ b/test/legacy_test/test_minmax_with_index_op.py @@ -0,0 +1,281 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import unittest + +import numpy as np +from op_test import OpTest + +import paddle +from paddle.base import core + +np.random.seed(0) +paddle.enable_static() + + +def max_with_index(x, dim=None, keepdim=False): + """makeshift wrapper for the C++ op, extracted from compat.max""" + vals, inds = paddle._C_ops.max_with_index(x, dim, keepdim, False) + inds.stop_gradient = True + return vals, inds + + +def min_with_index(x, dim=None, keepdim=False): + """makeshift wrapper for the C++ op, extracted from compat.min""" + vals, inds = paddle._C_ops.min_with_index(x, dim, keepdim, False) + inds.stop_gradient = True + return vals, inds + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMaxWithIndexBasic(OpTest): + def setUp(self): + self.set_op_input_attr() + self.set_testing_op() + self.set_data_type() + self.set_input_shape() + if self.is_int: + inputs = np.random.randint(0, 255, self.input_shape).astype( + self.dtype + ) + else: + inputs = np.random.rand(*self.input_shape).astype(self.dtype) + + self.prim_op_type = "prim" + self.python_out_sig = ["values", "indices"] + self.attrs = {"dim": self.dim, "keepdim": self.keepdim} + + gt_values = self.value_op(inputs, axis=self.dim, keepdims=self.keepdim) + gt_indices = self.index_op(inputs, axis=self.dim, keepdims=self.keepdim) + self.inputs = { + 'x': inputs, + } + self.outputs = { + 'values': gt_values, + 'indices': gt_indices, + } + + def compute_grad(self): + grad = np.zeros_like(self.inputs['x'], dtype=self.dtype) + indices = ( + self.outputs['indices'] + if self.keepdim + else np.expand_dims(self.outputs['indices'], axis=self.dim) + ) + np.put_along_axis(grad, indices, 1, axis=self.dim) + return grad + + def set_testing_op(self): + self.op_type = "max_with_index" + self.python_api = max_with_index + self.public_python_api = max_with_index + self.value_op = np.max + self.index_op = np.argmax + + def set_data_type(self): + self.dtype = np.float64 + self.is_int = False + + def set_input_shape(self): + self.input_shape = [30, 257, 21] + + def set_op_input_attr(self): + self.dim = 0 + self.keepdim = False + + def test_check_output(self): + self.check_output(check_pir=True) + + def test_check_grad(self): + grad = self.compute_grad() + self.check_grad( + ['x'], + 'values', + check_pir=True, + user_defined_grads=[grad * (1.0 / grad.sum())], + ) + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMinWithIndexBasic(TestMaxWithIndexBasic): + def set_testing_op(self): + self.op_type = "min_with_index" + self.python_api = min_with_index + self.public_python_api = min_with_index + self.value_op = np.min + self.index_op = np.argmin + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMinWithIndexKeepDim(TestMinWithIndexBasic): + def set_op_input_attr(self): + self.dim = 1 + self.keepdim = True + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMaxWithIndexKeepDim(TestMaxWithIndexBasic): + def set_op_input_attr(self): + self.dim = 1 + self.keepdim = True + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMinWithIndexNegDim(TestMinWithIndexBasic): + def set_op_input_attr(self): + self.dim = -1 + self.keepdim = False + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class 
TestMaxWithIndexNegDim(TestMaxWithIndexBasic): + def set_op_input_attr(self): + self.dim = 1 + self.keepdim = False + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMinWithIndexMoreTypeAndShape(TestMinWithIndexBasic): + def set_op_input_attr(self): + self.dim = 1 + self.keepdim = True + + def set_data_type(self): + self.dtype = np.float32 + self.is_int = False + + def set_input_shape(self): + self.input_shape = [10, 20, 16] + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMinWithIndexFP16(TestMinWithIndexBasic): + def set_data_type(self): + self.dtype = np.float16 + self.is_int = False + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMaxWithIndexU8(TestMaxWithIndexBasic): + def set_data_type(self): + self.dtype = np.uint8 + self.is_int = True + + @unittest.skipIf( + True, + "integral type does not need to check grad", + ) + def test_check_grad(self): + pass + + +@unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", +) +class TestMaxWithIndexMoreTypeAndShape(TestMaxWithIndexBasic): + def set_op_input_attr(self): + self.dim = -1 + self.keepdim = False + + def set_data_type(self): + self.dtype = np.uint8 + self.is_int = True + + def set_input_shape(self): + self.input_shape = [4095] + + @unittest.skipIf( + True, + "integral type does not need to check grad", + ) + def test_check_grad(self): + pass + + +class TestMinMaxWithIndexPlace(unittest.TestCase): + def init(self): + self.input_shape = [30, 10, 10] + self.data = np.random.randn(30, 10, 10) + + def setUp(self): + self.init() + + def cpu_place(self): + self.place = core.CPUPlace() + + def test_api_static_cpu_err_handling_1(self): + self.cpu_place() + with ( + self.assertRaises(NotImplementedError), + paddle.static.program_guard(paddle.static.Program()), + ): + input = paddle.static.data( + name="input", shape=self.input_shape, dtype="float64" + ) + output = max_with_index(input, dim=0) + exe = paddle.static.Executor(self.place) + result = exe.run( + paddle.static.default_main_program(), + feed={'input': self.data}, + fetch_list=[output], + ) + + def test_api_static_cpu_err_handling_2(self): + self.cpu_place() + with ( + self.assertRaises(NotImplementedError), + paddle.static.program_guard(paddle.static.Program()), + ): + input = paddle.static.data( + name="input", shape=self.input_shape, dtype="float32" + ) + output = min_with_index(input, dim=-2, keepdim=True) + exe = paddle.static.Executor(self.place) + result = exe.run( + paddle.static.default_main_program(), + feed={'input': self.data.astype(np.float32)}, + fetch_list=[output], + ) + + +if __name__ == "__main__": + unittest.main() From bfd5134a0b366d4eeffe80f5cbfec5703bfa8207 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Thu, 14 Aug 2025 06:55:57 +0000 Subject: [PATCH 17/24] [API-Compat] min/max static graph op test and out tensor support --- python/paddle/tensor/compat.py | 107 ++++++++++++++++---- test/legacy_test/test_compat_minmax.py | 131 +++++++++++++++++++++++-- 2 files changed, 210 insertions(+), 28 deletions(-) diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 7b5e035e551d40..bf5c52f12bc144 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -499,12 +499,44 @@ def _min_max_allow_cpu_composite(input: Tensor): ) +def _check_out_status( + out: Tensor | 
tuple[Tensor, Tensor] | list[Tensor], + expect_multiple: bool = False, +): + if out is None: + return + if not in_dynamic_mode(): + raise RuntimeError( + "Using `out` static graph CINN backend is currently not supported. Directly return the tensor tuple instead.\n" + ) + if expect_multiple: + if not isinstance(out, (tuple, list)) or len(out) != 2: + raise TypeError( + f"Expected a list or tuple of two tensors, got {type(out)} instead." + ) + if not ( + isinstance(out[0], paddle.Tensor) + and isinstance(out[1], paddle.Tensor) + ): + raise TypeError( + f"Expected Tensor type in the tuple/list, got ({type(out[0])}, {type(out[1])}) instead." + ) + else: + if not isinstance(out, paddle.Tensor): + raise TypeError(f"Expected a Tensor, got {type(out)} instead.") + + @ForbidKeywordsDecorator( illegal_keys={"x", "axis"}, func_name="paddle.compat.min", correct_name="paddle.min", ) -def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: +def min( + input: Tensor, + *args: Any, + out: Tensor | tuple[Tensor, Tensor] | list[Tensor] = None, + **kwargs: Any, +) -> Tensor | MinMaxRetType: """ Computes the minimum of tensor elements. There are mainly 3 cases (functionalities): @@ -537,6 +569,9 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: other (Tensor, optional): the other tensor to perform `paddle.minimum` with. This Tensor should have the same or broadcast-able shape as the `input`. Note that (`dim` & `keepdim`) and `other` are mutually exclusive meaning that trying to composite both will result in TypeError + out (Tensor|tuple[Tensor, Tensor], optional): the output Tensor or tuple of (Tensor, int64 Tensor) that can be optionally + given to be used as output buffers. For case 1 and 3 out is just a Tensor, while for case 2 we expect a tuple + Returns: - For case 1: a single value Tensor (0-dim) @@ -594,29 +629,43 @@ def min(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("min", *args, **kwargs) + ret = None if dim_or_other is None: # paddle.min and paddle.amin actually shares the same grad op (ReduceAminKernel) - return paddle.min(input) + _check_out_status(out, False) + ret = paddle.min(input) elif isinstance(dim_or_other, int): + _check_out_status(out, True) if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) # CPUPlace and other placements are implemented by composition indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) values = paddle.take_along_axis(input, indices, axis=dim_or_other) if keepdim: - return MinMaxRetType(values=values, indices=indices) - return MinMaxRetType( - values=values.squeeze_(axis=dim_or_other), - indices=indices.squeeze_(axis=dim_or_other), - ) + ret = MinMaxRetType(values=values, indices=indices) + else: + ret = MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) else: vals, inds = _C_ops.min_with_index( input, dim_or_other, keepdim, False ) inds.stop_gradient = True - return MinMaxRetType(values=vals, indices=inds) + ret = MinMaxRetType(values=vals, indices=inds) + else: + _check_out_status(out, False) + ret = _C_ops.minimum(input, dim_or_other) + + if out is None: + return ret else: - return _C_ops.minimum(input, dim_or_other) + if isinstance(ret, MinMaxRetType): + paddle.assign(ret.values, out[0]) + paddle.assign(ret.indices, out[1]) + else: + paddle.assign(ret, out) @ForbidKeywordsDecorator( @@ -624,7 +673,12 @@ def min(input: Tensor, *args: 
Any, **kwargs: Any) -> Tensor | MinMaxRetType: func_name="paddle.compat.max", correct_name="paddle.max", ) -def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: +def max( + input: Tensor, + *args: Any, + out: Tensor | tuple[Tensor, Tensor] | list[Tensor] = None, + **kwargs: Any, +) -> Tensor | MinMaxRetType: """ Computes the maximum of tensor elements. There are mainly 3 cases (functionalities): @@ -657,6 +711,9 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: other (Tensor, optional): the other tensor to perform `paddle.maximum` with. This Tensor should have the same or broadcast-able shape as the `input`. Note that (`dim` & `keepdim`) and `other` are mutually exclusive meaning that trying to composite both will result in TypeError + out (Tensor|tuple[Tensor, Tensor], optional): the output Tensor or tuple of (Tensor, int64 Tensor) that can be optionally + given to be used as output buffers. For case 1 and 3 out is just a Tensor, while for case 2 we expect a tuple + Returns: - For case 1: a single value Tensor (0-dim) @@ -714,24 +771,38 @@ def max(input: Tensor, *args: Any, **kwargs: Any) -> Tensor | MinMaxRetType: dim_or_other, keepdim = _min_max_param_checker("max", *args, **kwargs) + ret = None if dim_or_other is None: - return paddle.max(input) + _check_out_status(out, False) + ret = paddle.max(input) elif isinstance(dim_or_other, int): + _check_out_status(out, True) if in_dynamic_mode() and not input.place.is_gpu_place(): _min_max_allow_cpu_composite(input) indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) values = paddle.take_along_axis(input, indices, axis=dim_or_other) if keepdim: - return MinMaxRetType(values=values, indices=indices) - return MinMaxRetType( - values=values.squeeze_(axis=dim_or_other), - indices=indices.squeeze_(axis=dim_or_other), - ) + ret = MinMaxRetType(values=values, indices=indices) + else: + ret = MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) else: vals, inds = _C_ops.max_with_index( input, dim_or_other, keepdim, False ) inds.stop_gradient = True - return MinMaxRetType(values=vals, indices=inds) + ret = MinMaxRetType(values=vals, indices=inds) else: - return _C_ops.maximum(input, dim_or_other) + _check_out_status(out, False) + ret = _C_ops.maximum(input, dim_or_other) + + if out is None: + return ret + else: + if isinstance(ret, MinMaxRetType): + paddle.assign(ret.values, out[0]) + paddle.assign(ret.indices, out[1]) + else: + paddle.assign(ret, out) diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 46d4bb8f257a0a..1fb22cbff256c6 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -40,6 +40,7 @@ def __init__( self.index_op = index_op self.test_op_name = test_op_name self.origin_op_name = origin_op_name + np.random.seed(1) def test_case1_simple_reduce_all(self): data = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]], dtype='float32') @@ -237,6 +238,47 @@ def test_compare_with_index_ops_to_origin(self): vals_inds[1].numpy(), origin_indices.numpy() ) + def test_case1_out(self): + data = np.random.randn(4, 5, 6).astype(np.float32) + x = paddle.to_tensor(data, stop_gradient=False) + y = paddle.to_tensor(data, stop_gradient=False) + out = paddle.to_tensor(0) + self.test_op(x, out=out) + gt_out = self.origin_op(y) + gt_out.backward() + out.backward() + + np.testing.assert_allclose(out.numpy(), gt_out.numpy()) + 
np.testing.assert_allclose(x.grad.numpy(), y.grad.numpy()) + + def test_case2_out(self): + for type_to_use in (list, tuple): + data = np.random.randn(3, 17, 5).astype(np.float32) + x = paddle.to_tensor(data, stop_gradient=False) + y = paddle.to_tensor(data, stop_gradient=False) + out = type_to_use((paddle.to_tensor(0), paddle.to_tensor(0))) + self.test_op(x, dim=1, out=out) + gt_vals = self.origin_op(y, axis=1) + gt_inds = self.index_op(y, axis=1) + gt_vals.backward() + out[0].backward() + + np.testing.assert_allclose(out[0].numpy(), gt_vals.numpy()) + np.testing.assert_array_equal(out[1].numpy(), gt_inds.numpy()) + np.testing.assert_allclose(x.grad.numpy(), y.grad.numpy()) + + def test_case3_out(self): + data = np.random.randn(3, 4, 5).astype(np.float32) + x = paddle.to_tensor(data) + y = paddle.to_tensor(data) + out = paddle.to_tensor(0) + self.test_op(x, paddle.ones_like(x), out=out) + if self.test_op_name.endswith("min"): + gt_vals = paddle.minimum(x, paddle.ones_like(x)) + else: + gt_vals = paddle.maximum(x, paddle.ones_like(x)) + np.testing.assert_allclose(out.numpy(), gt_vals.numpy()) + def test_error_handling(self): """Test whether correct exception will be thrown. Skip error messages (some of them are long)""" @@ -355,7 +397,54 @@ def test_error_handling(self): with self.assertRaises(TypeError): self.test_op(input_ts, "first_dim") - def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): + paddle.enable_static() + with ( + self.assertRaises(RuntimeError) as cm, + paddle.static.program_guard(paddle.static.Program()), + ): + x = paddle.static.data(name='x', shape=[None, 6], dtype='float32') + result0, result1 = self.test_op( + paddle.zeros([3, 4]), + dim=1, + out=( + paddle.zeros([3, 4]), + paddle.zeros([3, 4], dtype=paddle.int64), + ), + ) + + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) + paddle.static.Executor(place).run() + self.assertEqual( + str(cm.exception), + "Using `out` static graph CINN backend is currently not supported. Directly return the tensor tuple instead.\n", + ) + paddle.disable_static() + + def test_wrong_out_input(dim, out_input): + with self.assertRaises(TypeError) as cm: + if dim is None: + self.test_op(input_ts, out=out_input) + else: + self.test_op(input_ts, dim=dim, out=out_input) + + test_wrong_out_input(0, [0, paddle.to_tensor(0)]) + test_wrong_out_input(0, paddle.to_tensor(0)) + test_wrong_out_input(None, 0) + test_wrong_out_input(None, (paddle.to_tensor(0),)) + + def _compare_with_origin_static( + self, input_shape, axis_or_other=0, keepdim=False, use_out=False + ): + """Test Case 2 and Case 3 for return output or param output in static graph mode + + TODO(heqianyue): DO NOT set use_out for now! + Currently, static graph + CINN backend will result in unresolved dependency bug for assign op + This test is disabled for now, but will be useful when dy2st bug is fixed. 
+ """ numel = 1 for v in input_shape: numel *= v @@ -365,12 +454,34 @@ def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): input_shape ) - y = input_tensor**2 + input_tensor - values, indices = self.test_op(y, dim=axis, keepdim=keepdim) - values += 1 - - gt_values = self.origin_op(y, axis=axis, keepdim=keepdim) + 1 - gt_indices = self.index_op(y, axis=axis, keepdim=keepdim) + y = input_tensor**2 + if isinstance(axis_or_other, int): + if use_out: + out = [paddle.to_tensor(0), paddle.to_tensor([0])] + self.test_op(y, dim=axis_or_other, keepdim=keepdim, out=out) + values, indices = out + else: + values, indices = self.test_op( + y, dim=axis_or_other, keepdim=keepdim + ) + gt_values = self.origin_op( + y, axis=axis_or_other, keepdim=keepdim + ) + gt_indices = self.index_op( + y, axis=axis_or_other, keepdim=keepdim + ) + else: + if use_out: + out = paddle.to_tensor(0) + self.test_op(y, axis_or_other, out=out) + values, indices = out, paddle.to_tensor(0) + else: + values, indices = self.test_op(y, axis_or_other) + if self.test_op_name.endswith("min"): + gt_values = paddle.minimum(y, axis=axis_or_other, out=None) + else: + gt_values = paddle.maximum(y, axis=axis_or_other) + gt_indices = paddle.to_tensor(0) place = paddle.CUDAPlace(0) exe = paddle.static.Executor(place) @@ -386,9 +497,9 @@ def _compare_with_origin_static(self, input_shape, axis=0, keepdim=False): "core is not compiled with CUDA, skipping", ) def test_static_graph(self): - self._compare_with_origin_static([3, 10, 2], axis=1) - self._compare_with_origin_static([3, 10, 2], axis=0, keepdim=True) - self._compare_with_origin_static([17], axis=0) + self._compare_with_origin_static([3, 10, 2], 1) + self._compare_with_origin_static([3, 10, 2], 0, keepdim=True) + self._compare_with_origin_static([17], 0) class TestCompatMax(TestCompatMinMaxBase): From fb8bba0b37597dbe1bde9d5c6b77516482a98826 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Thu, 14 Aug 2025 07:07:42 +0000 Subject: [PATCH 18/24] [API-Compat] Resolved merge conflicts. --- python/paddle/utils/decorator_utils.py | 26 -------------------------- 1 file changed, 26 deletions(-) diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index 8c3381463f28a7..55adce7e8961c4 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -362,29 +362,3 @@ def wrapper(*args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT: return wrapper return decorator -class ForbidKeywordsDecorator(DecoratorBase): - """A decorator that hints users to use the correct `compat` functions, when erroneous keyword arguments are detected""" - - def __init__( - self, illegal_keys: set[str], func_name: str, correct_name: str - ) -> None: - super().__init__() - self.illegal_keys = illegal_keys - self.func_name = func_name - self.correct_name = correct_name - - def process( - self, args: tuple[Any, ...], kwargs: dict[str, Any] - ) -> tuple[tuple[Any, ...], dict[str, Any]]: - found_keys = [key for key in self.illegal_keys if key in kwargs] - - if found_keys: - found_keys.sort() - keys_str = ", ".join(f"'{key}'" for key in found_keys) - plural = "s" if len(found_keys) > 1 else "" - - raise TypeError( - f"{self.func_name}() received unexpected keyword argument{plural} {keys_str}. " - f"\nDid you mean to use {self.correct_name}() instead?" 
- ) - return args, kwargs From 47a08dceaa5e0cde498d12d621460e2a1756c498 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Thu, 14 Aug 2025 14:54:23 +0000 Subject: [PATCH 19/24] [API-Compat] Fixed CPU static graph bugs removed split API for independence. --- .../kernels/cpu/min_max_with_index_kernel.cc | 4 +- paddle/phi/kernels/gpu/reduce_kernel.cu | 1 - python/paddle/tensor/compat.py | 4 +- python/paddle/tensor/manipulation.py | 7 - test/legacy_test/test_compat_minmax.py | 11 -- test/legacy_test/test_compat_split.py | 177 ----------------- test/legacy_test/test_compat_split_static.py | 184 ------------------ test/legacy_test/test_minmax_with_index_op.py | 4 +- 8 files changed, 7 insertions(+), 385 deletions(-) delete mode 100644 test/legacy_test/test_compat_split.py delete mode 100644 test/legacy_test/test_compat_split_static.py diff --git a/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc b/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc index f373553389e422..0cf9de846ba043 100644 --- a/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc +++ b/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc @@ -32,7 +32,7 @@ DenseTensor* ind_out) { \ PADDLE_ENFORCE_EQ(0, \ 1, \ - phi::errors::Unimplemented( \ + phi::errors::PreconditionNotMet( \ "In static graph mode, %s PHI kernel is not " \ "currently available on non-GPU devices.", \ #name)); \ @@ -48,7 +48,7 @@ DenseTensor* x_grad) { \ PADDLE_ENFORCE_EQ(0, \ 1, \ - phi::errors::Unimplemented( \ + phi::errors::PreconditionNotMet( \ "In static graph mode, %s PHI kernel is not " \ "currently available on non-GPU devices.", \ #name)); \ diff --git a/paddle/phi/kernels/gpu/reduce_kernel.cu b/paddle/phi/kernels/gpu/reduce_kernel.cu index 3f55297474015c..95132d09e2cc22 100644 --- a/paddle/phi/kernels/gpu/reduce_kernel.cu +++ b/paddle/phi/kernels/gpu/reduce_kernel.cu @@ -13,7 +13,6 @@ // limitations under the License. 
#include "paddle/phi/kernels/reduce_kernel.h" -#include #include "paddle/phi/kernels/gpu/reduce_amin_amax_common.h" #include "paddle/phi/kernels/reduce_amin_grad_kernel.h" diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index bf5c52f12bc144..698f082759ef01 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -25,7 +25,6 @@ ) if TYPE_CHECKING: - from collections.abc import Sequence from paddle import Tensor from paddle._typing import ( @@ -223,6 +222,7 @@ class SortRetType(NamedTuple): values: Tensor indices: Tensor + class MinMaxRetType(NamedTuple): values: Tensor indices: Tensor @@ -402,6 +402,8 @@ def to_list_if_necessary(x, size_check=False): dilations=to_list_if_necessary(self.dilations), name=self.name, ) + + def _min_max_param_checker(func_name: str, *args: Any, **kwargs: Any): def invalid_arguments_exception(error_prefix=""): type_strs = [type(v).__name__ for v in args] diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 403f48d17c2334..4a65c5695ba6ff 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -64,8 +64,6 @@ TensorOrTensors, ) -from paddle.utils.decorator_utils import ForbidKeywordsDecorator - __all__ = [] @@ -2738,11 +2736,6 @@ def row_stack(x: Sequence[Tensor], name: str | None = None) -> Tensor: return paddle.vstack(x, name=name) -@ForbidKeywordsDecorator( - illegal_keys={"tensor", "split_size_or_sections", "dim"}, - func_name="paddle.split", - correct_name="paddle.compat.split", -) def split( x: Tensor, num_or_sections: int | Sequence[int], diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index 1fb22cbff256c6..d043b82fcea426 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -45,21 +45,10 @@ def __init__( def test_case1_simple_reduce_all(self): data = paddle.to_tensor([[1.0, 2.0], [3.0, 4.0]], dtype='float32') val = self.test_op(data) - if self.test_op_name.endswith("min"): self.assertAlmostEqual(val.item(), 1.0) - expected_grad = np.array([[0.5, 0.5], [0.0, 0.0]]) else: self.assertAlmostEqual(val.item(), 4.0) - expected_grad = np.array([[0.0, 0.0], [0.0, 1.0]]) - - data = paddle.to_tensor( - [[1.0, 1.0], [2.0, 3.0]], dtype='float32', stop_gradient=False - ) - val = self.test_op(data) - val.backward() - - np.testing.assert_allclose(data.grad.numpy(), expected_grad) def test_case2_reduce_dim(self): """Test dim/keepdim""" diff --git a/test/legacy_test/test_compat_split.py b/test/legacy_test/test_compat_split.py deleted file mode 100644 index a582f1b0948c4b..00000000000000 --- a/test/legacy_test/test_compat_split.py +++ /dev/null @@ -1,177 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np - -import paddle -from paddle.compat import split - - -class TestCompatSplit(unittest.TestCase): - def _compare_with_origin(self, input_tensor, size, axis=0): - pd_results = split(input_tensor, size, dim=axis) - - if isinstance(size, int): - shape_on_axis = input_tensor.shape[axis] - remaining_num = shape_on_axis % size - num_sections = shape_on_axis // size - if remaining_num == 0: - size = num_sections - else: - size = [size for _ in range(num_sections)] - size.append(remaining_num) - - origin_results = paddle.split( - input_tensor, num_or_sections=size, axis=axis - ) - - self.assertEqual(len(origin_results), len(pd_results)) - - # check shape and output section size of the output - for origin_ts, pd_ts in zip(origin_results, pd_results): - np.testing.assert_allclose(origin_ts.numpy(), pd_ts.numpy()) - - def test_basic_split(self): - """Test basic splitting with integer size""" - data = paddle.arange(12).reshape([3, 4]).astype('float32') - self._compare_with_origin(data, 1, 0) - self._compare_with_origin(data, 2, 1) - - def test_split_with_list_sections(self): - """Test splitting with list of section sizes""" - data = paddle.rand([10, 5]) - self._compare_with_origin(data, [3, 2, 5], 0) - self._compare_with_origin(data, [1, 4], -1) - - def test_chained_operations(self): - """Test split with complex operation chain""" - x = paddle.rand([8, 12]) - y = paddle.sin(x) * 2.0 + paddle.exp(x) / 3.0 - z = paddle.nn.functional.relu(y) - - z1, z2 = split(z, 7, dim=1) - - self.assertEqual(z1.shape, [8, 7]) - self.assertEqual(z2.shape, [8, 5]) - - z_np = z.numpy() - np.testing.assert_allclose(z_np[:, :7], z1.numpy()) - np.testing.assert_allclose(z_np[:, 7:], z2.numpy()) - - def test_split_grad(self): - """Test backprop for split, in1 and in2 are computed by - compat.split and original split""" - - def get_tensors(): - np.random.seed(114514) - np_arr = np.random.normal(0, 1, [2, 3, 4, 5]) - return paddle.to_tensor(np_arr), paddle.to_tensor(np_arr) - - in1, in2 = get_tensors() - in1.stop_gradient = False - in2.stop_gradient = False - - def computation_graph(in_tensor): - y = in_tensor * 2.3 + 3.0 - y = paddle.maximum(y, paddle.to_tensor([0], dtype=paddle.float32)) - return y.mean(axis=0) - - out1 = computation_graph(in1) - out2 = computation_graph(in2) - - packs1 = paddle.compat.split(out1, 2, dim=2) - packs2 = paddle.split(out2, [2, 2, 1], axis=2) - - res1 = packs1[0] + packs1[1] + packs1[2] - res2 = packs2[0] + packs2[1] + packs2[2] - res1.backward() - res2.backward() - np.testing.assert_allclose(in1.grad.numpy(), in2.grad.numpy()) - - def test_empty_dim(self): - """Split with empty dim""" - in_tensor = paddle.arange(72, dtype=paddle.int64).reshape([3, 12, 2]) - self._compare_with_origin(in_tensor, [5, 0, 7], axis=1) - - def test_split_with_one_block(self): - """Resulting tuple should be of length 1""" - in_tensor = paddle.arange(60, dtype=paddle.float32).reshape([3, 4, 5]) - self._compare_with_origin(in_tensor, 5, paddle.to_tensor([-1])) - self._compare_with_origin(in_tensor, [5], paddle.to_tensor(2)) - - def test_edge_cases(self): - """Test edge cases and error handling""" - x = paddle.arange(5) - s1, s2 = split(x, [3, 2]) - np.testing.assert_allclose(s1.numpy(), [0, 1, 2]) - np.testing.assert_allclose(s2.numpy(), [3, 4]) - - x = paddle.rand([2, 2, 2]) - a, b = split(x, 1, 2) - self.assertEqual(a.shape, [2, 2, 1]) - - # invalid split sections - with self.assertRaises(ValueError): - split(x, [3, 1], 1) - - # invalid split axis - with 
self.assertRaises(ValueError): - split(x, 2, 3) - - def test_error_hint(self): - """Test whether there will be correct exception when users pass paddle.split kwargs in paddle.compat.split, vice versa.""" - x = paddle.randn([3, 9, 5]) - - msg_gt_1 = ( - "paddle.split() received unexpected keyword arguments 'dim', 'split_size_or_sections', 'tensor'. " - "\nDid you mean to use paddle.compat.split() instead?" - ) - msg_gt_2 = ( - "paddle.compat.split() received unexpected keyword argument 'num_or_sections'. " - "\nDid you mean to use paddle.split() instead?" - ) - msg_gt_3 = "(InvalidArgument) The dim is expected to be in range of [-3, 3), but got 3" - msg_gt_4 = "paddle.compat.split expects split_sizes have only non-negative entries, but got size = -5 on dim 2" - - split_size = paddle.to_tensor([3]) - msg_gt_5 = ( - "The type of 'split_size_or_sections' in split must be int, list or tuple in imperative mode, but " - f"received {type(split_size)}." - ) - - with self.assertRaises(TypeError) as cm: - tensors = paddle.split(tensor=x, split_size_or_sections=3, dim=0) - self.assertEqual(str(cm.exception), msg_gt_1) - - with self.assertRaises(TypeError) as cm: - tensors = split(x, num_or_sections=3, dim=0) - self.assertEqual(str(cm.exception), msg_gt_2) - - with self.assertRaises(ValueError) as cm: - tensors = split(x, 3, dim=3) - self.assertEqual(str(cm.exception), msg_gt_3) - - with self.assertRaises(ValueError) as cm: - tensors = split(x, [3, 3, -5], -2) - self.assertEqual(str(cm.exception), msg_gt_4) - - with self.assertRaises(TypeError) as cm: - tensors = split(x, split_size, 1) - self.assertEqual(str(cm.exception), msg_gt_5) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_compat_split_static.py b/test/legacy_test/test_compat_split_static.py deleted file mode 100644 index 006e3ec30ea077..00000000000000 --- a/test/legacy_test/test_compat_split_static.py +++ /dev/null @@ -1,184 +0,0 @@ -# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -import numpy as np - -import paddle -from paddle.compat import split - - -class TestCompatSplitStatic(unittest.TestCase): - def _compare_with_origin_static( - self, input_shape, size, axis=0, dim_rank=-1 - ): - """size_dim: -1 means we input size by int, 0 means 0-size tensor, 1 means tensor with shape [1]""" - numel = 1 - for v in input_shape: - numel *= v - input_axis = axis - if dim_rank == 0: - input_axis = paddle.to_tensor(axis) - elif dim_rank == 1: - input_axis = paddle.to_tensor([axis]) - paddle.enable_static() - with paddle.static.program_guard(paddle.static.Program()): - input_tensor = paddle.arange(numel, dtype=paddle.float32).reshape( - input_shape - ) - pd_results = split(input_tensor, size, dim=input_axis) - - if isinstance(size, int): - shape_on_axis = input_tensor.shape[axis] - remaining_num = shape_on_axis % size - num_sections = shape_on_axis // size - if remaining_num == 0: - size = num_sections - else: - size = [size for _ in range(num_sections)] - size.append(remaining_num) - - origin_results = paddle.split( - input_tensor, num_or_sections=size, axis=axis - ) - assert len(pd_results) == len(origin_results), "length mismatched" - place = ( - paddle.CUDAPlace(0) - if paddle.is_compiled_with_cuda() - else paddle.CPUPlace() - ) - exe = paddle.static.Executor(place) - results = exe.run(fetch_list=[*origin_results, *pd_results]) - length_needed = len(results) // 2 - for i in range(length_needed): - np.testing.assert_allclose( - results[i], results[i + length_needed] - ) - paddle.disable_static() - - def test_split_composite_static(self): - paddle.seed(114514) - - def get_tensors(): - np.random.seed(114514) - np_arr = np.random.normal(0, 1, [2, 3, 4, 5]) - return paddle.to_tensor(np_arr), paddle.to_tensor(np_arr) - - in1, in2 = get_tensors() - in1.stop_gradient = False - in2.stop_gradient = False - - @paddle.jit.to_static - def computation_graph(in1: paddle.Tensor, in2: paddle.Tensor): - y1 = in1 * 1.5 + 1.0 - y1 = paddle.minimum(y1, paddle.to_tensor([0], dtype=paddle.float32)) - out1 = y1.mean(axis=0) - - y2 = in2 * 1.5 + 1.0 - y2 = paddle.minimum(y2, paddle.to_tensor([0], dtype=paddle.float32)) - out2 = y2.mean(axis=0) - - packs1 = paddle.compat.split(out1, 2, dim=2) - packs2 = paddle.split(out2, [2, 2, 1], axis=2) - - res1 = packs1[0] + packs1[1] + packs1[2] - res2 = packs2[0] + packs2[1] + packs2[2] - - return res1, res2 - - res1, res2 = computation_graph(in1, in2) - np.testing.assert_allclose(res1.numpy(), res2.numpy()) - - def test_static_graph(self): - """Test static graph execution""" - # fixed random seed for reproducibility - np.random.seed(114514) - # old static graph mode - paddle.enable_static() - - with paddle.static.program_guard(paddle.static.Program()): - x = paddle.static.data(name='x', shape=[None, 6], dtype='float32') - result0, result1 = split(x, split_size_or_sections=[3, 3], dim=1) - output = result0 * 2.0 + paddle.sin(result1) - - place = ( - paddle.CUDAPlace(0) - if paddle.is_compiled_with_cuda() - else paddle.CPUPlace() - ) - exe = paddle.static.Executor(place) - - input_data = np.random.rand(3, 6).astype('float32') - feed = {'x': input_data} - - results = exe.run(feed=feed, fetch_list=[result0, result1, output]) - - pd_result0, pd_result1 = results[0], results[1] - np.testing.assert_allclose(input_data[:, :3], pd_result0) - np.testing.assert_allclose(input_data[:, 3:], pd_result1) - - expected_output = input_data[:, :3] * 2.0 + np.sin( - input_data[:, 3:] - ) - np.testing.assert_allclose( - expected_output, results[2], 
rtol=1e-4, atol=1e-4 - ) - - paddle.disable_static() - - def test_error_hint(self): - """Test whether there will be correct exception when users pass paddle.split kwargs in paddle.compat.split, vice versa.""" - - msg_gt_1 = "split_size_or_sections must be greater than 0." - msg_gt_2 = "len(split_size_or_sections) must not be more than input.shape[dim]." - msg_gt_3 = "The type of 'split_size_or_sections' in split must be int, list or tuple in imperative mode." - msg_gt_4 = ( - "'dim' is not allowed to be a pir.Value in a static graph: " - "\npir.Value can not be used for indexing python lists/tuples." - ) - - paddle.enable_static() - with self.assertRaises(AssertionError) as cm: - x = paddle.randn([3, 4, 5]) - tensors = split(x, -2, dim=0) - self.assertEqual(str(cm.exception), msg_gt_1) - - with self.assertRaises(AssertionError) as cm: - x = paddle.randn([3, 4, 5]) - tensors = split(x, (1, 1, 1, 1, 2, 2), dim=-1) - self.assertEqual(str(cm.exception), msg_gt_2) - - with self.assertRaises(TypeError) as cm: - x = paddle.randn([3, 4, 5]) - tensors = split(x, paddle.to_tensor(2), dim=2) - self.assertEqual(str(cm.exception), msg_gt_3) - - with self.assertRaises(TypeError) as cm: - x = paddle.randn([3, 4, 5]) - tensors = split(x, 2, dim=paddle.to_tensor(2)) - paddle.disable_static() - self.assertEqual(str(cm.exception), msg_gt_4) - - def test_basic_split(self): - """Test basic splitting with integer size""" - input_shape = [3, 6] - self._compare_with_origin_static(input_shape, 1, 0) - self._compare_with_origin_static(input_shape, 3, -1) - self._compare_with_origin_static(input_shape, 4, dim_rank=0) - self._compare_with_origin_static(input_shape, 3, dim_rank=1) - - -if __name__ == '__main__': - unittest.main() diff --git a/test/legacy_test/test_minmax_with_index_op.py b/test/legacy_test/test_minmax_with_index_op.py index cf1ff6f6bd5dc9..c95238739c4be3 100644 --- a/test/legacy_test/test_minmax_with_index_op.py +++ b/test/legacy_test/test_minmax_with_index_op.py @@ -245,7 +245,7 @@ def cpu_place(self): def test_api_static_cpu_err_handling_1(self): self.cpu_place() with ( - self.assertRaises(NotImplementedError), + self.assertRaises(RuntimeError), paddle.static.program_guard(paddle.static.Program()), ): input = paddle.static.data( @@ -262,7 +262,7 @@ def test_api_static_cpu_err_handling_1(self): def test_api_static_cpu_err_handling_2(self): self.cpu_place() with ( - self.assertRaises(NotImplementedError), + self.assertRaises(RuntimeError), paddle.static.program_guard(paddle.static.Program()), ): input = paddle.static.data( From 9300d1703acc6dccccd9353f9007aba7e01f1963 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Tue, 19 Aug 2025 08:18:54 +0000 Subject: [PATCH 20/24] [API-Compat] Resolved merged conflicts, add symbolic shape test. 
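
For reference, a minimal usage sketch of the three call forms that paddle.compat.min/max dispatch between (illustrative only, not part of the patch; the `out=` form mirrors the tests added earlier in this series):

    import paddle

    x = paddle.to_tensor([[1.0, 5.0], [3.0, 2.0]])

    # Case 1: full reduction -> 0-dim Tensor
    m = paddle.compat.max(x)                  # 5.0

    # Case 2: reduce along `dim` -> (values, indices) named tuple
    vals, inds = paddle.compat.max(x, dim=1)  # vals: [5., 3.], inds: [1, 0]

    # Case 2 with preallocated output buffers, as exercised in test_case2_out
    out = (paddle.to_tensor(0), paddle.to_tensor(0))
    paddle.compat.max(x, dim=1, out=out)

    # Case 3: elementwise maximum against another tensor (paddle.maximum)
    y = paddle.compat.max(x, paddle.ones_like(x))
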
--- python/paddle/tensor/compat.py | 76 +++++--- python/paddle/tensor/manipulation.py | 7 + python/paddle/utils/decorator_utils.py | 41 ++-- .../symbolic/test_infer_sym_shape_unary_op.py | 39 ++++ test/legacy_test/test_compat_split.py | 177 +++++++++++++++++ test/legacy_test/test_compat_split_static.py | 184 ++++++++++++++++++ test/legacy_test/test_minmax_with_index_op.py | 5 + .../test_zero_dim_sundry_dygraph_api.py | 55 ++++++ 8 files changed, 542 insertions(+), 42 deletions(-) create mode 100644 test/legacy_test/test_compat_split.py create mode 100644 test/legacy_test/test_compat_split_static.py diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 698f082759ef01..6ea2fcb5c80015 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -25,6 +25,7 @@ ) if TYPE_CHECKING: + from collections.abc import Sequence from paddle import Tensor from paddle._typing import ( @@ -638,24 +639,35 @@ def min( ret = paddle.min(input) elif isinstance(dim_or_other, int): _check_out_status(out, True) - if in_dynamic_mode() and not input.place.is_gpu_place(): - _min_max_allow_cpu_composite(input) - # CPUPlace and other placements are implemented by composition - indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) - values = paddle.take_along_axis(input, indices, axis=dim_or_other) - if keepdim: - ret = MinMaxRetType(values=values, indices=indices) + if input.ndim: + if in_dynamic_mode() and not input.place.is_gpu_place(): + _min_max_allow_cpu_composite(input) + # CPUPlace and other placements are implemented by composition + + indices = paddle.argmin(input, axis=dim_or_other, keepdim=True) + values = paddle.take_along_axis( + input, indices, axis=dim_or_other + ) + if keepdim: + ret = MinMaxRetType(values=values, indices=indices) + else: + ret = MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) else: - ret = MinMaxRetType( - values=values.squeeze_(axis=dim_or_other), - indices=indices.squeeze_(axis=dim_or_other), + vals, inds = _C_ops.min_with_index( + input, dim_or_other, keepdim, False ) + inds.stop_gradient = True + ret = MinMaxRetType(values=vals, indices=inds) else: - vals, inds = _C_ops.min_with_index( - input, dim_or_other, keepdim, False + ret = MinMaxRetType( + values=input, + indices=paddle.zeros( + [], dtype=paddle.int64, device=input.place + ), ) - inds.stop_gradient = True - ret = MinMaxRetType(values=vals, indices=inds) else: _check_out_status(out, False) ret = _C_ops.minimum(input, dim_or_other) @@ -779,23 +791,33 @@ def max( ret = paddle.max(input) elif isinstance(dim_or_other, int): _check_out_status(out, True) - if in_dynamic_mode() and not input.place.is_gpu_place(): - _min_max_allow_cpu_composite(input) - indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) - values = paddle.take_along_axis(input, indices, axis=dim_or_other) - if keepdim: - ret = MinMaxRetType(values=values, indices=indices) + if input.ndim: + if in_dynamic_mode() and not input.place.is_gpu_place(): + _min_max_allow_cpu_composite(input) + indices = paddle.argmax(input, axis=dim_or_other, keepdim=True) + values = paddle.take_along_axis( + input, indices, axis=dim_or_other + ) + if keepdim: + ret = MinMaxRetType(values=values, indices=indices) + else: + ret = MinMaxRetType( + values=values.squeeze_(axis=dim_or_other), + indices=indices.squeeze_(axis=dim_or_other), + ) else: - ret = MinMaxRetType( - values=values.squeeze_(axis=dim_or_other), - indices=indices.squeeze_(axis=dim_or_other), 
+ vals, inds = _C_ops.max_with_index( + input, dim_or_other, keepdim, False ) + inds.stop_gradient = True + ret = MinMaxRetType(values=vals, indices=inds) else: - vals, inds = _C_ops.max_with_index( - input, dim_or_other, keepdim, False + ret = MinMaxRetType( + values=input, + indices=paddle.zeros( + [], dtype=paddle.int64, device=input.place + ), ) - inds.stop_gradient = True - ret = MinMaxRetType(values=vals, indices=inds) else: _check_out_status(out, False) ret = _C_ops.maximum(input, dim_or_other) diff --git a/python/paddle/tensor/manipulation.py b/python/paddle/tensor/manipulation.py index 4a65c5695ba6ff..403f48d17c2334 100644 --- a/python/paddle/tensor/manipulation.py +++ b/python/paddle/tensor/manipulation.py @@ -64,6 +64,8 @@ TensorOrTensors, ) +from paddle.utils.decorator_utils import ForbidKeywordsDecorator + __all__ = [] @@ -2736,6 +2738,11 @@ def row_stack(x: Sequence[Tensor], name: str | None = None) -> Tensor: return paddle.vstack(x, name=name) +@ForbidKeywordsDecorator( + illegal_keys={"tensor", "split_size_or_sections", "dim"}, + func_name="paddle.split", + correct_name="paddle.compat.split", +) def split( x: Tensor, num_or_sections: int | Sequence[int], diff --git a/python/paddle/utils/decorator_utils.py b/python/paddle/utils/decorator_utils.py index 55adce7e8961c4..8f0c55e38caf5c 100644 --- a/python/paddle/utils/decorator_utils.py +++ b/python/paddle/utils/decorator_utils.py @@ -127,21 +127,6 @@ def __init__( self.default_params = default_params warnings.simplefilter("always", category=Warning) - -# *size => shape decorator -class SizeArgsDecorator(DecoratorBase): - """ - Usage Example: - - paddle.ones(1, dtype=paddle.float32) - paddle.ones(1, 2, 3, dtype=paddle.float32) - paddle.ones([1, 2, 3], dtype=paddle.float32) - paddle.ones(size=[1, 2, 3], dtype=paddle.float32) - - paddle.ones([1, 2, 3], paddle.float32) - paddle.ones(shape=[1, 2, 3], dtype=paddle.float32) - """ - def process( self, args: tuple[Any, ...], kwargs: dict[str, Any] ) -> tuple[tuple[Any, ...], dict[str, Any]]: @@ -262,6 +247,32 @@ def wrapper(*args: _InputT.args, **kwargs: _InputT.kwargs) -> _RetT: return decorator +# *size => shape decorator +class SizeArgsDecorator(DecoratorBase): + """ + Usage Example: + + paddle.ones(1, dtype=paddle.float32) + paddle.ones(1, 2, 3, dtype=paddle.float32) + paddle.ones([1, 2, 3], dtype=paddle.float32) + paddle.ones(size=[1, 2, 3], dtype=paddle.float32) + + paddle.ones([1, 2, 3], paddle.float32) + paddle.ones(shape=[1, 2, 3], dtype=paddle.float32) + """ + + def process( + self, args: tuple[Any, ...], kwargs: dict[str, Any] + ) -> tuple[tuple[Any, ...], dict[str, Any]]: + if 'size' in kwargs: + kwargs['shape'] = kwargs.pop('size') + elif len(args) >= 1 and isinstance(args[0], int): + kwargs['shape'] = list(args) + args = () + + return args, kwargs + + class VariableArgsDecorator(DecoratorBase): def __init__(self, var: str) -> None: super().__init__() diff --git a/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py b/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py index 841c08919c5e9f..ed77143313948b 100644 --- a/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py +++ b/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py @@ -73,6 +73,45 @@ def test_eval_symbolic(self): return True +class MaxMinWithIndexNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x): + min_vals, min_inds = paddle.compat.min(x, dim=-1, keepdim=False) + max_vals, max_inds = paddle.compat.max(x, dim=-1, keepdim=True) + return min_vals + 
max_vals.squeeze(axis=-1), min_inds + max_inds + + +class MinMaxWithIndexOpInferSymbolicShapeTest(TestBase): + def prepare_data(self): + self.cases = [np.random.rand(3, 4, 5, 6), np.random.rand(257)] + self.expected = [ + [ + 'shape[S0, S1, S2], data[NULL]', + 'shape[S0, Broadcast(S0, S1), Broadcast(S1, S2), S2], data[NULL]', + ], + ['shape[], data[NULL]', 'shape[1], data[NULL]'], + ] + + def test_eval_symbolic(self): + net = MaxMinWithIndexNet() + + for i in range(len(self.cases)): + x = self.cases[i] + x_spec = InputSpec( + shape=[None for index in range(len(x.shape))], dtype='float32' + ) + input_spec = [x_spec] + net = apply_to_static(net, False, input_spec) + net.eval() + check_infer_results( + net, input_spec, 'builtin.shadow_output', self.expected[i] + ) + + return True + + class AsComplexAsRealNet(paddle.nn.Layer): def __init__(self): super().__init__() diff --git a/test/legacy_test/test_compat_split.py b/test/legacy_test/test_compat_split.py new file mode 100644 index 00000000000000..a582f1b0948c4b --- /dev/null +++ b/test/legacy_test/test_compat_split.py @@ -0,0 +1,177 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle.compat import split + + +class TestCompatSplit(unittest.TestCase): + def _compare_with_origin(self, input_tensor, size, axis=0): + pd_results = split(input_tensor, size, dim=axis) + + if isinstance(size, int): + shape_on_axis = input_tensor.shape[axis] + remaining_num = shape_on_axis % size + num_sections = shape_on_axis // size + if remaining_num == 0: + size = num_sections + else: + size = [size for _ in range(num_sections)] + size.append(remaining_num) + + origin_results = paddle.split( + input_tensor, num_or_sections=size, axis=axis + ) + + self.assertEqual(len(origin_results), len(pd_results)) + + # check shape and output section size of the output + for origin_ts, pd_ts in zip(origin_results, pd_results): + np.testing.assert_allclose(origin_ts.numpy(), pd_ts.numpy()) + + def test_basic_split(self): + """Test basic splitting with integer size""" + data = paddle.arange(12).reshape([3, 4]).astype('float32') + self._compare_with_origin(data, 1, 0) + self._compare_with_origin(data, 2, 1) + + def test_split_with_list_sections(self): + """Test splitting with list of section sizes""" + data = paddle.rand([10, 5]) + self._compare_with_origin(data, [3, 2, 5], 0) + self._compare_with_origin(data, [1, 4], -1) + + def test_chained_operations(self): + """Test split with complex operation chain""" + x = paddle.rand([8, 12]) + y = paddle.sin(x) * 2.0 + paddle.exp(x) / 3.0 + z = paddle.nn.functional.relu(y) + + z1, z2 = split(z, 7, dim=1) + + self.assertEqual(z1.shape, [8, 7]) + self.assertEqual(z2.shape, [8, 5]) + + z_np = z.numpy() + np.testing.assert_allclose(z_np[:, :7], z1.numpy()) + np.testing.assert_allclose(z_np[:, 7:], z2.numpy()) + + def test_split_grad(self): + """Test backprop for split, in1 and in2 are computed by + compat.split and 
original split""" + + def get_tensors(): + np.random.seed(114514) + np_arr = np.random.normal(0, 1, [2, 3, 4, 5]) + return paddle.to_tensor(np_arr), paddle.to_tensor(np_arr) + + in1, in2 = get_tensors() + in1.stop_gradient = False + in2.stop_gradient = False + + def computation_graph(in_tensor): + y = in_tensor * 2.3 + 3.0 + y = paddle.maximum(y, paddle.to_tensor([0], dtype=paddle.float32)) + return y.mean(axis=0) + + out1 = computation_graph(in1) + out2 = computation_graph(in2) + + packs1 = paddle.compat.split(out1, 2, dim=2) + packs2 = paddle.split(out2, [2, 2, 1], axis=2) + + res1 = packs1[0] + packs1[1] + packs1[2] + res2 = packs2[0] + packs2[1] + packs2[2] + res1.backward() + res2.backward() + np.testing.assert_allclose(in1.grad.numpy(), in2.grad.numpy()) + + def test_empty_dim(self): + """Split with empty dim""" + in_tensor = paddle.arange(72, dtype=paddle.int64).reshape([3, 12, 2]) + self._compare_with_origin(in_tensor, [5, 0, 7], axis=1) + + def test_split_with_one_block(self): + """Resulting tuple should be of length 1""" + in_tensor = paddle.arange(60, dtype=paddle.float32).reshape([3, 4, 5]) + self._compare_with_origin(in_tensor, 5, paddle.to_tensor([-1])) + self._compare_with_origin(in_tensor, [5], paddle.to_tensor(2)) + + def test_edge_cases(self): + """Test edge cases and error handling""" + x = paddle.arange(5) + s1, s2 = split(x, [3, 2]) + np.testing.assert_allclose(s1.numpy(), [0, 1, 2]) + np.testing.assert_allclose(s2.numpy(), [3, 4]) + + x = paddle.rand([2, 2, 2]) + a, b = split(x, 1, 2) + self.assertEqual(a.shape, [2, 2, 1]) + + # invalid split sections + with self.assertRaises(ValueError): + split(x, [3, 1], 1) + + # invalid split axis + with self.assertRaises(ValueError): + split(x, 2, 3) + + def test_error_hint(self): + """Test whether there will be correct exception when users pass paddle.split kwargs in paddle.compat.split, vice versa.""" + x = paddle.randn([3, 9, 5]) + + msg_gt_1 = ( + "paddle.split() received unexpected keyword arguments 'dim', 'split_size_or_sections', 'tensor'. " + "\nDid you mean to use paddle.compat.split() instead?" + ) + msg_gt_2 = ( + "paddle.compat.split() received unexpected keyword argument 'num_or_sections'. " + "\nDid you mean to use paddle.split() instead?" + ) + msg_gt_3 = "(InvalidArgument) The dim is expected to be in range of [-3, 3), but got 3" + msg_gt_4 = "paddle.compat.split expects split_sizes have only non-negative entries, but got size = -5 on dim 2" + + split_size = paddle.to_tensor([3]) + msg_gt_5 = ( + "The type of 'split_size_or_sections' in split must be int, list or tuple in imperative mode, but " + f"received {type(split_size)}." 
+ ) + + with self.assertRaises(TypeError) as cm: + tensors = paddle.split(tensor=x, split_size_or_sections=3, dim=0) + self.assertEqual(str(cm.exception), msg_gt_1) + + with self.assertRaises(TypeError) as cm: + tensors = split(x, num_or_sections=3, dim=0) + self.assertEqual(str(cm.exception), msg_gt_2) + + with self.assertRaises(ValueError) as cm: + tensors = split(x, 3, dim=3) + self.assertEqual(str(cm.exception), msg_gt_3) + + with self.assertRaises(ValueError) as cm: + tensors = split(x, [3, 3, -5], -2) + self.assertEqual(str(cm.exception), msg_gt_4) + + with self.assertRaises(TypeError) as cm: + tensors = split(x, split_size, 1) + self.assertEqual(str(cm.exception), msg_gt_5) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/legacy_test/test_compat_split_static.py b/test/legacy_test/test_compat_split_static.py new file mode 100644 index 00000000000000..006e3ec30ea077 --- /dev/null +++ b/test/legacy_test/test_compat_split_static.py @@ -0,0 +1,184 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np + +import paddle +from paddle.compat import split + + +class TestCompatSplitStatic(unittest.TestCase): + def _compare_with_origin_static( + self, input_shape, size, axis=0, dim_rank=-1 + ): + """size_dim: -1 means we input size by int, 0 means 0-size tensor, 1 means tensor with shape [1]""" + numel = 1 + for v in input_shape: + numel *= v + input_axis = axis + if dim_rank == 0: + input_axis = paddle.to_tensor(axis) + elif dim_rank == 1: + input_axis = paddle.to_tensor([axis]) + paddle.enable_static() + with paddle.static.program_guard(paddle.static.Program()): + input_tensor = paddle.arange(numel, dtype=paddle.float32).reshape( + input_shape + ) + pd_results = split(input_tensor, size, dim=input_axis) + + if isinstance(size, int): + shape_on_axis = input_tensor.shape[axis] + remaining_num = shape_on_axis % size + num_sections = shape_on_axis // size + if remaining_num == 0: + size = num_sections + else: + size = [size for _ in range(num_sections)] + size.append(remaining_num) + + origin_results = paddle.split( + input_tensor, num_or_sections=size, axis=axis + ) + assert len(pd_results) == len(origin_results), "length mismatched" + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) + exe = paddle.static.Executor(place) + results = exe.run(fetch_list=[*origin_results, *pd_results]) + length_needed = len(results) // 2 + for i in range(length_needed): + np.testing.assert_allclose( + results[i], results[i + length_needed] + ) + paddle.disable_static() + + def test_split_composite_static(self): + paddle.seed(114514) + + def get_tensors(): + np.random.seed(114514) + np_arr = np.random.normal(0, 1, [2, 3, 4, 5]) + return paddle.to_tensor(np_arr), paddle.to_tensor(np_arr) + + in1, in2 = get_tensors() + in1.stop_gradient = False + in2.stop_gradient = False + + @paddle.jit.to_static + def computation_graph(in1: paddle.Tensor, in2: 
paddle.Tensor): + y1 = in1 * 1.5 + 1.0 + y1 = paddle.minimum(y1, paddle.to_tensor([0], dtype=paddle.float32)) + out1 = y1.mean(axis=0) + + y2 = in2 * 1.5 + 1.0 + y2 = paddle.minimum(y2, paddle.to_tensor([0], dtype=paddle.float32)) + out2 = y2.mean(axis=0) + + packs1 = paddle.compat.split(out1, 2, dim=2) + packs2 = paddle.split(out2, [2, 2, 1], axis=2) + + res1 = packs1[0] + packs1[1] + packs1[2] + res2 = packs2[0] + packs2[1] + packs2[2] + + return res1, res2 + + res1, res2 = computation_graph(in1, in2) + np.testing.assert_allclose(res1.numpy(), res2.numpy()) + + def test_static_graph(self): + """Test static graph execution""" + # fixed random seed for reproducibility + np.random.seed(114514) + # old static graph mode + paddle.enable_static() + + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.static.data(name='x', shape=[None, 6], dtype='float32') + result0, result1 = split(x, split_size_or_sections=[3, 3], dim=1) + output = result0 * 2.0 + paddle.sin(result1) + + place = ( + paddle.CUDAPlace(0) + if paddle.is_compiled_with_cuda() + else paddle.CPUPlace() + ) + exe = paddle.static.Executor(place) + + input_data = np.random.rand(3, 6).astype('float32') + feed = {'x': input_data} + + results = exe.run(feed=feed, fetch_list=[result0, result1, output]) + + pd_result0, pd_result1 = results[0], results[1] + np.testing.assert_allclose(input_data[:, :3], pd_result0) + np.testing.assert_allclose(input_data[:, 3:], pd_result1) + + expected_output = input_data[:, :3] * 2.0 + np.sin( + input_data[:, 3:] + ) + np.testing.assert_allclose( + expected_output, results[2], rtol=1e-4, atol=1e-4 + ) + + paddle.disable_static() + + def test_error_hint(self): + """Test whether there will be correct exception when users pass paddle.split kwargs in paddle.compat.split, vice versa.""" + + msg_gt_1 = "split_size_or_sections must be greater than 0." + msg_gt_2 = "len(split_size_or_sections) must not be more than input.shape[dim]." + msg_gt_3 = "The type of 'split_size_or_sections' in split must be int, list or tuple in imperative mode." + msg_gt_4 = ( + "'dim' is not allowed to be a pir.Value in a static graph: " + "\npir.Value can not be used for indexing python lists/tuples." 
+ ) + + paddle.enable_static() + with self.assertRaises(AssertionError) as cm: + x = paddle.randn([3, 4, 5]) + tensors = split(x, -2, dim=0) + self.assertEqual(str(cm.exception), msg_gt_1) + + with self.assertRaises(AssertionError) as cm: + x = paddle.randn([3, 4, 5]) + tensors = split(x, (1, 1, 1, 1, 2, 2), dim=-1) + self.assertEqual(str(cm.exception), msg_gt_2) + + with self.assertRaises(TypeError) as cm: + x = paddle.randn([3, 4, 5]) + tensors = split(x, paddle.to_tensor(2), dim=2) + self.assertEqual(str(cm.exception), msg_gt_3) + + with self.assertRaises(TypeError) as cm: + x = paddle.randn([3, 4, 5]) + tensors = split(x, 2, dim=paddle.to_tensor(2)) + paddle.disable_static() + self.assertEqual(str(cm.exception), msg_gt_4) + + def test_basic_split(self): + """Test basic splitting with integer size""" + input_shape = [3, 6] + self._compare_with_origin_static(input_shape, 1, 0) + self._compare_with_origin_static(input_shape, 3, -1) + self._compare_with_origin_static(input_shape, 4, dim_rank=0) + self._compare_with_origin_static(input_shape, 3, dim_rank=1) + + +if __name__ == '__main__': + unittest.main() diff --git a/test/legacy_test/test_minmax_with_index_op.py b/test/legacy_test/test_minmax_with_index_op.py index c95238739c4be3..c38509c9c44285 100644 --- a/test/legacy_test/test_minmax_with_index_op.py +++ b/test/legacy_test/test_minmax_with_index_op.py @@ -232,6 +232,11 @@ def test_check_grad(self): class TestMinMaxWithIndexPlace(unittest.TestCase): + """min/max_with_index has no CPU version, so when CUDA is not available, + we skip all the above test. A runtime error will be emitted if min/max_with_index + is called on CPU, this unit test tries capturing it. + """ + def init(self): self.input_shape = [30, 10, 10] self.data = np.random.randn(30, 10, 10) diff --git a/test/legacy_test/test_zero_dim_sundry_dygraph_api.py b/test/legacy_test/test_zero_dim_sundry_dygraph_api.py index bc958ca42bf242..b8debdcab006bb 100644 --- a/test/legacy_test/test_zero_dim_sundry_dygraph_api.py +++ b/test/legacy_test/test_zero_dim_sundry_dygraph_api.py @@ -551,6 +551,61 @@ def test_argmax(self): out = paddle.argmax(x, keepdim=True) self.assertEqual(out.shape, [1, 1]) + def _make_compat_minmax_test(self, func_name): + # 1) x is 0D + x = paddle.rand([]) + val1, ind1 = func_name(x, 0) + val2, ind2 = func_name(x, -1) + val3 = func_name(x) + + self.assertEqual(val1.shape, []) + self.assertEqual(ind1.shape, []) + np.testing.assert_allclose(val1, x) + np.testing.assert_allclose(ind1, 0) + + self.assertEqual(val2.shape, []) + self.assertEqual(ind2.shape, []) + np.testing.assert_allclose(val2, x) + np.testing.assert_allclose(ind2, 0) + + self.assertEqual(val3.shape, []) + np.testing.assert_allclose(val3, x) + + # 2) x is 1D + x = paddle.rand([5]) + val, ind = func_name(x, 0) + self.assertEqual(val.shape, []) + self.assertEqual(ind.shape, []) + + # 3) x is ND + x = paddle.rand([3, 5]) + val, ind = func_name(x, dim=1) + self.assertEqual(val.shape, [3]) + self.assertEqual(ind.shape, [3]) + + val = func_name(x) + self.assertEqual(val.shape, []) + + # 4) x is ND, keepdim=True + x = paddle.rand([3, 5]) + val, ind = func_name(x, dim=0, keepdim=True) + self.assertEqual(val.shape, [1, 5]) + self.assertEqual(ind.shape, [1, 5]) + + # 5) test backward + x = paddle.randn([4, 5]) + x.stop_gradient = False + + val, ind = func_name(x, dim=0) + val.backward() + self.assertEqual(x.grad.shape, [4, 5]) + + def test_compat_min(self): + self._make_compat_minmax_test(paddle.compat.min) + + def test_compat_max(self): + 
self._make_compat_minmax_test(paddle.compat.max) + def test_kthvalue(self): # 1) x is 0D x = paddle.randn([]) From 17d848c3a80c72bf66c4638be37e91c3461ccd36 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Tue, 19 Aug 2025 12:36:04 +0000 Subject: [PATCH 21/24] [API-Compat] Updated unittests --- .../kernels/cpu/min_max_with_index_kernel.cc | 96 ------------------- test/legacy_test/test_minmax_with_index_op.py | 51 ---------- 2 files changed, 147 deletions(-) delete mode 100644 paddle/phi/kernels/cpu/min_max_with_index_kernel.cc diff --git a/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc b/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc deleted file mode 100644 index 0cf9de846ba043..00000000000000 --- a/paddle/phi/kernels/cpu/min_max_with_index_kernel.cc +++ /dev/null @@ -1,96 +0,0 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -#include "paddle/phi/kernels/min_max_with_index_kernel.h" - -#include "paddle/common/ddim.h" -#include "paddle/phi/backends/cpu/cpu_context.h" -#include "paddle/phi/core/kernel_registry.h" -#include "paddle/phi/core/utils/data_type.h" -#include "paddle/phi/kernels/funcs/eigen/common.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -#define DEFINE_WITH_INDEX_KERNEL(OpType, name) \ - template \ - void OpType##WithIndexKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const Scalar& dim, \ - bool keepdim, \ - bool flatten, \ - DenseTensor* val_out, \ - DenseTensor* ind_out) { \ - PADDLE_ENFORCE_EQ(0, \ - 1, \ - phi::errors::PreconditionNotMet( \ - "In static graph mode, %s PHI kernel is not " \ - "currently available on non-GPU devices.", \ - #name)); \ - } \ - template \ - void OpType##WithIndexGradKernel(const Context& dev_ctx, \ - const DenseTensor& x, \ - const DenseTensor& values, \ - const DenseTensor& indices, \ - const DenseTensor& values_grad, \ - const Scalar& dim, \ - bool keepdim, \ - DenseTensor* x_grad) { \ - PADDLE_ENFORCE_EQ(0, \ - 1, \ - phi::errors::PreconditionNotMet( \ - "In static graph mode, %s PHI kernel is not " \ - "currently available on non-GPU devices.", \ - #name)); \ - } - -namespace phi { - -DEFINE_WITH_INDEX_KERNEL(Min, min_with_index) -DEFINE_WITH_INDEX_KERNEL(Max, max_with_index) -#undef DEFINE_WITH_INDEX_KERNEL - -} // namespace phi - -#define REGISTER_CPU_KERNELS(OpType, OpName) \ - PD_REGISTER_KERNEL(OpName, \ - CPU, \ - ALL_LAYOUT, \ - phi::OpType##WithIndexKernel, \ - phi::dtype::float16, \ - phi::dtype::bfloat16, \ - float, \ - double, \ - int32_t, \ - int64_t, \ - int16_t, \ - uint8_t) { \ - kernel->OutputAt(0).SetDataType(kernel->InputAt(0).dtype); \ - kernel->OutputAt(1).SetDataType(phi::DataType::INT64); \ - } \ - PD_REGISTER_KERNEL(OpName##_grad, \ - CPU, \ - ALL_LAYOUT, \ - phi::OpType##WithIndexGradKernel, \ - float, \ - double, \ - uint8_t, \ - int, \ - int16_t, \ - int64_t, \ - phi::dtype::float16, \ - phi::dtype::bfloat16) {} - -REGISTER_CPU_KERNELS(Min, min_with_index) -REGISTER_CPU_KERNELS(Max, max_with_index) 
-#undef REGISTER_CPU_KERNELS diff --git a/test/legacy_test/test_minmax_with_index_op.py b/test/legacy_test/test_minmax_with_index_op.py index c38509c9c44285..6bd686cc71270e 100644 --- a/test/legacy_test/test_minmax_with_index_op.py +++ b/test/legacy_test/test_minmax_with_index_op.py @@ -231,56 +231,5 @@ def test_check_grad(self): pass -class TestMinMaxWithIndexPlace(unittest.TestCase): - """min/max_with_index has no CPU version, so when CUDA is not available, - we skip all the above test. A runtime error will be emitted if min/max_with_index - is called on CPU, this unit test tries capturing it. - """ - - def init(self): - self.input_shape = [30, 10, 10] - self.data = np.random.randn(30, 10, 10) - - def setUp(self): - self.init() - - def cpu_place(self): - self.place = core.CPUPlace() - - def test_api_static_cpu_err_handling_1(self): - self.cpu_place() - with ( - self.assertRaises(RuntimeError), - paddle.static.program_guard(paddle.static.Program()), - ): - input = paddle.static.data( - name="input", shape=self.input_shape, dtype="float64" - ) - output = max_with_index(input, dim=0) - exe = paddle.static.Executor(self.place) - result = exe.run( - paddle.static.default_main_program(), - feed={'input': self.data}, - fetch_list=[output], - ) - - def test_api_static_cpu_err_handling_2(self): - self.cpu_place() - with ( - self.assertRaises(RuntimeError), - paddle.static.program_guard(paddle.static.Program()), - ): - input = paddle.static.data( - name="input", shape=self.input_shape, dtype="float32" - ) - output = min_with_index(input, dim=-2, keepdim=True) - exe = paddle.static.Executor(self.place) - result = exe.run( - paddle.static.default_main_program(), - feed={'input': self.data.astype(np.float32)}, - fetch_list=[output], - ) - - if __name__ == "__main__": unittest.main() From 822e8d75fab6a948854c6f726c9c8571b89e4c6d Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 20 Aug 2025 02:11:17 +0000 Subject: [PATCH 22/24] [API-Compat] Update version year --- paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu | 2 +- paddle/phi/kernels/gpu/min_max_with_index_kernel.cu | 2 +- paddle/phi/kernels/min_max_with_index_kernel.h | 2 +- test/legacy_test/test_minmax_with_index_op.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu b/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu index f34d03bf07e506..2cbffdb67cb3ae 100644 --- a/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/min_max_with_index_grad_kernel.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. diff --git a/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu index 2509c34fb0c8fd..521444ef9e9481 100644 --- a/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu +++ b/paddle/phi/kernels/gpu/min_max_with_index_kernel.cu @@ -1,4 +1,4 @@ -// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. 
diff --git a/paddle/phi/kernels/min_max_with_index_kernel.h b/paddle/phi/kernels/min_max_with_index_kernel.h index eca50fc3a752e8..56e733fcdbeef8 100644 --- a/paddle/phi/kernels/min_max_with_index_kernel.h +++ b/paddle/phi/kernels/min_max_with_index_kernel.h @@ -1,4 +1,4 @@ -/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +/* Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/test/legacy_test/test_minmax_with_index_op.py b/test/legacy_test/test_minmax_with_index_op.py index 6bd686cc71270e..d80d89ae3e3c09 100644 --- a/test/legacy_test/test_minmax_with_index_op.py +++ b/test/legacy_test/test_minmax_with_index_op.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 17f080e960b07b38e65da4cf2a733093750527b3 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Wed, 20 Aug 2025 06:41:15 +0000 Subject: [PATCH 23/24] [API-Compat] Fixed min/max out mechanism --- python/paddle/tensor/compat.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/python/paddle/tensor/compat.py b/python/paddle/tensor/compat.py index 6ea2fcb5c80015..3995a274309144 100644 --- a/python/paddle/tensor/compat.py +++ b/python/paddle/tensor/compat.py @@ -672,14 +672,13 @@ def min( _check_out_status(out, False) ret = _C_ops.minimum(input, dim_or_other) - if out is None: - return ret - else: + if out is not None: if isinstance(ret, MinMaxRetType): paddle.assign(ret.values, out[0]) paddle.assign(ret.indices, out[1]) else: paddle.assign(ret, out) + return ret @ForbidKeywordsDecorator( @@ -822,11 +821,10 @@ def max( _check_out_status(out, False) ret = _C_ops.maximum(input, dim_or_other) - if out is None: - return ret - else: + if out is not None: if isinstance(ret, MinMaxRetType): paddle.assign(ret.values, out[0]) paddle.assign(ret.indices, out[1]) else: paddle.assign(ret, out) + return ret From 0fbbb99c61948a06d2455372a11ff2b77f6206c5 Mon Sep 17 00:00:00 2001 From: Enigmatisms Date: Fri, 22 Aug 2025 17:13:02 +0000 Subject: [PATCH 24/24] [API-Compat] Try adding even more unittests. 
--- .../infer_symbolic_shape/unary_infer_sym.cc | 2 +- paddle/phi/infermeta/unary.cc | 148 ++++++++---------- paddle/phi/ops/yaml/ops.yaml | 4 +- .../symbolic/test_infer_sym_shape_unary_op.py | 39 ----- .../cinn/symbolic/test_minmax_infer_sym.py | 119 ++++++++++++++ test/legacy_test/test_compat_minmax.py | 56 +++++++ .../test_zero_dim_sundry_dygraph_api.py | 37 +++++ 7 files changed, 279 insertions(+), 126 deletions(-) create mode 100644 test/ir/pir/cinn/symbolic/test_minmax_infer_sym.py diff --git a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc index 9bf285da4d77a9..ab9e020aea41ea 100644 --- a/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc +++ b/paddle/fluid/pir/dialect/operator/interface/infer_symbolic_shape/unary_infer_sym.cc @@ -352,7 +352,7 @@ bool MinMaxOpInferSymbolicShape(pir::Operation *op, std::vector out_sym_shape; if (flatten) { if (keepdims) { - out_sym_shape.emplace_back(std::int64_t(rank)); + out_sym_shape.resize(rank, std::int64_t(1)); } else { out_sym_shape = {}; } diff --git a/paddle/phi/infermeta/unary.cc b/paddle/phi/infermeta/unary.cc index fe014446c88ce2..ab8dff4a9e8d2d 100644 --- a/paddle/phi/infermeta/unary.cc +++ b/paddle/phi/infermeta/unary.cc @@ -366,90 +366,6 @@ void ArgMinMaxInferMeta(const MetaTensor& x, } } -void MinMaxWithIndexInferMeta(const MetaTensor& x, - const Scalar& axis, - bool keepdims, - bool flatten, - MetaTensor* val_out, - MetaTensor* ind_out, - MetaConfig config) { - DataType val_dtype = x.dtype(); - - if (!config.is_runtime && axis.FromTensor()) { - std::vector vec; - if (flatten) { - if (keepdims) { // NOLINT - vec = std::vector(x.dims().size(), -1); - } else { - vec = {}; - } - } else { - if (keepdims) { - vec = std::vector(x.dims().size(), -1); - } else { - vec = std::vector(x.dims().size() - 1, -1); - } - } - val_out->set_dims(common::make_ddim(vec)); - val_out->set_dtype(val_dtype); - ind_out->set_dims(common::make_ddim(vec)); - ind_out->set_dtype(DataType::INT64); - return; - } - auto int_axis = axis.to(); - const auto& x_dims = x.dims(); - - auto x_rank = x.dims().size(); - if (x_rank > 0) { - PADDLE_ENFORCE_GE(int_axis, - -x_rank, - common::errors::InvalidArgument( - "'axis'(%d) must be greater than or equal to" - " -Rank(X)(%d).", - int_axis, - -x_rank)); - PADDLE_ENFORCE_LT( - int_axis, - x_rank, - common::errors::InvalidArgument( - "'axis'(%d) must be less than Rank(X)(%d) of Input(X).", - int_axis, - x_rank)); - } else { - // 0-dim tensor - PADDLE_ENFORCE_EQ(int_axis == 0 || int_axis == -1, - true, - common::errors::InvalidArgument( - "'axis'(%d) must be 0 or -1 if input tensor is " - "0-dim.", - int_axis)); - } - - if (int_axis < 0) int_axis += x_rank; - - std::vector vec; - if (flatten) { - if (keepdims) { // NOLINT - vec = std::vector(x.dims().size(), 1); - } else { - vec = {}; - } - } else { - for (int64_t i = 0; i < int_axis; i++) - vec.emplace_back(x_dims[static_cast(i)]); - if (keepdims) { - vec.emplace_back(static_cast(1)); - } - for (int64_t i = int_axis + 1; i < x_rank; i++) - vec.emplace_back(x_dims[static_cast(i)]); - } - - val_out->set_dims(common::make_ddim(vec)); - val_out->set_dtype(val_dtype); - ind_out->set_dims(common::make_ddim(vec)); - ind_out->set_dtype(DataType::INT64); -} - void ArgsortInferMeta(const MetaTensor& input, int axis, bool descending, @@ -3034,6 +2950,70 @@ void ModeInferMeta(const MetaTensor& x, indices->set_dtype(DataType::INT64); } 
+void MinMaxWithIndexInferMeta(const MetaTensor& x, + const Scalar& axis, + bool keepdims, + bool flatten, + MetaTensor* val_out, + MetaTensor* ind_out, + MetaConfig config) { + DataType val_dtype = x.dtype(); + + // axis.FromTensor will never be true for this op + auto int_axis = axis.to(); + const auto& x_dims = x.dims(); + + auto x_rank = x.dims().size(); + if (x_rank > 0) { + PADDLE_ENFORCE_GE(int_axis, + -x_rank, + common::errors::InvalidArgument( + "'axis'(%d) must be greater than or equal to" + " -Rank(X)(%d).", + int_axis, + -x_rank)); + PADDLE_ENFORCE_LT( + int_axis, + x_rank, + common::errors::InvalidArgument( + "'axis'(%d) must be less than Rank(X)(%d) of Input(X).", + int_axis, + x_rank)); + } else { + // 0-dim tensor + PADDLE_ENFORCE_EQ(int_axis == 0 || int_axis == -1, + true, + common::errors::InvalidArgument( + "'axis'(%d) must be 0 or -1 if input tensor is " + "0-dim.", + int_axis)); + } + + if (int_axis < 0) int_axis += x_rank; + + std::vector vec; + if (flatten) { + if (keepdims) { // NOLINT + vec = std::vector(x.dims().size(), 1); + } else { + vec = {}; + } + } else { + for (int64_t i = 0; i < int_axis; i++) + vec.emplace_back(x_dims[static_cast(i)]); + if (keepdims) { + vec.emplace_back(static_cast(1)); + } + for (int64_t i = int_axis + 1; i < x_rank; i++) + vec.emplace_back(x_dims[static_cast(i)]); + } + + val_out->set_dims(common::make_ddim(vec)); + val_out->set_dtype(val_dtype); + ind_out->set_dims(common::make_ddim(vec)); + ind_out->set_dtype(DataType::INT64); +} + void MultinomialInferMeta(const MetaTensor& x, const Scalar& num_samples, bool replacement, diff --git a/paddle/phi/ops/yaml/ops.yaml b/paddle/phi/ops/yaml/ops.yaml index 78dcca6d579589..694b19cbe62188 100644 --- a/paddle/phi/ops/yaml/ops.yaml +++ b/paddle/phi/ops/yaml/ops.yaml @@ -3586,7 +3586,7 @@ func : max_with_index data_type : x backward : max_with_index_grad - interfaces : paddle::dialect::InferSymbolicShapeInterface, paddle::dialect::LayoutTransformationInterface + interfaces : paddle::dialect::InferSymbolicShapeInterface - op : maxout args : (Tensor x, int groups, int axis = 1) @@ -3706,7 +3706,7 @@ func : min_with_index data_type : x backward : min_with_index_grad - interfaces : paddle::dialect::InferSymbolicShapeInterface, paddle::dialect::LayoutTransformationInterface + interfaces : paddle::dialect::InferSymbolicShapeInterface - op : mish args : (Tensor x, float lambda) diff --git a/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py b/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py index ed77143313948b..841c08919c5e9f 100644 --- a/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py +++ b/test/ir/pir/cinn/symbolic/test_infer_sym_shape_unary_op.py @@ -73,45 +73,6 @@ def test_eval_symbolic(self): return True -class MaxMinWithIndexNet(paddle.nn.Layer): - def __init__(self): - super().__init__() - - def forward(self, x): - min_vals, min_inds = paddle.compat.min(x, dim=-1, keepdim=False) - max_vals, max_inds = paddle.compat.max(x, dim=-1, keepdim=True) - return min_vals + max_vals.squeeze(axis=-1), min_inds + max_inds - - -class MinMaxWithIndexOpInferSymbolicShapeTest(TestBase): - def prepare_data(self): - self.cases = [np.random.rand(3, 4, 5, 6), np.random.rand(257)] - self.expected = [ - [ - 'shape[S0, S1, S2], data[NULL]', - 'shape[S0, Broadcast(S0, S1), Broadcast(S1, S2), S2], data[NULL]', - ], - ['shape[], data[NULL]', 'shape[1], data[NULL]'], - ] - - def test_eval_symbolic(self): - net = MaxMinWithIndexNet() - - for i in range(len(self.cases)): - x = self.cases[i] - 
x_spec = InputSpec( - shape=[None for index in range(len(x.shape))], dtype='float32' - ) - input_spec = [x_spec] - net = apply_to_static(net, False, input_spec) - net.eval() - check_infer_results( - net, input_spec, 'builtin.shadow_output', self.expected[i] - ) - - return True - - class AsComplexAsRealNet(paddle.nn.Layer): def __init__(self): super().__init__() diff --git a/test/ir/pir/cinn/symbolic/test_minmax_infer_sym.py b/test/ir/pir/cinn/symbolic/test_minmax_infer_sym.py new file mode 100644 index 00000000000000..81975c8029bb33 --- /dev/null +++ b/test/ir/pir/cinn/symbolic/test_minmax_infer_sym.py @@ -0,0 +1,119 @@ +# Copyright (c) 2025 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import sys +import unittest +from os.path import dirname + +import numpy as np +from test_infer_sym_shape_utils import ( + TestBase, + check_infer_results, +) + +import paddle +from paddle.static import InputSpec + +sys.path.append(dirname(dirname(__file__))) +from utils import apply_to_static + +# NOTE(SigureMo): Disable the CSE optimization to avoid op number change. +paddle.set_flags({"FLAGS_enable_cse_in_dy2st": False}) + + +class MaxMinWithIndexNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x): + min_vals, min_inds = paddle.compat.min(x, dim=-1, keepdim=False) + max_vals, max_inds = paddle.compat.max(x, dim=-1, keepdim=True) + return min_vals + max_vals.squeeze(axis=-1), min_inds + max_inds + + +class MinMaxWithIndexOpInferSymbolicShapeTest(TestBase): + def prepare_data(self): + self.cases = [np.random.rand(3, 4, 5, 6), np.random.rand(257)] + self.expected = [ + [ + 'shape[S0, S1, S2], data[NULL]', + 'shape[S0, Broadcast(S0, S1), Broadcast(S1, S2), S2], data[NULL]', + ], + ['shape[], data[NULL]', 'shape[1], data[NULL]'], + ] + + def test_eval_symbolic(self): + net = MaxMinWithIndexNet() + + for i in range(len(self.cases)): + x = self.cases[i] + x_spec = InputSpec( + shape=[None for index in range(len(x.shape))], dtype='float32' + ) + input_spec = [x_spec] + net = apply_to_static(net, False, input_spec) + net.eval() + check_infer_results( + net, input_spec, 'builtin.shadow_output', self.expected[i] + ) + + return True + + +class MinMaxWithIndexRawNet(paddle.nn.Layer): + def __init__(self): + super().__init__() + + def forward(self, x): + x = x * 2 + 1 + min_vals, min_inds = paddle._C_ops.min_with_index(x, 1, False, True) + max_vals, max_inds = paddle._C_ops.max_with_index(x, 2, True, True) + return min_vals + max_vals.squeeze(), min_inds * max_inds + + +class MinMaxWithIndexOpRawInferShapeTest(TestBase): + def prepare_data(self): + self.cases = [np.random.rand(4, 5, 6), np.random.rand(3, 7, 1, 2)] + self.expected = [ + [ + 'shape[], data[NULL]', + 'shape[1, 1, 1], data[NULL]', + ], + ['shape[], data[NULL]', 'shape[1, 1, 1, 1], data[NULL]'], + ] + + @unittest.skipIf( + not paddle.core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", + ) + def test_eval_symbolic(self): + net = MinMaxWithIndexRawNet() 
+ + for i in range(len(self.cases)): + x = self.cases[i] + x_spec = InputSpec( + shape=[None for index in range(len(x.shape))], dtype='float32' + ) + input_spec = [x_spec] + net = apply_to_static(net, False, input_spec) + net.eval() + check_infer_results( + net, input_spec, 'builtin.shadow_output', self.expected[i] + ) + + return True + + +if __name__ == "__main__": + unittest.main() diff --git a/test/legacy_test/test_compat_minmax.py b/test/legacy_test/test_compat_minmax.py index d043b82fcea426..0354e72a3759b9 100644 --- a/test/legacy_test/test_compat_minmax.py +++ b/test/legacy_test/test_compat_minmax.py @@ -490,6 +490,62 @@ def test_static_graph(self): self._compare_with_origin_static([3, 10, 2], 0, keepdim=True) self._compare_with_origin_static([17], 0) + @unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", + ) + def test_static_unary_shape_infer_1(self): + # min/max with index is a GPU only op, no need for testing if there is no GPU + + @paddle.jit.to_static(full_graph=True) + def static_func1(x): + y = paddle.zeros([2, 3, 4]) + return paddle._C_ops.min_with_index(y, x.shape[0], False, False) + + @paddle.jit.to_static(full_graph=True) + def static_func2(x): + y = paddle.zeros([2, 3, 4]) + return paddle._C_ops.min_with_index(y, x.shape[0], True, False) + + input_ts1 = paddle.to_tensor([1]) + input_ts2 = paddle.to_tensor([1, 2]) + val1, ind1 = static_func1(input_ts1) + val2, ind2 = static_func2(input_ts2) + + self.assertEqual(val1.shape, [2, 4]) + self.assertEqual(ind1.shape, [2, 4]) + self.assertEqual(val2.shape, [2, 3, 1]) + self.assertEqual(ind2.shape, [2, 3, 1]) + + @unittest.skipIf( + not core.is_compiled_with_cuda(), + "core is not compiled with CUDA, skipping", + ) + def test_static_unary_shape_infer_2(self): + # min/max with index is a GPU only op, no need for testing if there is no GPU + + @paddle.jit.to_static(full_graph=True) + def static_func1(x): + dim = paddle.arange(0, 1).shape[0] + y = paddle.zeros([2, 3, 4]) + return paddle._C_ops.max_with_index(y, dim, False, True) + + @paddle.jit.to_static(full_graph=True) + def static_func2(x): + dim = paddle.arange(0, 2).shape[0] + y = paddle.zeros([2, 3, 4]) + return paddle._C_ops.max_with_index(y, dim, True, True) + + x1 = paddle.to_tensor([1]) + x2 = paddle.to_tensor([1, 2]) + val1, ind1 = static_func1(x1) + val2, ind2 = static_func2(x2) + + self.assertEqual(val1.shape, []) + self.assertEqual(ind1.shape, []) + self.assertEqual(val2.shape, [1, 1, 1]) + self.assertEqual(ind2.shape, [1, 1, 1]) + class TestCompatMax(TestCompatMinMaxBase): def __init__(self, *args, **kwargs): diff --git a/test/legacy_test/test_zero_dim_sundry_dygraph_api.py b/test/legacy_test/test_zero_dim_sundry_dygraph_api.py index b8debdcab006bb..29d3c5961d6241 100644 --- a/test/legacy_test/test_zero_dim_sundry_dygraph_api.py +++ b/test/legacy_test/test_zero_dim_sundry_dygraph_api.py @@ -600,6 +600,43 @@ def _make_compat_minmax_test(self, func_name): val.backward() self.assertEqual(x.grad.shape, [4, 5]) + def test_minmax_with_index(self): + # min/max_with_index is a GPU only op + if not paddle.is_compiled_with_cuda(): + return + # 1) x is 0D + x = paddle.to_tensor(1) + val1, ind1 = paddle._C_ops.min_with_index(x, 0, False, True) + + self.assertEqual(val1.shape, []) + self.assertEqual(ind1.shape, []) + np.testing.assert_allclose(val1, 1) + np.testing.assert_allclose(ind1, 0) + + # 2) x is 1D + x = paddle.to_tensor([1, 1, 1]) + val1, ind1 = paddle._C_ops.max_with_index(x, 0, False, True) + + self.assertEqual(val1.shape, 
[]) + self.assertEqual(ind1.shape, []) + np.testing.assert_allclose(val1, 1) + np.testing.assert_allclose(ind1, 0) + + # 3) x is 2D + x = paddle.zeros([2, 3]) + val1, ind1 = paddle._C_ops.min_with_index(x, 1, False, True) + val2, ind2 = paddle._C_ops.max_with_index(x, 1, True, True) + + self.assertEqual(val1.shape, []) + self.assertEqual(ind1.shape, []) + np.testing.assert_allclose(val1, 0) + np.testing.assert_allclose(ind1, 0) + + self.assertEqual(val2.shape, [1, 1]) + self.assertEqual(ind2.shape, [1, 1]) + np.testing.assert_allclose(val2, 0) + np.testing.assert_allclose(ind2, 0) + def test_compat_min(self): self._make_compat_minmax_test(paddle.compat.min)
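
For quick reference, a minimal dygraph sketch of the behavior the new tests assert is shown below. The `values`/`indices` fields of the returned named tuple and the chunk-size meaning of the int argument to `paddle.compat.split` follow the diffs in this series; the tensor shapes and variable names are illustrative only.

    import paddle

    x = paddle.arange(24, dtype=paddle.float32).reshape([4, 6])

    # paddle.compat.split: an int is the chunk size along `dim`,
    # and the last chunk may be smaller.
    a, b = paddle.compat.split(x, 4, dim=1)             # shapes [4, 4] and [4, 2]

    # paddle.split: an int is the number of equal sections along `axis`.
    parts = paddle.split(x, num_or_sections=3, axis=1)  # three [4, 2] tensors

    # paddle.compat.min/max: with a dim, a (values, indices) named tuple is
    # returned and the indices do not require gradients.
    ret = paddle.compat.min(x, dim=1)
    print(ret.values.shape, ret.indices.shape)          # [4] [4]

    # keepdim=True keeps the reduced axis with size 1; with no dim, only the
    # global extremum is returned as a 0-D tensor.
    vals, inds = paddle.compat.max(x, dim=0, keepdim=True)  # [1, 6] and [1, 6]
    print(paddle.compat.min(x).shape)                    # []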